// doxygen/html/VPlanRecipes%5F8cpp%5Fsource.html

//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// This file contains implementations for different VPlan recipes.

///

//===----------------------------------------------------------------------===//


#include "LoopVectorizationPlanner.h"

#include "VPlan.h"

#include "VPlanAnalysis.h"

#include "VPlanHelpers.h"

#include "VPlanPatternMatch.h"

#include "VPlanUtils.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Twine.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/IVDescriptors.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Value.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include <cassert>


using namespace llvm;

using namespace llvm::VPlanPatternMatch;


using VectorParts = SmallVector<Value *, 2>;


#define LV_NAME "loop-vectorize"

#define DEBUG_TYPE LV_NAME


bool VPRecipeBase::mayWriteToMemory() const {

  switch (getVPDefID()) {

  case VPExpressionSC:

    return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();

  case VPInstructionSC: {

    auto *VPI = cast<VPInstruction>(this);

    // Loads read from memory but don't write to memory.

    if (VPI->getOpcode() == Instruction::Load)

      return false;

    return VPI->opcodeMayReadOrWriteFromMemory();

  }

  case VPInterleaveEVLSC:

  case VPInterleaveSC:

    return cast<VPInterleaveBase>(this)->getNumStoreOperands() > 0;

  case VPWidenStoreEVLSC:

  case VPWidenStoreSC:

    return true;

  case VPReplicateSC:

    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())

        ->mayWriteToMemory();

  case VPWidenCallSC:

    return !cast<VPWidenCallRecipe>(this)

                ->getCalledScalarFunction()

                ->onlyReadsMemory();

  case VPWidenIntrinsicSC:

    return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();

  case VPCanonicalIVPHISC:

  case VPBranchOnMaskSC:

  case VPDerivedIVSC:

  case VPFirstOrderRecurrencePHISC:

  case VPReductionPHISC:

  case VPScalarIVStepsSC:

  case VPPredInstPHISC:

    return false;

  case VPBlendSC:

  case VPReductionEVLSC:

  case VPReductionSC:

  case VPVectorPointerSC:

  case VPWidenCanonicalIVSC:

  case VPWidenCastSC:

  case VPWidenGEPSC:

  case VPWidenIntOrFpInductionSC:

  case VPWidenLoadEVLSC:

  case VPWidenLoadSC:

  case VPWidenPHISC:

  case VPWidenPointerInductionSC:

  case VPWidenSC: {

    const Instruction *I =

        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());

    (void)I;

    assert((!I || !I->mayWriteToMemory()) &&

           "underlying instruction may write to memory");

    return false;

  }

  default:

    return true;

  }

}


bool VPRecipeBase::mayReadFromMemory() const {

  switch (getVPDefID()) {

  case VPExpressionSC:

    return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();

  case VPInstructionSC:

    return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();

  case VPWidenLoadEVLSC:

  case VPWidenLoadSC:

    return true;

  case VPReplicateSC:

    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())

        ->mayReadFromMemory();

  case VPWidenCallSC:

    return !cast<VPWidenCallRecipe>(this)

                ->getCalledScalarFunction()

                ->onlyWritesMemory();

  case VPWidenIntrinsicSC:

    return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();

  case VPBranchOnMaskSC:

  case VPDerivedIVSC:

  case VPFirstOrderRecurrencePHISC:

  case VPPredInstPHISC:

  case VPScalarIVStepsSC:

  case VPWidenStoreEVLSC:

  case VPWidenStoreSC:

    return false;

  case VPBlendSC:

  case VPReductionEVLSC:

  case VPReductionSC:

  case VPVectorPointerSC:

  case VPWidenCanonicalIVSC:

  case VPWidenCastSC:

  case VPWidenGEPSC:

  case VPWidenIntOrFpInductionSC:

  case VPWidenPHISC:

  case VPWidenPointerInductionSC:

  case VPWidenSC: {

    const Instruction *I =

        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());

    (void)I;

    assert((!I || !I->mayReadFromMemory()) &&

           "underlying instruction may read from memory");

    return false;

  }

  default:

    // FIXME: Return false if the recipe represents an interleaved store.

    return true;

  }

}


bool VPRecipeBase::mayHaveSideEffects() const {

  switch (getVPDefID()) {

  case VPExpressionSC:

    return cast<VPExpressionRecipe>(this)->mayHaveSideEffects();

  case VPDerivedIVSC:

  case VPFirstOrderRecurrencePHISC:

  case VPPredInstPHISC:

  case VPVectorEndPointerSC:

    return false;

  case VPInstructionSC: {

    auto *VPI = cast<VPInstruction>(this);

    return mayWriteToMemory() ||

           VPI->getOpcode() == VPInstruction::BranchOnCount ||

           VPI->getOpcode() == VPInstruction::BranchOnCond ||

           VPI->getOpcode() == VPInstruction::BranchOnTwoConds;

  }

  case VPWidenCallSC: {

    Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();

    return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();

  }

  case VPWidenIntrinsicSC:

    return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();

  case VPBlendSC:

  case VPReductionEVLSC:

  case VPReductionSC:

  case VPScalarIVStepsSC:

  case VPVectorPointerSC:

  case VPWidenCanonicalIVSC:

  case VPWidenCastSC:

  case VPWidenGEPSC:

  case VPWidenIntOrFpInductionSC:

  case VPWidenPHISC:

  case VPWidenPointerInductionSC:

  case VPWidenSC: {

    const Instruction *I =

        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());

    (void)I;

    assert((!I || !I->mayHaveSideEffects()) &&

           "underlying instruction has side-effects");

    return false;

  }

  case VPInterleaveEVLSC:

  case VPInterleaveSC:

    return mayWriteToMemory();

  case VPWidenLoadEVLSC:

  case VPWidenLoadSC:

  case VPWidenStoreEVLSC:

  case VPWidenStoreSC:

    assert(

        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==

            mayWriteToMemory() &&

        "mayHaveSideffects result for ingredient differs from this "

        "implementation");

    return mayWriteToMemory();

  case VPReplicateSC: {

    auto *R = cast<VPReplicateRecipe>(this);

    return R->getUnderlyingInstr()->mayHaveSideEffects();

  }

  default:

    return true;

  }

}


/// Insert this recipe, which must not yet belong to a block, immediately
/// before \p InsertPos in the block that contains \p InsertPos.
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  auto *TargetBB = InsertPos->getParent();
  assert(TargetBB && "Insertion position not in any VPBasicBlock");
  TargetBB->insert(this, InsertPos->getIterator());
}


/// Insert this recipe, which must not yet belong to a block, into \p BB in
/// front of the position \p I. \p I must be either the end of \p BB or an
/// iterator to a recipe inside \p BB.
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}


/// Insert this recipe, which must not yet belong to a block, immediately after
/// \p InsertPos in the block that contains \p InsertPos.
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  auto *TargetBB = InsertPos->getParent();
  assert(TargetBB && "Insertion position not in any VPBasicBlock");
  // Inserting in front of the successor position places us right after
  // InsertPos.
  auto Successor = std::next(InsertPos->getIterator());
  TargetBB->insert(this, Successor);
}


/// Unlink this recipe from the recipe list of its parent block and clear its
/// parent pointer. The recipe must currently be in a block.
void VPRecipeBase::removeFromParent() {
  auto *Owner = getParent();
  assert(Owner && "Recipe not in any VPBasicBlock");
  Owner->getRecipeList().remove(getIterator());
  Parent = nullptr;
}


/// Erase this recipe from the recipe list of its parent block and return the
/// iterator produced by the list's erase(). The recipe must currently be in a
/// block.
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  auto *Owner = getParent();
  assert(Owner && "Recipe not in any VPBasicBlock");
  auto &Recipes = Owner->getRecipeList();
  return Recipes.erase(getIterator());
}


/// Move this recipe from its current block to the position right after
/// \p InsertPos.
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  // Detach first: insertAfter() asserts that the recipe has no parent.
  this->removeFromParent();
  this->insertAfter(InsertPos);
}


/// Move this recipe from its current block into \p BB, in front of the
/// position \p I.
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  // Detach first: insertBefore() asserts that the recipe has no parent.
  this->removeFromParent();
  this->insertBefore(BB, I);
}


/// Return the cost of this recipe for vectorization factor \p VF.
///
/// The cost is 0 when the cost context asks to skip the recipe's underlying
/// instruction. Otherwise it is whatever computeCost() returns, except that a
/// valid cost is overridden when ForceTargetInstructionCost was given on the
/// command line: with the forced value if there is an underlying instruction,
/// and with 0 if there is none.
InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
  // Find the underlying instruction for the recipe, if there is one. It
  // decides whether cost computation is skipped and whether the forced target
  // instruction cost applies.
  Instruction *UI = nullptr;
  if (auto *SingleDef = dyn_cast<VPSingleDefRecipe>(this))
    UI = dyn_cast_or_null<Instruction>(SingleDef->getUnderlyingValue());
  else if (auto *Interleave = dyn_cast<VPInterleaveBase>(this))
    UI = Interleave->getInsertPos();
  else if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(this))
    UI = &MemR->getIngredient();

  InstructionCost RecipeCost;
  const bool SkipCost = UI && Ctx.skipCostComputation(UI, VF.isVector());
  if (SkipCost) {
    RecipeCost = 0;
  } else {
    RecipeCost = computeCost(VF, Ctx);
    const bool CostIsForced =
        ForceTargetInstructionCost.getNumOccurrences() > 0;
    if (CostIsForced && RecipeCost.isValid())
      RecipeCost = UI ? InstructionCost(ForceTargetInstructionCost)
                      : InstructionCost(0);
  }

  LLVM_DEBUG({
    dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
    dump();
  });
  return RecipeCost;
}


/// Base-class fallback for the per-recipe cost computation. It must never be
/// reached: subclasses are expected to provide their own implementation.
InstructionCost VPRecipeBase::computeCost(ElementCount VF,
                                          VPCostContext &Ctx) const {
  llvm_unreachable("subclasses should implement computeCost");
}


/// Return true if this recipe is a phi: either its recipe ID lies in the
/// [VPFirstPHISC, VPLastPHISC] range, or it is a VPPhi or VPIRPhi.
bool VPRecipeBase::isPhi() const {
  const auto ID = getVPDefID();
  if (ID >= VPFirstPHISC && ID <= VPLastPHISC)
    return true;
  return isa<VPPhi, VPIRPhi>(this);
}


/// Return true if this recipe is a VPInstruction whose opcode is an IR cast
/// opcode.
bool VPRecipeBase::isScalarCast() const {
  if (const auto *Inst = dyn_cast<VPInstruction>(this))
    return Instruction::isCast(Inst->getOpcode());
  return false;
}


void VPIRFlags::intersectFlags(const VPIRFlags &Other) {

  assert(OpType == Other.OpType && "OpType must match");

  switch (OpType) {

  case OperationType::OverflowingBinOp:

    WrapFlags.HasNUW &= Other.WrapFlags.HasNUW;

    WrapFlags.HasNSW &= Other.WrapFlags.HasNSW;

    break;

  case OperationType::Trunc:

    TruncFlags.HasNUW &= Other.TruncFlags.HasNUW;

    TruncFlags.HasNSW &= Other.TruncFlags.HasNSW;

    break;

  case OperationType::DisjointOp:

    DisjointFlags.IsDisjoint &= Other.DisjointFlags.IsDisjoint;

    break;

  case OperationType::PossiblyExactOp:

    ExactFlags.IsExact &= Other.ExactFlags.IsExact;

    break;

  case OperationType::GEPOp:

    GEPFlags &= Other.GEPFlags;

    break;

  case OperationType::FPMathOp:

  case OperationType::FCmp:

    assert((OpType != OperationType::FCmp ||

            FCmpFlags.Pred == Other.FCmpFlags.Pred) &&

           "Cannot drop CmpPredicate");

    getFMFsRef().NoNaNs &= Other.getFMFsRef().NoNaNs;

    getFMFsRef().NoInfs &= Other.getFMFsRef().NoInfs;

    break;

  case OperationType::NonNegOp:

    NonNegFlags.NonNeg &= Other.NonNegFlags.NonNeg;

    break;

  case OperationType::Cmp:

    assert(CmpPredicate == Other.CmpPredicate && "Cannot drop CmpPredicate");

    break;

  case OperationType::ReductionOp:

    assert(ReductionFlags.Kind == Other.ReductionFlags.Kind &&

           "Cannot change RecurKind");

    assert(ReductionFlags.IsOrdered == Other.ReductionFlags.IsOrdered &&

           "Cannot change IsOrdered");

    assert(ReductionFlags.IsInLoop == Other.ReductionFlags.IsInLoop &&

           "Cannot change IsInLoop");

    getFMFsRef().NoNaNs &= Other.getFMFsRef().NoNaNs;

    getFMFsRef().NoInfs &= Other.getFMFsRef().NoInfs;

    break;

  case OperationType::Other:

    assert(AllFlags == Other.AllFlags && "Cannot drop other flags");

    break;

  }

}


/// Build an IR FastMathFlags object from the fast-math bits stored for this
/// operation. Only valid for the FPMathOp, FCmp and ReductionOp operation
/// types.
FastMathFlags VPIRFlags::getFastMathFlags() const {
  assert((OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
          OpType == OperationType::ReductionOp) &&
         "recipe doesn't have fast math flags");

  const FastMathFlagsTy &Stored = getFMFsRef();

  // Copy each stored bit into the corresponding IR flag.
  FastMathFlags FMF;
  FMF.setNoNaNs(Stored.NoNaNs);
  FMF.setNoInfs(Stored.NoInfs);
  FMF.setNoSignedZeros(Stored.NoSignedZeros);
  FMF.setAllowReassoc(Stored.AllowReassoc);
  FMF.setAllowReciprocal(Stored.AllowReciprocal);
  FMF.setAllowContract(Stored.AllowContract);
  FMF.setApproxFunc(Stored.ApproxFunc);
  return FMF;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

/// Dump this recipe by forwarding to VPDef::dump().
void VPSingleDefRecipe::dump() const {
  VPDef::dump();
}


/// Print this recipe to \p O: first the recipe itself via printRecipe(), then
/// ", !dbg " and the debug location if one is set, then any metadata if the
/// recipe is also a VPIRMetadata.
void VPRecipeBase::print(raw_ostream &O, const Twine &Indent,
                         VPSlotTracker &SlotTracker) const {
  printRecipe(O, Indent, SlotTracker);

  auto Loc = getDebugLoc();
  if (Loc) {
    O << ", !dbg ";
    Loc.print(O);
  }

  const auto *MD = dyn_cast<VPIRMetadata>(this);
  if (MD)
    MD->print(O, SlotTracker);
}


#endif


/// Return the operand of \p U at index PartOpIdx when \p U has exactly
/// PartOpIdx + 1 operands (so that operand is the last one); return nullptr
/// otherwise.
template <unsigned PartOpIdx>
VPValue *
VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(const VPUser &U) const {
  const bool HasPartOperand = U.getNumOperands() == PartOpIdx + 1;
  return HasPartOperand ? U.getOperand(PartOpIdx) : nullptr;
}


/// Return the unroll part recorded on \p U: the zero-extended value of its
/// unroll-part operand (which must be a VPConstantInt), or 0 when \p U has no
/// such operand.
template <unsigned PartOpIdx>
unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(const VPUser &U) const {
  VPValue *PartOp = getUnrollPartOperand(U);
  if (!PartOp)
    return 0;
  return cast<VPConstantInt>(PartOp)->getZExtValue();
}


// Explicit instantiations of VPUnrollPartAccessor for operand indices 1, 2
// and 3.
namespace llvm {
template class VPUnrollPartAccessor<1>;
template class VPUnrollPartAccessor<2>;
template class VPUnrollPartAccessor<3>;
} // namespace llvm


/// Construct a VPInstruction with the given \p Opcode, \p Operands, IR
/// \p Flags, metadata \p MD, debug location \p DL and \p Name. In asserts
/// builds, checks that the flags are valid for the opcode and that the operand
/// count matches the opcode whenever the opcode fixes it.
VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
                             const VPIRFlags &Flags, const VPIRMetadata &MD,
                             DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, Flags, DL),
      VPIRMetadata(MD), Opcode(Opcode), Name(Name.str()) {
  assert(flagsValidForOpcode(getOpcode()) &&
         "Set flags not supported for the provided opcode");
#ifndef NDEBUG
  // -1u means the operand count cannot be derived from the opcode.
  const unsigned ExpectedNumOps = getNumOperandsForOpcode(Opcode);
  assert((ExpectedNumOps == -1u || ExpectedNumOps == getNumOperands()) &&
         "number of operands does not match opcode");
#endif
}


#ifndef NDEBUG


/// Return the number of operands expected for \p Opcode, or -1u when the
/// count cannot be determined from the opcode alone. Every opcode must be
/// handled; an unknown opcode reaches llvm_unreachable.
unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
  // Whole IR opcode categories with a fixed operand count.
  if (Instruction::isBinaryOp(Opcode))
    return 2;
  if (Instruction::isCast(Opcode) || Instruction::isUnaryOp(Opcode))
    return 1;

  switch (Opcode) {
  // Cannot determine the number of operands from the opcode.
  case Instruction::Call:
  case Instruction::GetElementPtr:
  case Instruction::PHI:
  case Instruction::Switch:
  case VPInstruction::AnyOf:
  case VPInstruction::ExtractLane:
  case VPInstruction::FirstActiveLane:
  case VPInstruction::LastActiveLane:
  case VPInstruction::SLPLoad:
  case VPInstruction::SLPStore:
    return -1u;

  // No operands.
  case VPInstruction::StepVector:
  case VPInstruction::VScale:
    return 0;

  // Exactly one operand.
  case Instruction::Alloca:
  case Instruction::ExtractValue:
  case Instruction::Freeze:
  case Instruction::Load:
  case VPInstruction::BranchOnCond:
  case VPInstruction::Broadcast:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::ExtractLastLane:
  case VPInstruction::ExtractLastPart:
  case VPInstruction::ExtractPenultimateElement:
  case VPInstruction::Not:
  case VPInstruction::ResumeForEpilogue:
  case VPInstruction::Reverse:
  case VPInstruction::Unpack:
    return 1;

  // Exactly two operands.
  case Instruction::ExtractElement:
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Store:
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::LogicalAnd:
  case VPInstruction::PtrAdd:
  case VPInstruction::WideIVStep:
  case VPInstruction::WidePtrAdd:
    return 2;

  // Exactly three operands.
  case Instruction::Select:
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ComputeAnyOfResult:
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ExtractLastActive:
  case VPInstruction::ReductionStartVector:
    return 3;
  }
  llvm_unreachable("all cases should be handled above");
}


#endif


/// Return true only for a PtrAdd for which more than the first lane is used.
bool VPInstruction::doesGeneratePerAllLanes() const {
  if (Opcode != VPInstruction::PtrAdd)
    return false;
  return !vputils::onlyFirstLaneUsed(this);
}


/// Return true if a scalar value can be generated for the first lane only.
/// This holds for binary and cast opcodes, for single-scalar and
/// vector-to-scalar instructions, and for the opcodes listed below.
bool VPInstruction::canGenerateScalarForFirstLane() const {
  const unsigned Opc = getOpcode();
  if (Instruction::isBinaryOp(Opc) || Instruction::isCast(Opc) ||
      isSingleScalar() || isVectorToScalar())
    return true;

  switch (Opcode) {
  default:
    return false;
  case Instruction::Freeze:
  case Instruction::ICmp:
  case Instruction::PHI:
  case Instruction::Select:
  case VPInstruction::AnyOf:
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::Not:
  case VPInstruction::PtrAdd:
    return true;
  }
}


Value *VPInstruction::generate(VPTransformState &State) {

  IRBuilderBase &Builder = State.Builder;


  if (Instruction::isBinaryOp(getOpcode())) {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);

    Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);

    auto *Res =

        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);

    if (auto *I = dyn_cast<Instruction>(Res))

      applyFlags(*I);

    return Res;

  }


  switch (getOpcode()) {

  case VPInstruction::Not: {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);

    return Builder.CreateNot(A, Name);

  }

  case Instruction::ExtractElement: {

    assert(State.VF.isVector() && "Only extract elements from vectors");

    if (auto *Idx = dyn_cast<VPConstantInt>(getOperand(1)))

      return State.get(getOperand(0), VPLane(Idx->getZExtValue()));

    Value *Vec = State.get(getOperand(0));

    Value *Idx = State.get(getOperand(1), /*IsScalar=*/true);

    return Builder.CreateExtractElement(Vec, Idx, Name);

  }

  case Instruction::Freeze: {

    Value *Op = State.get(getOperand(0), vputils::onlyFirstLaneUsed(this));

    return Builder.CreateFreeze(Op, Name);

  }

  case Instruction::FCmp:

  case Instruction::ICmp: {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);

    Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);

    return Builder.CreateCmp(getPredicate(), A, B, Name);

  }

  case Instruction::PHI: {

    llvm_unreachable("should be handled by VPPhi::execute");

  }

  case Instruction::Select: {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *Cond =

        State.get(getOperand(0),

                  OnlyFirstLaneUsed || vputils::isSingleScalar(getOperand(0)));

    Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);

    Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);

    return Builder.CreateSelect(Cond, Op1, Op2, Name);

  }

  case VPInstruction::ActiveLaneMask: {

    // Get first lane of vector induction variable.

    Value *VIVElem0 = State.get(getOperand(0), VPLane(0));

    // Get the original loop tripcount.

    Value *ScalarTC = State.get(getOperand(1), VPLane(0));


    // If this part of the active lane mask is scalar, generate the CMP directly

    // to avoid unnecessary extracts.

    if (State.VF.isScalar())

      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,

                               Name);


    ElementCount EC = State.VF.multiplyCoefficientBy(

        cast<VPConstantInt>(getOperand(2))->getZExtValue());

    auto *PredTy = VectorType::get(Builder.getInt1Ty(), EC);

    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,

                                   {PredTy, ScalarTC->getType()},

                                   {VIVElem0, ScalarTC}, nullptr, Name);

  }

  case VPInstruction::FirstOrderRecurrenceSplice: {

    // Generate code to combine the previous and current values in vector v3.

    //

    //   vector.ph:

    //     v_init = vector(..., ..., ..., a[-1])

    //     br vector.body

    //

    //   vector.body

    //     i = phi [0, vector.ph], [i+4, vector.body]

    //     v1 = phi [v_init, vector.ph], [v2, vector.body]

    //     v2 = a[i, i+1, i+2, i+3];

    //     v3 = vector(v1(3), v2(0, 1, 2))


    auto *V1 = State.get(getOperand(0));

    if (!V1->getType()->isVectorTy())

      return V1;

    Value *V2 = State.get(getOperand(1));

    return Builder.CreateVectorSplice(V1, V2, -1, Name);

  }

  case VPInstruction::CalculateTripCountMinusVF: {

    unsigned UF = getParent()->getPlan()->getUF();

    Value *ScalarTC = State.get(getOperand(0), VPLane(0));

    Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);

    Value *Sub = Builder.CreateSub(ScalarTC, Step);

    Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);

    Value *Zero = ConstantInt::getNullValue(ScalarTC->getType());

    return Builder.CreateSelect(Cmp, Sub, Zero);

  }

  case VPInstruction::ExplicitVectorLength: {

    // TODO: Restructure this code with an explicit remainder loop, vsetvli can

    // be outside of the main loop.

    Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);

    // Compute EVL

    assert(AVL->getType()->isIntegerTy() &&

           "Requested vector length should be an integer.");


    assert(State.VF.isScalable() && "Expected scalable vector factor.");

    Value *VFArg = Builder.getInt32(State.VF.getKnownMinValue());


    Value *EVL = Builder.CreateIntrinsic(

        Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,

        {AVL, VFArg, Builder.getTrue()});

    return EVL;

  }

  case VPInstruction::CanonicalIVIncrementForPart: {

    unsigned Part = getUnrollPart(*this);

    auto *IV = State.get(getOperand(0), VPLane(0));

    assert(Part != 0 && "Must have a positive part");

    // The canonical IV is incremented by the vectorization factor (num of

    // SIMD elements) times the unroll part.

    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);

    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),

                             hasNoSignedWrap());

  }

  case VPInstruction::BranchOnCond: {

    Value *Cond = State.get(getOperand(0), VPLane(0));

    // Replace the temporary unreachable terminator with a new conditional

    // branch, hooking it up to backward destination for latch blocks now, and

    // to forward destination(s) later when they are created.

    // Second successor may be backwards - iff it is already in VPBB2IRBB.

    VPBasicBlock *SecondVPSucc =

        cast<VPBasicBlock>(getParent()->getSuccessors()[1]);

    BasicBlock *SecondIRSucc = State.CFG.VPBB2IRBB.lookup(SecondVPSucc);

    BasicBlock *IRBB = State.CFG.VPBB2IRBB[getParent()];

    auto *Br = Builder.CreateCondBr(Cond, IRBB, SecondIRSucc);

    // First successor is always forward, reset it to nullptr.

    Br->setSuccessor(0, nullptr);

    IRBB->getTerminator()->eraseFromParent();

    applyMetadata(*Br);

    return Br;

  }

  case VPInstruction::Broadcast: {

    return Builder.CreateVectorSplat(

        State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");

  }

  case VPInstruction::BuildStructVector: {

    // For struct types, we need to build a new 'wide' struct type, where each

    // element is widened, i.e., we create a struct of vectors.

    auto *StructTy =

        cast<StructType>(State.TypeAnalysis.inferScalarType(getOperand(0)));

    Value *Res = PoisonValue::get(toVectorizedTy(StructTy, State.VF));

    for (const auto &[LaneIndex, Op] : enumerate(operands())) {

      for (unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();

           FieldIndex++) {

        Value *ScalarValue =

            Builder.CreateExtractValue(State.get(Op, true), FieldIndex);

        Value *VectorValue = Builder.CreateExtractValue(Res, FieldIndex);

        VectorValue =

            Builder.CreateInsertElement(VectorValue, ScalarValue, LaneIndex);

        Res = Builder.CreateInsertValue(Res, VectorValue, FieldIndex);

      }

    }

    return Res;

  }

  case VPInstruction::BuildVector: {

    auto *ScalarTy = State.TypeAnalysis.inferScalarType(getOperand(0));

    auto NumOfElements = ElementCount::getFixed(getNumOperands());

    Value *Res = PoisonValue::get(toVectorizedTy(ScalarTy, NumOfElements));

    for (const auto &[Idx, Op] : enumerate(operands()))

      Res = Builder.CreateInsertElement(Res, State.get(Op, true),

                                        Builder.getInt32(Idx));

    return Res;

  }

  case VPInstruction::ReductionStartVector: {

    if (State.VF.isScalar())

      return State.get(getOperand(0), true);

    IRBuilderBase::FastMathFlagGuard FMFG(Builder);

    Builder.setFastMathFlags(getFastMathFlags());

    // If this start vector is scaled then it should produce a vector with fewer

    // elements than the VF.

    ElementCount VF = State.VF.divideCoefficientBy(

        cast<VPConstantInt>(getOperand(2))->getZExtValue());

    auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true));

    return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true),

                                       Builder.getInt32(0));

  }

  case VPInstruction::ComputeAnyOfResult: {

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary

    // and will be removed by breaking up the recipe further.

    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));

    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());

    Value *ReducedPartRdx = State.get(getOperand(2));

    for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx)

      ReducedPartRdx =

          Builder.CreateBinOp(Instruction::Or, State.get(getOperand(Idx)),

                              ReducedPartRdx, "bin.rdx");

    return createAnyOfReduction(Builder, ReducedPartRdx,

                                State.get(getOperand(1), VPLane(0)), OrigPhi);

  }

  case VPInstruction::ComputeFindIVResult: {

    // The recipe's operands are the start value, the sentinel value, followed

    // by one operand for each part of the reduction.

    unsigned UF = getNumOperands() - 2;

    Value *ReducedResult = State.get(getOperand(2));

    RecurKind MinMaxKind = getRecurKind();

    assert((MinMaxKind == RecurKind::SMin || MinMaxKind == RecurKind::SMax ||

            MinMaxKind == RecurKind::UMin || MinMaxKind == RecurKind::UMax) &&

           "unexpected recurrence kind for ComputeFindIVResult");

    for (unsigned Part = 1; Part < UF; ++Part)

      ReducedResult = createMinMaxOp(Builder, MinMaxKind, ReducedResult,

                                     State.get(getOperand(2 + Part)));


    // Reduce the vector to a scalar.

    bool IsMaxRdx =

        MinMaxKind == RecurKind::SMax || MinMaxKind == RecurKind::UMax;

    bool IsSigned =

        MinMaxKind == RecurKind::SMin || MinMaxKind == RecurKind::SMax;

    if (ReducedResult->getType()->isVectorTy())

      ReducedResult = IsMaxRdx

                          ? Builder.CreateIntMaxReduce(ReducedResult, IsSigned)

                          : Builder.CreateIntMinReduce(ReducedResult, IsSigned);

    // Correct the final reduction result back to the start value if the

    // reduction result is the sentinel value.

    Value *Start = State.get(getOperand(0), true);

    Value *Sentinel = getOperand(1)->getLiveInIRValue();

    Value *Cmp =

        Builder.CreateICmpNE(ReducedResult, Sentinel, "rdx.select.cmp");

    return Builder.CreateSelect(Cmp, ReducedResult, Start, "rdx.select");

  }

  case VPInstruction::ComputeReductionResult: {

    RecurKind RK = getRecurKind();

    bool IsOrdered = isReductionOrdered();

    bool IsInLoop = isReductionInLoop();

    assert(!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&

           "should be handled by ComputeFindIVResult");


    // The recipe may have multiple operands to be reduced together.

    unsigned NumOperandsToReduce = getNumOperands();

    VectorParts RdxParts(NumOperandsToReduce);

    for (unsigned Part = 0; Part < NumOperandsToReduce; ++Part)

      RdxParts[Part] = State.get(getOperand(Part), IsInLoop);


    IRBuilderBase::FastMathFlagGuard FMFG(Builder);

    if (hasFastMathFlags())

      Builder.setFastMathFlags(getFastMathFlags());


    // Reduce multiple operands into one.

    Value *ReducedPartRdx = RdxParts[0];

    if (IsOrdered) {

      ReducedPartRdx = RdxParts[NumOperandsToReduce - 1];

    } else {

      // Floating-point operations should have some FMF to enable the reduction.

      for (unsigned Part = 1; Part < NumOperandsToReduce; ++Part) {

        Value *RdxPart = RdxParts[Part];

        if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))

          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);

        else {

          // For sub-recurrences, each part's reduction variable is already

          // negative, we need to do: reduce.add(-acc_uf0 + -acc_uf1)

          Instruction::BinaryOps Opcode =

              RK == RecurKind::Sub

                  ? Instruction::Add

                  : (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK);

          ReducedPartRdx =

              Builder.CreateBinOp(Opcode, RdxPart, ReducedPartRdx, "bin.rdx");

        }

      }

    }


    // Create the reduction after the loop. Note that inloop reductions create

    // the target reduction in the loop using a Reduction recipe.

    if (State.VF.isVector() && !IsInLoop) {

      // TODO: Support in-order reductions based on the recurrence descriptor.

      // All ops in the reduction inherit fast-math-flags from the recurrence

      // descriptor.

      ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);

    }


    return ReducedPartRdx;

  }

  case VPInstruction::ExtractLastLane:

  case VPInstruction::ExtractPenultimateElement: {

    unsigned Offset =

        getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1;

    Value *Res;

    if (State.VF.isVector()) {

      assert(Offset <= State.VF.getKnownMinValue() &&

             "invalid offset to extract from");

      // Extract lane VF - Offset from the operand.

      Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));

    } else {

      // TODO: Remove ExtractLastLane for scalar VFs.

      assert(Offset <= 1 && "invalid offset to extract from");

      Res = State.get(getOperand(0));

    }

    if (isa<ExtractElementInst>(Res))

      Res->setName(Name);

    return Res;

  }

  case VPInstruction::LogicalAnd: {

    Value *A = State.get(getOperand(0));

    Value *B = State.get(getOperand(1));

    return Builder.CreateLogicalAnd(A, B, Name);

  }

  case VPInstruction::PtrAdd: {

    assert(vputils::onlyFirstLaneUsed(this) &&

           "can only generate first lane for PtrAdd");

    Value *Ptr = State.get(getOperand(0), VPLane(0));

    Value *Addend = State.get(getOperand(1), VPLane(0));

    return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());

  }

  case VPInstruction::WidePtrAdd: {

    Value *Ptr =

        State.get(getOperand(0), vputils::isSingleScalar(getOperand(0)));

    Value *Addend = State.get(getOperand(1));

    return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());

  }

  case VPInstruction::AnyOf: {

    Value *Res = Builder.CreateFreeze(State.get(getOperand(0)));

    for (VPValue *Op : drop_begin(operands()))

      Res = Builder.CreateOr(Res, Builder.CreateFreeze(State.get(Op)));

    return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);

  }

  case VPInstruction::ExtractLane: {

    assert(getNumOperands() != 2 && "ExtractLane from single source should be "

                                    "simplified to ExtractElement.");

    Value *LaneToExtract = State.get(getOperand(0), true);

    Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));

    Value *Res = nullptr;

    Value *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);


    for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {

      Value *VectorStart =

          Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));

      Value *VectorIdx = Idx == 1

                             ? LaneToExtract

                             : Builder.CreateSub(LaneToExtract, VectorStart);

      Value *Ext = State.VF.isScalar()

                       ? State.get(getOperand(Idx))

                       : Builder.CreateExtractElement(

                             State.get(getOperand(Idx)), VectorIdx);

      if (Res) {

        Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);

        Res = Builder.CreateSelect(Cmp, Ext, Res);

      } else {

        Res = Ext;

      }

    }

    return Res;

  }

  case VPInstruction::FirstActiveLane: {

    if (getNumOperands() == 1) {

      Value *Mask = State.get(getOperand(0));

      return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask,

                                                  /*ZeroIsPoison=*/false, Name);

    }

    // If there are multiple operands, create a chain of selects to pick the

    // first operand with an active lane and add the number of lanes of the

    // preceding operands.

    Value *RuntimeVF = getRuntimeVF(Builder, Builder.getInt64Ty(), State.VF);

    unsigned LastOpIdx = getNumOperands() - 1;

    Value *Res = nullptr;

    for (int Idx = LastOpIdx; Idx >= 0; --Idx) {

      Value *TrailingZeros =

          State.VF.isScalar()

              ? Builder.CreateZExt(

                    Builder.CreateICmpEQ(State.get(getOperand(Idx)),

                                         Builder.getFalse()),

                    Builder.getInt64Ty())

              : Builder.CreateCountTrailingZeroElems(

                    Builder.getInt64Ty(), State.get(getOperand(Idx)),

                    /*ZeroIsPoison=*/false, Name);

      Value *Current = Builder.CreateAdd(

          Builder.CreateMul(RuntimeVF, Builder.getInt64(Idx)), TrailingZeros);

      if (Res) {

        Value *Cmp = Builder.CreateICmpNE(TrailingZeros, RuntimeVF);

        Res = Builder.CreateSelect(Cmp, Current, Res);

      } else {

        Res = Current;

      }

    }


    return Res;

  }

  case VPInstruction::ResumeForEpilogue:

    return State.get(getOperand(0), true);

  case VPInstruction::Reverse:

    return Builder.CreateVectorReverse(State.get(getOperand(0)), "reverse");

  case VPInstruction::ExtractLastActive: {

    Value *Data = State.get(getOperand(0));

    Value *Mask = State.get(getOperand(1));

    Value *Default = State.get(getOperand(2), /*IsScalar=*/true);

    Type *VTy = Data->getType();

    return Builder.CreateIntrinsic(

        Intrinsic::experimental_vector_extract_last_active, {VTy},

        {Data, Mask, Default});

  }

  default:

    llvm_unreachable("Unsupported opcode for instruction");

  }

}


/// Compute the cost of this recipe as if it executed \p Opcode at
/// vectorization factor \p VF, by querying the TTI cost hooks in \p Ctx.
///
/// The result type used for the queries is the scalar type inferred for this
/// recipe, widened to a vector of \p VF elements when \p VF is a vector.
/// Opcodes without a case in the switch below end in llvm_unreachable.
InstructionCost VPRecipeWithIRFlags::getCostForRecipeWithOpcode(

    unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const {

  Type *ScalarTy = Ctx.Types.inferScalarType(this);

  Type *ResultTy = VF.isVector() ? toVectorTy(ScalarTy, VF) : ScalarTy;

  switch (Opcode) {

  case Instruction::FNeg:

    return Ctx.TTI.getArithmeticInstrCost(Opcode, ResultTy, Ctx.CostKind);

  // Binary operators: refine the operand info of the second operand before
  // querying the arithmetic cost.
  case Instruction::UDiv:

  case Instruction::SDiv:

  case Instruction::SRem:

  case Instruction::URem:

  case Instruction::Add:

  case Instruction::FAdd:

  case Instruction::Sub:

  case Instruction::FSub:

  case Instruction::Mul:

  case Instruction::FMul:

  case Instruction::FDiv:

  case Instruction::FRem:

  case Instruction::Shl:

  case Instruction::LShr:

  case Instruction::AShr:

  case Instruction::And:

  case Instruction::Or:

  case Instruction::Xor: {

    TargetTransformInfo::OperandValueInfo RHSInfo = {

        TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};


    if (VF.isVector()) {

      // Certain instructions can be cheaper to vectorize if they have a

      // constant second vector operand. One example of this are shifts on x86.

      VPValue *RHS = getOperand(1);

      RHSInfo = Ctx.getOperandInfo(RHS);


      // Report an otherwise unclassified operand that is defined outside the
      // loop regions as a uniform value.
      if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&

          getOperand(1)->isDefinedOutsideLoopRegions())

        RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;

    }


    // Pass the underlying IR instruction and its operands (if there is one) as
    // extra context for the cost query.
    Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());

    SmallVector<const Value *, 4> Operands;

    if (CtxI)

      Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());

    return Ctx.TTI.getArithmeticInstrCost(

        Opcode, ResultTy, Ctx.CostKind,

        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},

        RHSInfo, Operands, CtxI, &Ctx.TLI);

  }

  case Instruction::Freeze:

    // This opcode is unknown. Assume that it is the same as 'mul'.

    return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, ResultTy,

                                          Ctx.CostKind);

  case Instruction::ExtractValue:

    return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,

                                             Ctx.CostKind);

  case Instruction::ICmp:

  case Instruction::FCmp: {

    // Compares are costed on the type of operand 0; the result type passed to
    // the query is derived from that operand type.
    Type *ScalarOpTy = Ctx.Types.inferScalarType(getOperand(0));

    Type *OpTy = VF.isVector() ? toVectorTy(ScalarOpTy, VF) : ScalarOpTy;

    Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());

    return Ctx.TTI.getCmpSelInstrCost(

        Opcode, OpTy, CmpInst::makeCmpResultType(OpTy), getPredicate(),

        Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},

        {TTI::OK_AnyValue, TTI::OP_None}, CtxI);

  }

  case Instruction::BitCast: {

    // A bitcast whose result is a pointer is given a cost of 0; any other
    // bitcast is costed like the casts below.
    Type *ScalarTy = Ctx.Types.inferScalarType(this);

    if (ScalarTy->isPointerTy())

      return 0;

    [[fallthrough]];

  }

  case Instruction::SExt:

  case Instruction::ZExt:

  case Instruction::FPToUI:

  case Instruction::FPToSI:

  case Instruction::FPExt:

  case Instruction::PtrToInt:

  case Instruction::PtrToAddr:

  case Instruction::IntToPtr:

  case Instruction::SIToFP:

  case Instruction::UIToFP:

  case Instruction::Trunc:

  case Instruction::FPTrunc:

  case Instruction::AddrSpaceCast: {

    // Computes the CastContextHint from a recipe that may access memory.

    auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {

      if (isa<VPInterleaveBase>(R))

        return TTI::CastContextHint::Interleave;

      if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R)) {

        // Only compute CCH for memory operations, matching the legacy model

        // which only considers loads/stores for cast context hints.

        auto *UI = cast<Instruction>(ReplicateRecipe->getUnderlyingValue());

        if (!isa<LoadInst, StoreInst>(UI))

          return TTI::CastContextHint::None;

        return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked

                                               : TTI::CastContextHint::Normal;


      }

      // Anything that is not a widened memory recipe provides no hint.
      const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);

      if (WidenMemoryRecipe == nullptr)


        return TTI::CastContextHint::None;

      if (VF.isScalar())

        return TTI::CastContextHint::Normal;

      // The checks below are ordered: non-consecutive, then reversed, then
      // masked accesses; the first one that applies determines the hint.
      if (!WidenMemoryRecipe->isConsecutive())

        return TTI::CastContextHint::GatherScatter;

      if (WidenMemoryRecipe->isReverse())

        return TTI::CastContextHint::Reversed;

      if (WidenMemoryRecipe->isMasked())

        return TTI::CastContextHint::Masked;

      return TTI::CastContextHint::Normal;

    };


    VPValue *Operand = getOperand(0);

    TTI::CastContextHint CCH = TTI::CastContextHint::None;

    // For Trunc/FPTrunc, get the context from the only user, looking through
    // a Reverse of the result.

    if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {

      // Returns the single unique user of R as a recipe, or nullptr if R has
      // no users, more than one unique user, or a user that is not a recipe.
      auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * {

        if (R->getNumUsers() == 0 || R->hasMoreThanOneUniqueUser())

          return nullptr;

        return dyn_cast<VPRecipeBase>(*R->user_begin());

      };

      if (VPRecipeBase *Recipe = GetOnlyUser(this)) {

        if (match(Recipe, m_Reverse(m_VPValue())))

          Recipe = GetOnlyUser(cast<VPInstruction>(Recipe));

        if (Recipe)

          CCH = ComputeCCH(Recipe);

      }

    }

    // For ZExt/SExt/FPExt, get the context from the recipe defining the
    // operand, looking through a Reverse of that operand.

    else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||

             Opcode == Instruction::FPExt) {

      if (auto *Recipe = Operand->getDefiningRecipe()) {

        VPValue *ReverseOp;

        if (match(Recipe, m_Reverse(m_VPValue(ReverseOp))))

          Recipe = ReverseOp->getDefiningRecipe();

        if (Recipe)

          CCH = ComputeCCH(Recipe);

      }

    }


    auto *ScalarSrcTy = Ctx.Types.inferScalarType(Operand);

    Type *SrcTy = VF.isVector() ? toVectorTy(ScalarSrcTy, VF) : ScalarSrcTy;

    // Arm TTI will use the underlying instruction to determine the cost.

    return Ctx.TTI.getCastInstrCost(

        Opcode, ResultTy, SrcTy, CCH, Ctx.CostKind,

        dyn_cast_if_present<Instruction>(getUnderlyingValue()));

  }

  case Instruction::Select: {

    SelectInst *SI = cast_or_null<SelectInst>(getUnderlyingValue());

    // The condition is treated as scalar when it is defined outside the loop
    // regions.
    bool IsScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();

    Type *ScalarTy = Ctx.Types.inferScalarType(this);


    VPValue *Op0, *Op1;

    bool IsLogicalAnd =

        match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1)));

    bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));


    // A logical and/or with a 1-bit result whose condition is not defined
    // outside the loop regions is costed as a plain And/Or.
    if (!IsScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&

        (IsLogicalAnd || IsLogicalOr)) {

      // select x, y, false --> x & y

      // select x, true, y --> x | y

      const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);

      const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);


      // Only forward the IR operands when there is an underlying select and
      // every VPlan operand has an underlying IR value.
      SmallVector<const Value *, 2> Operands;

      if (SI && all_of(operands(),

                       [](VPValue *Op) { return Op->getUnderlyingValue(); }))

        append_range(Operands, SI->operands());

      return Ctx.TTI.getArithmeticInstrCost(

          IsLogicalOr ? Instruction::Or : Instruction::And, ResultTy,

          Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);

    }


    // The condition type is only widened when the condition is not defined
    // outside the loop regions.
    Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));

    if (!IsScalarCond)

      CondTy = VectorType::get(CondTy, VF);


    // Use the predicate of the compare feeding the condition, either matched
    // as a VPlan compare or taken from a wrapped IR compare; otherwise Pred
    // keeps its default-constructed value.
    llvm::CmpPredicate Pred;

    if (!match(getOperand(0), m_Cmp(Pred, m_VPValue(), m_VPValue())))

      if (auto *CondIRV = dyn_cast<VPIRValue>(getOperand(0)))

        if (auto *Cmp = dyn_cast<CmpInst>(CondIRV->getValue()))

          Pred = Cmp->getPredicate();

    Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);

    return Ctx.TTI.getCmpSelInstrCost(

        Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,

        {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);

  }

  }

  // Only reached for opcodes without a case above.
  llvm_unreachable("called for unsupported opcode");

}


/// Compute the cost of this VPInstruction for vectorization factor \p VF using
/// the TTI hooks and type analysis in \p Ctx.
///
/// Binary operators are forwarded to getCostForRecipeWithOpcode (or cost 0
/// when there is no underlying value, FMul excepted). Opcodes that are not
/// handled explicitly in the switch are given a cost of 0.
InstructionCost VPInstruction::computeCost(ElementCount VF,

                                           VPCostContext &Ctx) const {

  if (Instruction::isBinaryOp(getOpcode())) {

    if (!getUnderlyingValue() && getOpcode() != Instruction::FMul) {

      // TODO: Compute cost for VPInstructions without underlying values once

      // the legacy cost model has been retired.

      return 0;

    }


    assert(!doesGeneratePerAllLanes() &&

           "Should only generate a vector value or single scalar, not scalars "

           "for all lanes.");

    // If only the first lane is used, cost a single scalar operation (VF = 1).
    return getCostForRecipeWithOpcode(

        getOpcode(),

        vputils::onlyFirstLaneUsed(this) ? ElementCount::getFixed(1) : VF, Ctx);

  }


  switch (getOpcode()) {

  case Instruction::Select: {

    // Pred stays BAD_ICMP_PREDICATE unless the condition matches a compare.
    llvm::CmpPredicate Pred = CmpInst::BAD_ICMP_PREDICATE;

    match(getOperand(0), m_Cmp(Pred, m_VPValue(), m_VPValue()));

    auto *CondTy = Ctx.Types.inferScalarType(getOperand(0));

    auto *VecTy = Ctx.Types.inferScalarType(getOperand(1));

    // Widen both types unless only the first lane of the result is used.
    if (!vputils::onlyFirstLaneUsed(this)) {

      CondTy = toVectorTy(CondTy, VF);

      VecTy = toVectorTy(VecTy, VF);

    }

    return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy, Pred,

                                      Ctx.CostKind);

  }

  case Instruction::ExtractElement:

  case VPInstruction::ExtractLane: {

    if (VF.isScalar()) {

      // ExtractLane with VF=1 takes care of handling extracting across multiple

      // parts.

      return 0;

    }


    // Add on the cost of extracting the element.

    auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);

    return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,

                                      Ctx.CostKind);

  }

  case VPInstruction::AnyOf: {

    // Costed as an or-reduction over a vector of this recipe's scalar type.
    auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);

    return Ctx.TTI.getArithmeticReductionCost(

        Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);

  }

  case VPInstruction::FirstActiveLane: {

    Type *ScalarTy = Ctx.Types.inferScalarType(getOperand(0));

    // For a scalar VF, cost a single icmp eq on the operand type.
    if (VF.isScalar())

      return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,

                                        CmpInst::makeCmpResultType(ScalarTy),

                                        CmpInst::ICMP_EQ, Ctx.CostKind);

    // Calculate the cost of determining the lane index.

    auto *PredTy = toVectorTy(ScalarTy, VF);

    IntrinsicCostAttributes Attrs(Intrinsic::experimental_cttz_elts,

                                  Type::getInt64Ty(Ctx.LLVMCtx),

                                  {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});

    return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);

  }

  case VPInstruction::LastActiveLane: {

    Type *ScalarTy = Ctx.Types.inferScalarType(getOperand(0));

    // For a scalar VF, cost a single icmp eq on the operand type.
    if (VF.isScalar())

      return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,

                                        CmpInst::makeCmpResultType(ScalarTy),

                                        CmpInst::ICMP_EQ, Ctx.CostKind);

    // Calculate the cost of determining the lane index: NOT + cttz_elts + SUB.

    auto *PredTy = toVectorTy(ScalarTy, VF);

    IntrinsicCostAttributes Attrs(Intrinsic::experimental_cttz_elts,

                                  Type::getInt64Ty(Ctx.LLVMCtx),

                                  {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});

    InstructionCost Cost = Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);

    // Add cost of NOT operation on the predicate.

    Cost += Ctx.TTI.getArithmeticInstrCost(

        Instruction::Xor, PredTy, Ctx.CostKind,

        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},

        {TargetTransformInfo::OK_UniformConstantValue,

         TargetTransformInfo::OP_None});

    // Add cost of SUB operation on the index.

    Cost += Ctx.TTI.getArithmeticInstrCost(

        Instruction::Sub, Type::getInt64Ty(Ctx.LLVMCtx), Ctx.CostKind);

    return Cost;

  }

  case VPInstruction::ExtractLastActive: {

    // Costed as the experimental_vector_extract_last_active intrinsic with a
    // data vector, an i1 mask vector and a scalar default value.
    Type *ScalarTy = Ctx.Types.inferScalarType(this);

    Type *VecTy = toVectorTy(ScalarTy, VF);

    Type *MaskTy = toVectorTy(Type::getInt1Ty(Ctx.LLVMCtx), VF);

    IntrinsicCostAttributes ICA(

        Intrinsic::experimental_vector_extract_last_active, ScalarTy,

        {VecTy, MaskTy, ScalarTy});

    return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind);

  }

  case VPInstruction::FirstOrderRecurrenceSplice: {

    assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?");

    // The mask holds the consecutive indices starting at VF - 1.
    SmallVector<int> Mask(VF.getKnownMinValue());

    std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);

    Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);


    return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,

                                  cast<VectorType>(VectorTy),

                                  cast<VectorType>(VectorTy), Mask,

                                  Ctx.CostKind, VF.getKnownMinValue() - 1);

  }

  case VPInstruction::ActiveLaneMask: {

    Type *ArgTy = Ctx.Types.inferScalarType(getOperand(0));

    // Operand 2 is a constant multiplier applied to VF to get the number of
    // mask elements.
    unsigned Multiplier = cast<VPConstantInt>(getOperand(2))->getZExtValue();

    Type *RetTy = toVectorTy(Type::getInt1Ty(Ctx.LLVMCtx), VF * Multiplier);

    IntrinsicCostAttributes Attrs(Intrinsic::get_active_lane_mask, RetTy,

                                  {ArgTy, ArgTy});

    return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);

  }

  case VPInstruction::ExplicitVectorLength: {

    // Costed as the experimental_get_vector_length intrinsic returning i32.
    Type *Arg0Ty = Ctx.Types.inferScalarType(getOperand(0));

    Type *I32Ty = Type::getInt32Ty(Ctx.LLVMCtx);

    Type *I1Ty = Type::getInt1Ty(Ctx.LLVMCtx);

    IntrinsicCostAttributes Attrs(Intrinsic::experimental_get_vector_length,

                                  I32Ty, {Arg0Ty, I32Ty, I1Ty});

    return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);

  }

  case VPInstruction::Reverse: {

    assert(VF.isVector() && "Reverse operation must be vector type");

    // Costed as a reverse shuffle of the operand's vector type.
    auto *VectorTy = cast<VectorType>(

        toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF));

    return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,

                                  VectorTy, /*Mask=*/{}, Ctx.CostKind,

                                  /*Index=*/0);

  }

  case VPInstruction::ExtractLastLane: {

    // Add on the cost of extracting the element.

    auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);

    return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement,

                                                    VecTy, Ctx.CostKind, 0);

  }

  case VPInstruction::ExtractPenultimateElement:

    // The cost is invalid for VF = vscale x 1; every other VF falls through
    // to the default handling below.
    if (VF == ElementCount::getScalable(1))

      return InstructionCost::getInvalid();

    [[fallthrough]];

  default:

    // TODO: Compute the cost for other VPInstructions once the legacy cost

    // model has been retired.

    assert(!getUnderlyingValue() &&

           "unexpected VPInstruction witht underlying value");

    return 0;

  }

}


/// Return true if the opcode of this VPInstruction is one of the opcodes
/// listed below: the element/lane extracts, first/last active lane, the
/// compute-*-result opcodes and any-of.
bool VPInstruction::isVectorToScalar() const {
  switch (getOpcode()) {
  case Instruction::ExtractElement:
  case VPInstruction::AnyOf:
  case VPInstruction::ComputeAnyOfResult:
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::ExtractLane:
  case VPInstruction::ExtractLastActive:
  case VPInstruction::ExtractLastLane:
  case VPInstruction::ExtractPenultimateElement:
  case VPInstruction::FirstActiveLane:
  case VPInstruction::LastActiveLane:
    return true;
  default:
    return false;
  }
}


/// Return true for PHI, ExplicitVectorLength, ResumeForEpilogue and VScale;
/// for every other opcode the answer is whatever isScalarCast() reports.
bool VPInstruction::isSingleScalar() const {
  const unsigned Opc = getOpcode();
  const bool IsAlwaysSingleScalar =
      Opc == Instruction::PHI || Opc == VPInstruction::ExplicitVectorLength ||
      Opc == VPInstruction::ResumeForEpilogue || Opc == VPInstruction::VScale;
  if (IsAlwaysSingleScalar)
    return true;
  return isScalarCast();
}


/// Generate IR for this VPInstruction via generate() and, if the instruction
/// has a result, record the generated value in \p State either as a scalar
/// (first lane only) or as a full value.
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Lane && "VPInstruction executing an Lane");
  assert(flagsValidForOpcode(getOpcode()) &&
         "Set flags not supported for the provided opcode");

  // The guard restores the builder's fast-math flags when this function
  // returns; they are only overridden when this recipe carries FMFs.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  Value *Result = generate(State);
  if (hasResult()) {
    assert(Result && "generate must produce a value");

    // The result is recorded as a scalar only if the opcode can produce a
    // scalar for the first lane and nothing beyond that lane is needed.
    const bool OnlyFirstLane =
        canGenerateScalarForFirstLane() &&
        (vputils::onlyFirstLaneUsed(this) || isVectorToScalar() ||
         isSingleScalar());

    // For vector VFs, a vector/struct typed result and a first-lane-only
    // result are mutually exclusive, and one of the two must hold.
    assert((State.VF.isScalar() ||
            (Result->getType()->isVectorTy() ||
             Result->getType()->isStructTy()) != OnlyFirstLane) &&
           "scalar value but not only first lane defined");

    State.set(this, Result, /*IsScalar*/ OnlyFirstLane);
  }
}


/// Return false for binary operators, casts and the opcodes listed in the
/// switch below; every other opcode is conservatively reported as possibly
/// reading from or writing to memory.
bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
  const unsigned Opc = getOpcode();
  if (Instruction::isBinaryOp(Opc) || Instruction::isCast(Opc))
    return false;

  switch (Opc) {
  // Plain IR opcodes.
  case Instruction::ExtractElement:
  case Instruction::FCmp:
  case Instruction::Freeze:
  case Instruction::GetElementPtr:
  case Instruction::ICmp:
  case Instruction::PHI:
  case Instruction::Select:
  // VPlan-specific opcodes.
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::AnyOf:
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::Broadcast:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::ExtractLane:
  case VPInstruction::ExtractLastActive:
  case VPInstruction::ExtractLastLane:
  case VPInstruction::ExtractLastPart:
  case VPInstruction::ExtractPenultimateElement:
  case VPInstruction::FirstActiveLane:
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::LastActiveLane:
  case VPInstruction::LogicalAnd:
  case VPInstruction::Not:
  case VPInstruction::PtrAdd:
  case VPInstruction::ReductionStartVector:
  case VPInstruction::Reverse:
  case VPInstruction::StepVector:
  case VPInstruction::Unpack:
  case VPInstruction::VScale:
  case VPInstruction::WideIVStep:
  case VPInstruction::WidePtrAdd:
    return false;
  default:
    break;
  }

  // Anything not listed above is assumed to touch memory.
  return true;
}


/// Return true if this VPInstruction only uses the first lane of operand
/// \p Op. \p Op must be one of the operands of this recipe.
bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  const unsigned Opc = getOpcode();
  if (Instruction::isBinaryOp(Opc) || Instruction::isCast(Opc))
    return vputils::onlyFirstLaneUsed(this);

  switch (Opc) {
  // Opcodes that unconditionally report first-lane-only use.
  case Instruction::PHI:
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::Broadcast:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::ReductionStartVector:
    return true;

  // Opcodes whose answer follows how the result of this recipe is used.
  case Instruction::FCmp:
  case Instruction::Freeze:
  case Instruction::ICmp:
  case Instruction::Or:
  case Instruction::Select:
  case VPInstruction::Not:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);

  // Operand 0 of a PtrAdd always counts as first-lane-only; the other operand
  // follows the users of the result.
  case VPInstruction::PtrAdd:
    if (Op == getOperand(0))
      return true;
    return vputils::onlyFirstLaneUsed(this);

  // Only operand 0 is used by its first lane.
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ExtractLane:
    return Op == getOperand(0);

  // Only operand 1 is used by its first lane.
  case Instruction::ExtractElement:
  case VPInstruction::ComputeAnyOfResult:
    return Op == getOperand(1);

  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
    // Before replicating by VF, Build(Struct)Vector has a single operand and
    // uses all of its lanes; after replicating its operands, only the first
    // lane of each operand is used.
    return getNumOperands() > 1;

  // WidePtrAdd supports scalar and vector base addresses, so it is treated
  // like any opcode that is not listed.
  case VPInstruction::WidePtrAdd:
  default:
    return false;
  }
  llvm_unreachable("switch should return");
}


/// Return true if this VPInstruction only uses the first part of operand
/// \p Op. \p Op must be one of the operands of this recipe.
bool VPInstruction::usesFirstPartOnly(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  const unsigned Opc = getOpcode();
  if (Instruction::isBinaryOp(Opc))
    return vputils::onlyFirstPartUsed(this);

  switch (Opc) {
  // Branches and the per-part canonical IV increment always report
  // first-part-only use.
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::CanonicalIVIncrementForPart:
    return true;

  // Compares and selects follow how the result of this recipe is used.
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
    return vputils::onlyFirstPartUsed(this);

  default:
    return false;
  }
  llvm_unreachable("switch should return");
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


void VPInstruction::dump() const {

  VPSlotTracker SlotTracker(getParent()->getPlan());

  printRecipe(dbgs(), "", SlotTracker);

}


/// Print this VPInstruction to \p O as
///   <Indent>EMIT[-SCALAR] [<result> = ]<opcode name><flags><operands>
/// where the result is only printed if the instruction has one.
void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  // Maps an opcode to its printed name. VPlan-specific opcodes have dedicated
  // names; everything else uses the IR opcode name.
  auto GetPrintedName = [](unsigned Opc) -> const char * {
    switch (Opc) {
    case VPInstruction::Not:
      return "not";
    case VPInstruction::SLPLoad:
      return "combined load";
    case VPInstruction::SLPStore:
      return "combined store";
    case VPInstruction::ActiveLaneMask:
      return "active lane mask";
    case VPInstruction::ExplicitVectorLength:
      return "EXPLICIT-VECTOR-LENGTH";
    case VPInstruction::FirstOrderRecurrenceSplice:
      return "first-order splice";
    case VPInstruction::BranchOnCond:
      return "branch-on-cond";
    case VPInstruction::BranchOnTwoConds:
      return "branch-on-two-conds";
    case VPInstruction::CalculateTripCountMinusVF:
      return "TC > VF ? TC - VF : 0";
    case VPInstruction::CanonicalIVIncrementForPart:
      return "VF * Part +";
    case VPInstruction::BranchOnCount:
      return "branch-on-count";
    case VPInstruction::Broadcast:
      return "broadcast";
    case VPInstruction::BuildStructVector:
      return "buildstructvector";
    case VPInstruction::BuildVector:
      return "buildvector";
    case VPInstruction::ExtractLane:
      return "extract-lane";
    case VPInstruction::ExtractLastLane:
      return "extract-last-lane";
    case VPInstruction::ExtractLastPart:
      return "extract-last-part";
    case VPInstruction::ExtractPenultimateElement:
      return "extract-penultimate-element";
    case VPInstruction::ComputeAnyOfResult:
      return "compute-anyof-result";
    case VPInstruction::ComputeFindIVResult:
      return "compute-find-iv-result";
    case VPInstruction::ComputeReductionResult:
      return "compute-reduction-result";
    case VPInstruction::LogicalAnd:
      return "logical-and";
    case VPInstruction::PtrAdd:
      return "ptradd";
    case VPInstruction::WidePtrAdd:
      return "wide-ptradd";
    case VPInstruction::AnyOf:
      return "any-of";
    case VPInstruction::FirstActiveLane:
      return "first-active-lane";
    case VPInstruction::LastActiveLane:
      return "last-active-lane";
    case VPInstruction::ReductionStartVector:
      return "reduction-start-vector";
    case VPInstruction::ResumeForEpilogue:
      return "resume-for-epilogue";
    case VPInstruction::Reverse:
      return "reverse";
    case VPInstruction::Unpack:
      return "unpack";
    case VPInstruction::ExtractLastActive:
      return "extract-last-active";
    default:
      return Instruction::getOpcodeName(Opc);
    }
  };

  O << Indent << "EMIT";
  if (isSingleScalar())
    O << "-SCALAR";
  O << " ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << GetPrintedName(getOpcode());

  printFlags(O);
  printOperands(O, SlotTracker);
}


#endif


/// Generate IR for a typed VPInstruction: a scalar cast of lane 0 of the
/// operand to ResultTy, a step vector, or a vscale value. Any other opcode is
/// unreachable.
void VPInstructionWithType::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  const unsigned Opc = getOpcode();

  // Scalar casts operate on, and define, lane 0 only.
  if (isScalarCast()) {
    Value *Src = State.get(getOperand(0), VPLane(0));
    Value *Converted =
        State.Builder.CreateCast(Instruction::CastOps(Opc), Src, ResultTy);
    State.set(this, Converted, VPLane(0));
    return;
  }

  if (Opc == VPInstruction::StepVector) {
    // The step vector has element type ResultTy and State.VF elements.
    auto *StepTy = VectorType::get(ResultTy, State.VF);
    State.set(this, State.Builder.CreateStepVector(StepTy));
    return;
  }

  if (Opc == VPInstruction::VScale) {
    // vscale is recorded as a scalar value.
    State.set(this, State.Builder.CreateVScale(ResultTy), true);
    return;
  }

  llvm_unreachable("opcode not implemented yet");
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print this typed VPInstruction to \p O. WideIVStep prints its operands,
/// StepVector and VScale print the result type, and casts print
/// "<opcode> <operands> to <type>".
void VPInstructionWithType::printRecipe(raw_ostream &O, const Twine &Indent,
                                        VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT";
  if (isSingleScalar())
    O << "-SCALAR";
  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = ";

  const unsigned Opc = getOpcode();
  if (Opc == VPInstruction::WideIVStep) {
    O << "wide-iv-step ";
    printOperands(O, SlotTracker);
    return;
  }
  if (Opc == VPInstruction::StepVector) {
    O << "step-vector " << *ResultTy;
    return;
  }
  if (Opc == VPInstruction::VScale) {
    O << "vscale " << *ResultTy;
    return;
  }

  // Everything else must be a cast.
  assert(Instruction::isCast(Opc) && "unhandled opcode");
  O << Instruction::getOpcodeName(Opc) << " ";
  printOperands(O, SlotTracker);
  O << " to " << *ResultTy;
}


#endif


/// Create a scalar IR phi for this recipe and add its incoming values. All
/// incoming values are added only when the recipe is in the scalar preheader;
/// otherwise only the first one is added here.
void VPPhi::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  Type *PhiTy = State.TypeAnalysis.inferScalarType(this);
  PHINode *IRPhi = State.Builder.CreatePHI(PhiTy, 2, getName());

  // TODO: Fixup all incoming values of header phis once recipes defining them
  // are introduced.
  const bool InScalarPreheader =
      getParent() == getParent()->getPlan()->getScalarPreheader();
  const unsigned NumToAdd = InScalarPreheader ? getNumIncoming() : 1;

  for (unsigned I = 0; I < NumToAdd; ++I) {
    Value *Incoming = State.get(getIncomingValue(I), VPLane(0));
    BasicBlock *FromBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(I));
    IRPhi->addIncoming(Incoming, FromBB);
  }

  State.set(this, IRPhi, VPLane(0));
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print this phi to \p O as
///   <Indent>EMIT[-SCALAR] <result> = phi <incoming operands>
void VPPhi::printRecipe(raw_ostream &O, const Twine &Indent,
                        VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT";
  if (isSingleScalar())
    O << "-SCALAR";
  O << " ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printPhiOperands(O, SlotTracker);
}


#endif


/// Allocate a recipe wrapping the IR instruction \p I: a VPIRPhi if \p I is a
/// PHINode, a plain VPIRInstruction otherwise.
VPIRInstruction *VPIRInstruction::create(Instruction &I) {
  if (isa<PHINode>(I))
    return new VPIRPhi(cast<PHINode>(I));
  return new VPIRInstruction(I);
}


/// Executing a wrapped (non-phi) IR instruction only repositions the builder;
/// no new IR is generated for it.
void VPIRInstruction::execute(VPTransformState &State) {
  assert(!isa<VPIRPhi>(this) && getNumOperands() == 0 &&
         "PHINodes must be handled by VPIRPhi");

  // Move the insert point to just after the wrapped IR instruction, so that
  // VPIRInstructions and other recipes can be interleaved.
  BasicBlock::iterator AfterWrapped = std::next(I.getIterator());
  State.Builder.SetInsertPoint(I.getParent(), AfterWrapped);
}


/// A wrapped IR instruction always has cost 0, independent of \p VF and
/// \p Ctx.
InstructionCost VPIRInstruction::computeCost(ElementCount VF,
                                             VPCostContext &Ctx) const {
  // The wrapped instruction already exists in the IR on the border of VPlan's
  // scope, so it adds nothing to the cost modeled for the VPlan.
  InstructionCost NoCost = 0;
  return NoCost;
}


/// Replace operand 0 with ExtractLastLane(ExtractLastPart(operand 0)), both
/// created through \p Builder. Operands that are VPIRValues are left as-is.
void VPIRInstruction::extractLastLaneOfLastPartOfFirstOperand(
    VPBuilder &Builder) {
  assert(isa<PHINode>(getInstruction()) &&
         "can only update exiting operands to phi nodes");
  assert(getNumOperands() > 0 && "must have at least one operand");

  VPValue *FirstOp = getOperand(0);
  if (!isa<VPIRValue>(FirstOp)) {
    VPValue *LastPart =
        Builder.createNaryOp(VPInstruction::ExtractLastPart, FirstOp);
    VPValue *LastLane =
        Builder.createNaryOp(VPInstruction::ExtractLastLane, LastPart);
    setOperand(0, LastLane);
  }
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the wrapped IR instruction, prefixed with the indent and "IR ".
void VPIRInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "IR ";
  O << I;
}


#endif


/// For each operand, add or update the wrapped IR phi's incoming value for the
/// IR block mapped to the matching predecessor's exiting block.
void VPIRPhi::execute(VPTransformState &State) {
  PHINode *WrappedPhi = &getIRPhi();
  const auto &Preds = getParent()->getPredecessors();

  for (unsigned Idx = 0, NumOps = getNumOperands(); Idx != NumOps; ++Idx) {
    VPValue *Incoming = getOperand(Idx);
    // Single-scalar values are read from the first lane, everything else from
    // the last lane for the current VF.
    VPLane Lane = vputils::isSingleScalar(Incoming)
                      ? VPLane::getFirstLane()
                      : VPLane::getLastLaneForVF(State.VF);

    auto *PredVPBB = Preds[Idx]->getExitingBasicBlock();
    BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];

    // Set insertion point in PredBB in case an extract needs to be generated.
    // TODO: Model extracts explicitly.
    State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
    Value *IRIncoming = State.get(Incoming, Lane);

    // Update the existing incoming value for PredBB if the phi already has an
    // entry for it; otherwise add a new incoming value.
    if (WrappedPhi->getBasicBlockIndex(PredBB) != -1)
      WrappedPhi->setIncomingValueForBlock(PredBB, IRIncoming);
    else
      WrappedPhi->addIncoming(IRIncoming, PredBB);
  }

  // Advance the insert point after the wrapped IR instruction. This allows
  // interleaving VPIRInstructions and other recipes.
  State.Builder.SetInsertPoint(WrappedPhi->getParent(),
                               std::next(WrappedPhi->getIterator()));
}


/// Remove the operand whose position equals the index of \p IncomingBlock in
/// the parent block's predecessor list.
void VPPhiAccessors::removeIncomingValueFor(VPBlockBase *IncomingBlock) const {
  auto *PhiR = const_cast<VPRecipeBase *>(getAsRecipe());
  auto *ParentVPBB = PhiR->getParent();
  assert(PhiR->getNumOperands() == ParentVPBB->getNumPredecessors() &&
         "Number of phi operands must match number of predecessors");
  PhiR->removeOperand(ParentVPBB->getIndexForPredecessor(IncomingBlock));
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print every operand as "[ <value>, <incoming block> ]", comma-separated.
void VPPhiAccessors::printPhiOperands(raw_ostream &O,
                                      VPSlotTracker &SlotTracker) const {
  const VPRecipeBase *PhiR = getAsRecipe();
  for (unsigned Idx = 0, NumOps = PhiR->getNumOperands(); Idx != NumOps;
       ++Idx) {
    if (Idx != 0)
      O << ", ";
    O << "[ ";
    PhiR->getOperand(Idx)->printAsOperand(O, SlotTracker);
    O << ", ";
    getIncomingBlock(Idx)->printAsOperand(O);
    O << " ]";
  }
}


#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the wrapped IR phi and, if the recipe has operands, append them as
/// "(extra operand[s]: <value> from <block>, ...)".
void VPIRPhi::printRecipe(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  VPIRInstruction::printRecipe(O, Indent, SlotTracker);

  const unsigned NumExtra = getNumOperands();
  if (NumExtra == 0)
    return;

  O << " (extra operand";
  if (NumExtra > 1)
    O << "s";
  O << ": ";
  interleaveComma(incoming_values_and_blocks(), O,
                  [&O, &SlotTracker](auto ValueAndBlock) {
                    std::get<0>(ValueAndBlock)->printAsOperand(O, SlotTracker);
                    O << " from ";
                    std::get<1>(ValueAndBlock)->printAsOperand(O);
                  });
  O << ")";
}


#endif


/// Attach every (kind, node) pair stored in this object to \p I.
void VPIRMetadata::applyMetadata(Instruction &I) const {
  for (const auto &KindAndNode : Metadata)
    I.setMetadata(KindAndNode.first, KindAndNode.second);
}


void VPIRMetadata::intersect(const VPIRMetadata &Other) {

  SmallVector<std::pair<unsigned, MDNode *>> MetadataIntersection;

  for (const auto &[KindA, MDA] : Metadata) {

    for (const auto &[KindB, MDB] : Other.Metadata) {

      if (KindA == KindB && MDA == MDB) {

        MetadataIntersection.emplace_back(KindA, MDA);

        break;

      }

    }

  }

  Metadata = std::move(MetadataIntersection);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the stored metadata as " (!<kind-name> <node>, ...)". Prints nothing
/// when there is no metadata or the slot tracker has no module.
void VPIRMetadata::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  const Module *Mod = SlotTracker.getModule();
  if (!Mod || Metadata.empty())
    return;

  ArrayRef<StringRef> KindNames = SlotTracker.getMDNames();
  O << " (";
  bool NeedsSeparator = false;
  for (const auto &[Kind, Node] : Metadata) {
    if (NeedsSeparator)
      O << ", ";
    NeedsSeparator = true;

    assert(Kind < KindNames.size() && !KindNames[Kind].empty() &&
           "Unexpected unnamed metadata kind");
    O << "!" << KindNames[Kind] << " ";
    Node->printAsOperand(O, Mod);
  }
  O << ")";
}


#endif


/// Emit a call to the vector function variant, forwarding operand bundles from
/// the underlying call (if any) and copying the variant's calling convention.
void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  assert(Variant != nullptr && "Can't create vector function.");

  FunctionType *VariantTy = Variant->getFunctionType();
  SmallVector<Value *, 4> CallArgs;
  for (const auto &[ArgIdx, ArgOp] : enumerate(args())) {
    // Some vectorized function variants may also take a scalar argument,
    // e.g. linear parameters for pointers. This needs to be the scalar value
    // from the start of the respective part when interleaving.
    const bool IsVectorParam = VariantTy->getParamType(ArgIdx)->isVectorTy();
    Value *IRArg = IsVectorParam
                       ? State.get(ArgOp, usesFirstLaneOnly(ArgOp))
                       : State.get(ArgOp, VPLane(0));
    CallArgs.push_back(IRArg);
  }

  SmallVector<OperandBundleDef, 1> Bundles;
  if (auto *ScalarCall = cast_or_null<CallInst>(getUnderlyingValue()))
    ScalarCall->getOperandBundlesAsDefs(Bundles);

  CallInst *WideCall = State.Builder.CreateCall(Variant, CallArgs, Bundles);
  applyFlags(*WideCall);
  applyMetadata(*WideCall);
  WideCall->setCallingConv(Variant->getCallingConv());

  // Void calls define no value, so there is nothing to record for them.
  if (WideCall->getType()->isVoidTy())
    return;
  State.set(this, WideCall);
}


/// Cost of calling the vector variant, queried from TTI using the variant's
/// return and parameter types. \p VF is not consulted.
InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  Type *VariantRetTy = Variant->getReturnType();
  ArrayRef<Type *> VariantParamTys = Variant->getFunctionType()->params();
  return Ctx.TTI.getCallInstrCost(nullptr, VariantRetTy, VariantParamTys,
                                  Ctx.CostKind);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as a call to the scalar function, followed by the name of
/// the vector variant (when it has one).
void VPWidenCallRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  Function *ScalarFn = getCalledScalarFunction();
  if (!ScalarFn->getReturnType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  } else {
    O << "void ";
  }

  O << "call";
  printFlags(O);
  O << " @" << ScalarFn->getName() << "(";
  interleaveComma(args(), O, [&O, &SlotTracker](VPValue *Arg) {
    Arg->printAsOperand(O, SlotTracker);
  });
  O << ") (using library function";

  if (Variant->hasName())
    O << ": " << Variant->getName();
  O << ")";
}


#endif


/// Emit a call to the vector intrinsic. The overload types for the declaration
/// are collected first from the (vectorized) return type and then from the
/// arguments, in operand order.
void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");

  SmallVector<Type *, 2> OverloadTys;
  // Add return type if intrinsic is overloaded on it.
  if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1,
                                             State.TTI)) {
    Type *WideRetTy = toVectorizedTy(getResultType(), State.VF);
    ArrayRef<Type *> Fields = getContainedTypes(WideRetTy);
    for (unsigned FieldIdx = 0, NumFields = Fields.size();
         FieldIdx != NumFields; ++FieldIdx) {
      if (isVectorIntrinsicWithStructReturnOverloadAtField(
              VectorIntrinsicID, FieldIdx, State.TTI))
        OverloadTys.push_back(Fields[FieldIdx]);
    }
  }

  SmallVector<Value *, 4> CallArgs;
  for (const auto &[ArgIdx, ArgOp] : enumerate(operands())) {
    // Some intrinsics have a scalar argument - don't replace it with a
    // vector.
    const bool WantsScalar = isVectorIntrinsicWithScalarOpAtArg(
        VectorIntrinsicID, ArgIdx, State.TTI);
    Value *IRArg = WantsScalar ? State.get(ArgOp, VPLane(0))
                               : State.get(ArgOp, usesFirstLaneOnly(ArgOp));
    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, ArgIdx,
                                               State.TTI))
      OverloadTys.push_back(IRArg->getType());
    CallArgs.push_back(IRArg);
  }

  // Use vector version of the intrinsic.
  Module *Mod = State.Builder.GetInsertBlock()->getModule();
  Function *VectorDecl =
      Intrinsic::getOrInsertDeclaration(Mod, VectorIntrinsicID, OverloadTys);
  assert(VectorDecl &&
         "Can't retrieve vector intrinsic or vector-predication intrinsics.");

  // Forward operand bundles from the underlying call, when there is one.
  SmallVector<OperandBundleDef, 1> Bundles;
  if (auto *ScalarCall = cast_or_null<CallInst>(getUnderlyingValue()))
    ScalarCall->getOperandBundlesAsDefs(Bundles);

  CallInst *WideCall = State.Builder.CreateCall(VectorDecl, CallArgs, Bundles);
  applyFlags(*WideCall);
  applyMetadata(*WideCall);

  // Void calls define no value, so there is nothing to record for them.
  if (WideCall->getType()->isVoidTy())
    return;
  State.set(this, WideCall);
}


/// Compute the cost for the intrinsic \p ID with \p Operands, produced by \p R.
/// Return and parameter types are widened to \p VF when \p VF is a vector.
static InstructionCost getCostForIntrinsics(Intrinsic::ID ID,
                                            ArrayRef<const VPValue *> Operands,
                                            const VPRecipeWithIRFlags &R,
                                            ElementCount VF,
                                            VPCostContext &Ctx) {
  // Some backends analyze intrinsic arguments to determine cost. Use the
  // underlying value for the operand if it has one. Otherwise try to use the
  // operand of the underlying call instruction, if there is one. Otherwise
  // clear Arguments.
  // TODO: Rework TTI interface to be independent of concrete IR values.
  auto *UnderlyingCall = dyn_cast_or_null<CallBase>(R.getUnderlyingValue());
  SmallVector<const Value *> Arguments;
  for (unsigned Idx = 0, NumOps = Operands.size(); Idx != NumOps; ++Idx) {
    if (const Value *UV = Operands[Idx]->getUnderlyingValue()) {
      Arguments.push_back(UV);
      continue;
    }
    if (!UnderlyingCall) {
      Arguments.clear();
      break;
    }
    Arguments.push_back(UnderlyingCall->getArgOperand(Idx));
  }

  const bool IsVectorVF = VF.isVector();
  Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
  Type *RetTy = IsVectorVF ? toVectorizedTy(ScalarRetTy, VF) : ScalarRetTy;

  SmallVector<Type *> ParamTys;
  for (const VPValue *Op : Operands) {
    Type *ScalarOpTy = Ctx.Types.inferScalarType(Op);
    ParamTys.push_back(IsVectorVF ? toVectorTy(ScalarOpTy, VF) : ScalarOpTy);
  }

  // Recipes without fast-math flags are costed with default-constructed flags.
  FastMathFlags FMF;
  if (R.hasFastMathFlags())
    FMF = R.getFastMathFlags();

  // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
  IntrinsicCostAttributes CostAttrs(
      ID, RetTy, Arguments, ParamTys, FMF,
      dyn_cast_or_null<IntrinsicInst>(R.getUnderlyingValue()),
      InstructionCost::getInvalid(), &Ctx.TLI);
  return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
}


/// Cost of the vector intrinsic call, computed from all operands of the recipe.
InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
                                                    VPCostContext &Ctx) const {
  auto Ops = operands();
  SmallVector<const VPValue *> IntrinsicOps(Ops.begin(), Ops.end());
  return getCostForIntrinsics(VectorIntrinsicID, IntrinsicOps, *this, VF, Ctx);
}


/// Return the base name of the vector intrinsic this recipe calls.
StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
  StringRef BaseName = Intrinsic::getBaseName(VectorIntrinsicID);
  return BaseName;
}


bool VPWidenIntrinsicRecipe::usesFirstLaneOnly(const VPValue *Op) const {

  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  return all_of(enumerate(operands()), [this, &Op](const auto &X) {

    auto [Idx, V] = X;

    return V != Op || isVectorIntrinsicWithScalarOpAtArg(getVectorIntrinsicID(),

                                                         Idx, nullptr);

  });

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "WIDEN-INTRINSIC [<result> = | void ]call<flags><name>(
/// <operands>)".
void VPWidenIntrinsicRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                         VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INTRINSIC ";

  if (!ResultTy->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  } else {
    O << "void ";
  }

  O << "call";
  printFlags(O);
  O << getIntrinsicName();
  O << "(";
  interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Operand) {
    Operand->printAsOperand(O, SlotTracker);
  });
  O << ")";
}


#endif


/// Emit a call to llvm.experimental.vector.histogram.add for the bucket
/// addresses (operand 0) and the scalar update amount (operand 1).
void VPHistogramRecipe::execute(VPTransformState &State) {
  IRBuilderBase &Builder = State.Builder;

  Value *Buckets = State.get(getOperand(0));
  Value *Delta = State.get(getOperand(1), /*IsScalar=*/true);
  VectorType *BucketsTy = cast<VectorType>(Buckets->getType());

  // The histogram intrinsic requires a mask even if the recipe doesn't;
  // if the mask operand was omitted then all lanes should be executed and
  // we just need to synthesize an all-true mask.
  VPValue *VPMask = getMask();
  Value *Mask = VPMask ? State.get(VPMask)
                       : Builder.CreateVectorSplat(
                             BucketsTy->getElementCount(), Builder.getInt1(1));

  // If this is a subtract, we want to invert the increment amount. We may
  // add a separate intrinsic in future, but for now we'll try this.
  if (Opcode != Instruction::Sub) {
    assert(Opcode == Instruction::Add && "only add or sub supported for now");
  } else {
    Delta = Builder.CreateNeg(Delta);
  }

  Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
                          {BucketsTy, Delta->getType()},
                          {Buckets, Delta, Mask});
}


/// Cost of the histogram update: the histogram intrinsic, plus a multiply
/// (unless the update amount is the constant 1), plus the add/sub itself.
InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  // FIXME: Take the gather and scatter into account as well. For now we're
  //        generating the same cost as the fallback path, but we'll likely
  //        need to create a new TTI method for determining the cost, including
  //        whether we can use base + vec-of-smaller-indices or just
  //        vec-of-pointers.
  assert(VF.isVector() && "Invalid VF for histogram cost");

  VPValue *Delta = getOperand(1);
  Type *BucketAddrTy = Ctx.Types.inferScalarType(getOperand(0));
  Type *DeltaTy = Ctx.Types.inferScalarType(Delta);
  VectorType *WideDeltaTy = VectorType::get(DeltaTy, VF);

  // Assume that a non-constant update value (or a constant != 1) requires
  // a multiply, and add that into the cost.
  auto *ConstDelta = dyn_cast<VPConstantInt>(Delta);
  const bool IsUnitDelta = ConstDelta && ConstDelta->isOne();
  InstructionCost MulCost =
      Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, WideDeltaTy,
                                     Ctx.CostKind);
  if (IsUnitDelta)
    MulCost = TTI::TCC_Free;

  // Find the cost of the histogram operation itself.
  Type *WidePtrTy = VectorType::get(BucketAddrTy, VF);
  Type *WideMaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
  IntrinsicCostAttributes HistogramAttrs(
      Intrinsic::experimental_vector_histogram_add,
      Type::getVoidTy(Ctx.LLVMCtx), {WidePtrTy, DeltaTy, WideMaskTy});
  InstructionCost HistogramCost =
      Ctx.TTI.getIntrinsicInstrCost(HistogramAttrs, Ctx.CostKind);

  // Add the costs together with the add/sub operation.
  InstructionCost UpdateCost =
      Ctx.TTI.getArithmeticInstrCost(Opcode, WideDeltaTy, Ctx.CostKind);
  return HistogramCost + MulCost + UpdateCost;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the buckets operand, the update amount (labelled "inc" or "dec") and
/// the mask, if the recipe has one.
void VPHistogramRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-HISTOGRAM buckets: ";
  getOperand(0)->printAsOperand(O, SlotTracker);

  if (Opcode != Instruction::Sub) {
    assert(Opcode == Instruction::Add);
    O << ", inc: ";
  } else {
    O << ", dec: ";
  }
  getOperand(1)->printAsOperand(O, SlotTracker);

  VPValue *Mask = getMask();
  if (!Mask)
    return;
  O << ", mask: ";
  Mask->printAsOperand(O, SlotTracker);
}


#endif


/// Copy each individual fast-math flag out of \p FMF.
// NOTE(review): the initializer order assumes the members are declared in this
// same order — confirm against the header.
VPIRFlags::FastMathFlagsTy::FastMathFlagsTy(const FastMathFlags &FMF)
    : AllowReassoc(FMF.allowReassoc()), NoNaNs(FMF.noNaNs()),
      NoInfs(FMF.noInfs()), NoSignedZeros(FMF.noSignedZeros()),
      AllowReciprocal(FMF.allowReciprocal()),
      AllowContract(FMF.allowContract()), ApproxFunc(FMF.approxFunc()) {}


#if !defined(NDEBUG)


bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {

  switch (OpType) {

  case OperationType::OverflowingBinOp:

    return Opcode == Instruction::Add || Opcode == Instruction::Sub ||

           Opcode == Instruction::Mul || Opcode == Instruction::Shl ||

           Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart;

  case OperationType::Trunc:

    return Opcode == Instruction::Trunc;

  case OperationType::DisjointOp:

    return Opcode == Instruction::Or;

  case OperationType::PossiblyExactOp:

    return Opcode == Instruction::AShr || Opcode == Instruction::LShr ||

           Opcode == Instruction::UDiv || Opcode == Instruction::SDiv;

  case OperationType::GEPOp:

    return Opcode == Instruction::GetElementPtr ||

           Opcode == VPInstruction::PtrAdd ||

           Opcode == VPInstruction::WidePtrAdd;

  case OperationType::FPMathOp:

    return Opcode == Instruction::Call || Opcode == Instruction::FAdd ||

           Opcode == Instruction::FMul || Opcode == Instruction::FSub ||

           Opcode == Instruction::FNeg || Opcode == Instruction::FDiv ||

           Opcode == Instruction::FRem || Opcode == Instruction::FPExt ||

           Opcode == Instruction::FPTrunc || Opcode == Instruction::Select ||

           Opcode == VPInstruction::WideIVStep ||

           Opcode == VPInstruction::ReductionStartVector;

  case OperationType::FCmp:

    return Opcode == Instruction::FCmp;

  case OperationType::NonNegOp:

    return Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP;

  case OperationType::Cmp:

    return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;

  case OperationType::ReductionOp:

    return Opcode == VPInstruction::ComputeReductionResult ||

           Opcode == VPInstruction::ComputeFindIVResult;

  case OperationType::Other:

    return true;

  }

  llvm_unreachable("Unknown OperationType enum");

}


#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the flags held by this object, each preceded by a space, followed by
/// a single trailing space (which is printed even when no flag is set).
void VPIRFlags::printFlags(raw_ostream &O) const {
  // Print Text only when Cond holds.
  auto PrintIf = [&O](bool Cond, const char *Text) {
    if (Cond)
      O << Text;
  };

  // Name printed for a reduction's recurrence kind: min/max kinds use fixed
  // names, all other kinds use the name of the recurrence's opcode.
  auto RecurKindName = [](RecurKind RK) -> const char * {
    switch (RK) {
    case RecurKind::SMax:
      return "smax";
    case RecurKind::SMin:
      return "smin";
    case RecurKind::UMax:
      return "umax";
    case RecurKind::UMin:
      return "umin";
    case RecurKind::FMinNum:
      return "fminnum";
    case RecurKind::FMaxNum:
      return "fmaxnum";
    case RecurKind::FMinimum:
      return "fminimum";
    case RecurKind::FMaximum:
      return "fmaximum";
    case RecurKind::FMinimumNum:
      return "fminimumnum";
    case RecurKind::FMaximumNum:
      return "fmaximumnum";
    default:
      return Instruction::getOpcodeName(RecurrenceDescriptor::getOpcode(RK));
    }
  };

  switch (OpType) {
  case OperationType::Cmp:
  case OperationType::FCmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    // Only floating-point compares additionally print fast-math flags.
    if (OpType == OperationType::FCmp)
      getFastMathFlags().print(O);
    break;
  case OperationType::DisjointOp:
    PrintIf(DisjointFlags.IsDisjoint, " disjoint");
    break;
  case OperationType::PossiblyExactOp:
    PrintIf(ExactFlags.IsExact, " exact");
    break;
  case OperationType::OverflowingBinOp:
    PrintIf(WrapFlags.HasNUW, " nuw");
    PrintIf(WrapFlags.HasNSW, " nsw");
    break;
  case OperationType::Trunc:
    PrintIf(TruncFlags.HasNUW, " nuw");
    PrintIf(TruncFlags.HasNSW, " nsw");
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    // "inbounds" takes precedence over "nusw"; "nuw" is printed independently.
    if (GEPFlags.isInBounds())
      O << " inbounds";
    else
      PrintIf(GEPFlags.hasNoUnsignedSignedWrap(), " nusw");
    PrintIf(GEPFlags.hasNoUnsignedWrap(), " nuw");
    break;
  case OperationType::NonNegOp:
    PrintIf(NonNegFlags.NonNeg, " nneg");
    break;
  case OperationType::ReductionOp:
    O << " (" << RecurKindName(getRecurKind());
    PrintIf(isReductionInLoop(), ", in-loop");
    PrintIf(isReductionOrdered(), ", ordered");
    O << ")";
    getFastMathFlags().print(O);
    break;
  case OperationType::Other:
    break;
  }
  O << " ";
}


#endif


/// Generate the widened IR instruction for this recipe's opcode and record it
/// as the value of the recipe.
void VPWidenRecipe::execute(VPTransformState &State) {
  IRBuilderBase &Builder = State.Builder;

  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    SmallVector<Value *, 2> WideOps;
    for (VPValue *Operand : operands())
      WideOps.push_back(State.get(Operand));

    Value *Widened = Builder.CreateNAryOp(Opcode, WideOps);
    // The builder may return a non-instruction value; flags and metadata are
    // only applied when an instruction was created.
    if (auto *WideInst = dyn_cast<Instruction>(Widened)) {
      applyFlags(*WideInst);
      applyMetadata(*WideInst);
    }

    // Use this vector value for all users of the original instruction.
    State.set(this, Widened);
    break;
  }
  case Instruction::ExtractValue: {
    assert(getNumOperands() == 2 && "expected single level extractvalue");
    Value *Aggregate = State.get(getOperand(0));
    const auto FieldIdx = cast<VPConstantInt>(getOperand(1))->getZExtValue();
    State.set(this, Builder.CreateExtractValue(Aggregate, FieldIdx));
    break;
  }
  case Instruction::Freeze: {
    Value *Frozen = Builder.CreateFreeze(State.get(getOperand(0)));
    State.set(this, Frozen);
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    Value *LHS = State.get(getOperand(0));
    Value *RHS = State.get(getOperand(1));
    Value *Cmp = Opcode == Instruction::FCmp
                     ? Builder.CreateFCmp(getPredicate(), LHS, RHS)
                     : Builder.CreateICmp(getPredicate(), LHS, RHS);
    if (auto *CmpInst = dyn_cast<Instruction>(Cmp)) {
      applyFlags(*CmpInst);
      applyMetadata(*CmpInst);
    }
    State.set(this, Cmp);
    break;
  }
  case Instruction::Select: {
    // A single-scalar condition is requested as a scalar rather than a vector.
    VPValue *CondOp = getOperand(0);
    Value *Cond = State.get(CondOp, vputils::isSingleScalar(CondOp));
    Value *TrueVal = State.get(getOperand(1));
    Value *FalseVal = State.get(getOperand(2));
    Value *Select = Builder.CreateSelect(Cond, TrueVal, FalseVal);
    State.set(this, Select);
    if (auto *SelectInst = dyn_cast<Instruction>(Select)) {
      // Flags are only applied to selects that are FP math operators.
      if (isa<FPMathOperator>(SelectInst))
        applyFlags(*SelectInst);
      applyMetadata(*SelectInst);
    }
    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) ==
             State.get(this)->getType() &&
         "inferred type and type from generated instructions do not match");
#endif
}


/// Cost of the widened instruction. Every supported opcode is costed through
/// getCostForRecipeWithOpcode; any other opcode is unreachable.
InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  switch (Opcode) {
  // Integer division and remainder.
  // If the div/rem operation isn't safe to speculate and requires
  // predication, then the only way we can even create a vplan is to insert
  // a select on the second input operand to ensure we use the value of 1
  // for the inactive lanes. The select will be costed separately.
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
  // Integer arithmetic, shifts and bitwise logic.
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  // Floating-point arithmetic.
  case Instruction::FNeg:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  // Compares, select and the remaining supported opcodes.
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Select:
  case Instruction::Freeze:
  case Instruction::ExtractValue:
    return getCostForRecipeWithOpcode(getOpcode(), VF, Ctx);
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "WIDEN <result> = <opcode name><flags><operands>".
void VPWidenRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN ";
  printAsOperand(O, SlotTracker);

  O << " = ";
  O << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}


#endif


/// Emit a cast of operand 0 to a vector of the result type with State.VF
/// elements.
void VPWidenCastRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "Not vectorizing?");

  Type *WideDestTy = VectorType::get(getResultType(), State.VF);
  Value *Source = State.get(getOperand(0));
  Value *Widened = State.Builder.CreateCast(Instruction::CastOps(Opcode),
                                            Source, WideDestTy);
  State.set(this, Widened);

  // The builder may return a non-instruction value; flags and metadata are
  // only applied when an instruction was created.
  auto *CastInst = dyn_cast<Instruction>(Widened);
  if (!CastInst)
    return;
  applyFlags(*CastInst);
  applyMetadata(*CastInst);
}


/// Cost of the widened cast; casts without an underlying value cost zero.
InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  // TODO: In some cases, VPWidenCastRecipes are created but not considered in
  // the legacy cost model, including truncates/extends when evaluating a
  // reduction in a smaller type.
  if (getUnderlyingValue())
    return getCostForRecipeWithOpcode(getOpcode(), VF, Ctx);
  return 0;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "WIDEN-CAST <result> = <opcode name><flags><operands>
/// to <result type>".
void VPWidenCastRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN-CAST ";
  printAsOperand(O, SlotTracker);

  O << " = ";
  O << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);

  O << " to ";
  O << *getResultType();
}


#endif


/// Header phis are costed as a single PHI control-flow instruction; \p VF is
/// not consulted.
InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  InstructionCost PhiCost =
      Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
  return PhiCost;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "<result> = WIDEN-INDUCTION<flags><operands>", followed
/// by the truncated type when the recipe has a trunc instruction.
void VPWidenIntOrFpInductionRecipe::printRecipe(
    raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-INDUCTION";
  printFlags(O);
  printOperands(O, SlotTracker);

  auto *Trunc = getTruncInst();
  if (!Trunc)
    return;
  O << " (truncated to " << *Trunc->getType() << ")";
}


#endif


/// Return true if the induction starts at constant 0, steps by constant 1 and
/// its scalar type equals the canonical IV type of the enclosing region.
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  // The step may be defined by a recipe in the preheader (e.g. if it requires
  // SCEV expansion), but for the canonical induction the step is required to be
  // 1, which is represented as live-in.
  auto *ConstStep = dyn_cast<VPConstantInt>(getStepValue());
  auto *ConstStart = dyn_cast<VPConstantInt>(getStartValue());

  if (!ConstStart || !ConstStart->isZero())
    return false;
  if (!ConstStep || !ConstStep->isOne())
    return false;
  return getScalarType() == getRegion()->getCanonicalIVType();
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "<result> = DERIVED-IV <start> + <operand 1> * <step>".
void VPDerivedIVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = DERIVED-IV ";

  const VPValue *Start = getStartValue();
  Start->printAsOperand(O, SlotTracker);
  O << " + ";

  const VPValue *Index = getOperand(1);
  Index->printAsOperand(O, SlotTracker);
  O << " * ";

  const VPValue *Step = getStepValue();
  Step->printAsOperand(O, SlotTracker);
}


#endif


/// Generate, for every requested lane, the scalar value
///   BaseIV + (StartIdx0 + Lane) * Step
/// where StartIdx0 is the start index of the current unroll part, and record
/// it as that lane's value of the recipe.
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  IRBuilderBase &Builder = State.Builder;
  Value *BaseIV = State.get(getOperand(0), VPLane(0));
  Value *Step = State.get(getStepValue(), VPLane(0));

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  const bool IsIntegerIV = BaseIVTy->isIntegerTy();
  const Instruction::BinaryOps AddOp =
      IsIntegerIV ? Instruction::Add : InductionOpcode;
  const Instruction::BinaryOps MulOp =
      IsIntegerIV ? Instruction::Mul : Instruction::FMul;

  // Determine the number of scalars we need to generate for each unroll
  // iteration. When a specific lane is being generated, only that lane is
  // produced.
  unsigned FirstLane = 0;
  unsigned LaneEnd =
      vputils::onlyFirstLaneUsed(this) ? 1 : State.VF.getKnownMinValue();
  if (State.Lane) {
    FirstLane = State.Lane->getKnownLane();
    LaneEnd = FirstLane + 1;
  }

  // Integer type with the same bit width as the IV's scalar type.
  Type *IntStepTy =
      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());

  // Start index of the current unroll part: 0 for part 0, otherwise operand 2
  // (multiplied by the part number for parts beyond the first), converted to
  // IntStepTy.
  const unsigned Part = getUnrollPart(*this);
  Value *StartIdx0;
  if (Part == 0) {
    StartIdx0 = ConstantInt::get(IntStepTy, 0);
  } else {
    StartIdx0 = State.get(getOperand(2), true);
    if (Part != 1)
      StartIdx0 = Builder.CreateMul(
          StartIdx0, ConstantInt::get(StartIdx0->getType(), Part));
    StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
  }

  if (BaseIVTy->isFloatingPointTy())
    StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);

  // Compute the scalar steps and save the results in State.
  for (unsigned Lane = FirstLane; Lane < LaneEnd; ++Lane) {
    // It is okay if the induction variable type cannot hold the lane number,
    // we expect truncation in this case.
    Constant *LaneValue;
    if (IsIntegerIV)
      LaneValue = ConstantInt::get(BaseIVTy, Lane, /*IsSigned=*/false,
                                   /*ImplicitTrunc=*/true);
    else
      LaneValue = ConstantFP::get(BaseIVTy, Lane);

    Value *StartIdx = Builder.CreateBinOp(AddOp, StartIdx0, LaneValue);
    // The step returned by `createStepForVF` is a runtime-evaluated value
    // when VF is scalable. Otherwise, it should be folded into a Constant.
    assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
           "Expected StartIdx to be folded to a constant when VF is not "
           "scalable");

    Value *Offset = Builder.CreateBinOp(MulOp, StartIdx, Step);
    Value *LaneIV = Builder.CreateBinOp(AddOp, BaseIV, Offset);
    State.set(this, LaneIV, VPLane(Lane));
  }
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print this recipe in the form
/// "<Indent><this recipe as operand> = SCALAR-STEPS <operands>".
///
/// \param O            Stream the text is written to.
/// \param Indent       Prefix emitted before the recipe text.
/// \param SlotTracker  Forwarded to the operand printers.
void VPScalarIVStepsRecipe::printRecipe(raw_ostream &O, const Twine &Indent,

                                        VPSlotTracker &SlotTracker) const {

  O << Indent;

  // The value defined by this recipe comes first ...
  printAsOperand(O, SlotTracker);

  // ... followed by the mnemonic and the operand list.
  O << " = SCALAR-STEPS ";

  printOperands(O, SlotTracker);

}


#endif


/// Reports whether this recipe only uses the first lane of operand \p Op.
/// The answer is exactly what vputils::isSingleScalar(Op) returns.
///
/// \p Op must be one of this recipe's operands; this is asserted.
bool VPWidenGEPRecipe::usesFirstLaneOnly(const VPValue *Op) const {

  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  return vputils::isSingleScalar(Op);

}


/// Generate the IR for a widened getelementptr and record it in \p State.
///
/// A GEP yields a vector of pointers as soon as one of its operands is
/// vector-typed, so only loop-varying operands are requested in vector form.
/// When every operand is defined outside the loop regions, a scalar GEP is
/// built from lane 0 of each operand and then splatted to State.VF lanes.
/// The no-wrap flags returned by getGEPNoWrapFlags() are applied in both
/// cases. Requires a vector VF (asserted).
void VPWidenGEPRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");

  auto &Builder = State.Builder;

  const bool HasOnlyInvariantOperands = all_of(
      operands(), [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); });

  if (!HasOnlyInvariantOperands) {
    // At least one operand is loop-varying, so the GEP below is a vector of
    // pointers unless VF is scalar. The pointer operand is requested in
    // scalar form when it is flagged loop-invariant, i.e. not broadcast.
    Value *BasePtr = State.get(getOperand(0), isPointerLoopInvariant());

    // Gather the indices; loop-invariant indices are not broadcast either.
    SmallVector<Value *, 4> GEPIndices;
    for (unsigned OpIdx = 1, NumOps = getNumOperands(); OpIdx < NumOps;
         ++OpIdx)
      GEPIndices.push_back(
          State.get(getOperand(OpIdx), isIndexLoopInvariant(OpIdx - 1)));

    // This GEP may be a scalar if VF == 1, but it should be a vector
    // otherwise.
    Value *WideGEP = Builder.CreateGEP(getSourceElementType(), BasePtr,
                                       GEPIndices, "", getGEPNoWrapFlags());
    assert((State.VF.isScalar() || WideGEP->getType()->isVectorTy()) &&
           "NewGEP is not a pointer vector");
    State.set(this, WideGEP);
    return;
  }

  // All operands are loop-invariant: building the GEP from them directly
  // would give a scalar pointer. To still produce a vector of pointers we
  // could either pick an arbitrary operand to broadcast or broadcast a clone
  // of the original GEP; the latter is done here.
  SmallVector<Value *> Lane0Ops;
  for (VPValue *Op : operands())
    Lane0Ops.push_back(State.get(Op, VPLane(0)));

  Value *ScalarGEP =
      Builder.CreateGEP(getSourceElementType(), Lane0Ops[0],
                        drop_begin(Lane0Ops), "", getGEPNoWrapFlags());
  State.set(this, Builder.CreateVectorSplat(State.VF, ScalarGEP));
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print this recipe as
/// "<Indent>WIDEN-GEP <P>[<I0>][<I1>]... <result> = getelementptr<flags><ops>",
/// where <P> and each <Ik> are "Inv" or "Var" depending on whether the
/// pointer operand / k-th index is flagged loop-invariant.
void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  // Maps the loop-invariance flag to its printed tag.
  auto InvOrVar = [](bool IsInvariant) { return IsInvariant ? "Inv" : "Var"; };

  O << Indent << "WIDEN-GEP ";
  O << InvOrVar(isPointerLoopInvariant());

  // Every operand after the pointer is an index.
  const size_t NumIndices = getNumOperands() - 1;
  for (size_t Idx = 0; Idx < NumIndices; ++Idx)
    O << "[" << InvOrVar(isIndexLoopInvariant(Idx)) << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr";
  printFlags(O);
  printOperands(O, SlotTracker);
}


#endif


/// Emit the pointer computed by this recipe and store it as a scalar value.
///
/// With RunTimeVF taken from lane 0 of getVFValue() (converted to the index
/// type), the result is
///   Ptr + Stride * Part * RunTimeVF + Stride * (RunTimeVF - 1)
/// in units of IndexedTy, emitted as two GEPs that both carry the flags from
/// getGEPNoWrapFlags(). Part is this recipe's unroll part.
void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
  auto &IRB = State.Builder;
  Type *IdxTy = IRB.GetInsertBlock()->getDataLayout().getIndexType(
      State.TypeAnalysis.inferScalarType(this));

  // The wide store needs to start at the last vector element.
  Value *VFVal = State.get(getVFValue(), VPLane(0));
  if (VFVal->getType() != IdxTy)
    VFVal = IRB.CreateZExtOrTrunc(VFVal, IdxTy);

  // Offset contributed by the unroll part: Stride * CurrentPart * RunTimeVF.
  const int64_t PartStride =
      Stride * static_cast<int64_t>(getUnrollPart(*this));
  Value *PartOffset =
      IRB.CreateMul(ConstantInt::getSigned(IdxTy, PartStride), VFVal);

  // Offset of the last lane: Stride * (RunTimeVF - 1). The multiplication is
  // skipped for a unit stride.
  Value *LastLaneOffset = IRB.CreateSub(VFVal, ConstantInt::get(IdxTy, 1));
  if (Stride != 1)
    LastLaneOffset =
        IRB.CreateMul(ConstantInt::getSigned(IdxTy, Stride), LastLaneOffset);

  // Apply both offsets to the base pointer, one GEP each.
  Value *BasePtr = State.get(getOperand(0), VPLane(0));
  Value *PartPtr =
      IRB.CreateGEP(IndexedTy, BasePtr, PartOffset, "", getGEPNoWrapFlags());
  Value *EndPtr = IRB.CreateGEP(IndexedTy, PartPtr, LastLaneOffset, "",
                                getGEPNoWrapFlags());

  State.set(this, EndPtr, /*IsScalar*/ true);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print this recipe in the form
/// "<Indent><this recipe as operand> = vector-end-pointer<flags><operands>".
///
/// \param O            Stream the text is written to.
/// \param Indent       Prefix emitted before the recipe text.
/// \param SlotTracker  Forwarded to the operand printers.
void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,

                                           VPSlotTracker &SlotTracker) const {

  O << Indent;

  printAsOperand(O, SlotTracker);

  O << " = vector-end-pointer";

  // Flags are printed between the mnemonic and the operand list.
  printFlags(O);

  printOperands(O, SlotTracker);

}


#endif


/// Emit a single GEP that offsets lane 0 of the pointer operand by the scalar
/// value of getOffset(), and store the result as a scalar value.
///
/// The recipe must still carry an offset at this point (asserted). The GEP
/// uses getSourceElementType() and the flags from getGEPNoWrapFlags().
void VPVectorPointerRecipe::execute(VPTransformState &State) {
  assert(getOffset() &&
         "Expected prior simplification of recipe without offset");

  Value *BasePtr = State.get(getOperand(0), VPLane(0));
  Value *OffsetVal = State.get(getOffset(), true);

  Value *OffsetPtr = State.Builder.CreateGEP(
      getSourceElementType(), BasePtr, OffsetVal, "", getGEPNoWrapFlags());
  State.set(this, OffsetPtr, /*IsScalar*/ true);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print this recipe in the form
/// "<Indent><this recipe as operand> = vector-pointer<flags><operands>".
///
/// \param O            Stream the text is written to.
/// \param Indent       Prefix emitted before the recipe text.
/// \param SlotTracker  Forwarded to the operand printers.
void VPVectorPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,

                                        VPSlotTracker &SlotTracker) const {

  O << Indent;

  printAsOperand(O, SlotTracker);

  O << " = vector-pointer";

  // Flags are printed between the mnemonic and the operand list.
  printFlags(O);

  printOperands(O, SlotTracker);

}


#endif


/// Cost of this blend for vectorization factor \p VF: the cost of one select
/// multiplied by (number of incoming values - 1).
///
/// A blend will be expanded to a select VPInstruction, which will generate a
/// scalar select if only the first lane is used; in that case the select is
/// costed with a fixed VF of 1.
InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  const ElementCount CostVF =
      vputils::onlyFirstLaneUsed(this) ? ElementCount::getFixed(1) : VF;

  Type *SelectTy = toVectorTy(Ctx.Types.inferScalarType(this), CostVF);
  Type *MaskTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), CostVF);

  InstructionCost SelectCost = Ctx.TTI.getCmpSelInstrCost(
      Instruction::Select, SelectTy, MaskTy, CmpInst::BAD_ICMP_PREDICATE,
      Ctx.CostKind);
  return (getNumIncomingValues() - 1) * SelectCost;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print this recipe as "<Indent>BLEND <result> = <v0> <v1>/<m1> <v2>/<m2>...".
///
/// The first incoming value is printed without a mask. With a single incoming
/// value the recipe is not a user of any mask (it is not really blending,
/// just a single-predecessor phi), so only that value is printed.
void VPBlendRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  printAsOperand(O, SlotTracker);
  O << " =";

  const unsigned NumIncoming = getNumIncomingValues();
  for (unsigned Idx = 0; Idx < NumIncoming; ++Idx) {
    O << " ";
    getIncomingValue(Idx)->printAsOperand(O, SlotTracker);

    // Only the values after the first one are paired with a mask.
    if (Idx != 0) {
      O << "/";
      getMask(Idx)->printAsOperand(O, SlotTracker);
    }
  }
}


#endif


/// Emit the IR for one step of this reduction and record the next value of
/// the reduction chain in \p State.
///
/// If the recipe has a condition, lanes whose condition is false are first
/// replaced by the identity value of the recurrence kind. Then, depending on
/// the kind of reduction:
///  * ordered: the vector operand is folded into the scalar chain value with
///    createOrderedReduction (or a plain binary op when VF is scalar);
///  * partial: the vector chain value and the vector operand are combined
///    with the vector_partial_reduce_add intrinsic; the result stays a vector;
///  * otherwise (in-loop): the vector operand is reduced to a scalar with
///    createSimpleReduction and then combined with the scalar chain value.
///
/// Must not be executed per-lane, and AnyOf recurrences are not supported
/// (both asserted).
void VPReductionRecipe::execute(VPTransformState &State) {
  assert(!State.Lane && "Reduction being replicated.");
  RecurKind Kind = getRecurrenceKind();
  assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
         "In-loop AnyOf reductions aren't currently supported");

  // Use this recipe's fast-math flags for everything emitted below; the
  // builder's flag state is scoped by FMFGuard.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(getFastMathFlags());

  Value *NewVecOp = State.get(getVecOp());
  if (VPValue *Cond = getCondOp()) {
    Value *NewCond = State.get(Cond, State.VF.isScalar());
    VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
    Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();

    // Masked-off lanes contribute the identity of the recurrence, so they do
    // not change the reduced value.
    Value *Start = getRecurrenceIdentity(Kind, ElementTy, getFastMathFlags());
    if (State.VF.isVector())
      Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);

    NewVecOp = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
  }

  Value *NextInChain;
  if (isOrdered()) {
    Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
    if (State.VF.isVector())
      NextInChain =
          createOrderedReduction(State.Builder, Kind, NewVecOp, PrevInChain);
    else
      NextInChain = State.Builder.CreateBinOp(
          (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind),
          PrevInChain, NewVecOp);
  } else if (isPartialReduction()) {
    assert(Kind == RecurKind::Add && "Unexpected partial reduction kind");
    // The chain value of a partial reduction is fetched in vector form.
    Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ false);
    NextInChain = State.Builder.CreateIntrinsic(
        PrevInChain->getType(), Intrinsic::vector_partial_reduce_add,
        {PrevInChain, NewVecOp}, nullptr, "partial.reduce");
  } else {
    assert(isInLoop() &&
           "The reduction must either be ordered, partial or in-loop");
    Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
    // Reduce the vector operand to a scalar first, then fold it into the
    // chain.
    Value *NewRed = createSimpleReduction(State.Builder, NewVecOp, Kind);
    if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
      NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
    else
      NextInChain = State.Builder.CreateBinOp(
          (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind),
          PrevInChain, NewRed);
  }

  // Only partial reductions produce a vector result.
  State.set(this, NextInChain, /*IsScalar*/ !isPartialReduction());
}


/// Emit the IR for one step of this explicit-vector-length reduction and
/// store the scalar result in \p State.
///
/// The mask is the recipe's condition if it has one, otherwise an all-true
/// splat. Ordered reductions fold the vector operand into the chain value
/// with createOrderedReduction; all others reduce the vector operand with
/// createSimpleReduction and then combine the result with the chain value.
/// Must not be executed per-lane (asserted).
void VPReductionEVLRecipe::execute(VPTransformState &State) {
  assert(!State.Lane && "Reduction being replicated.");

  auto &Builder = State.Builder;

  // Use this recipe's fast-math flags for everything emitted below; the
  // builder's flag state is scoped by FMFGuard.
  IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
  Builder.setFastMathFlags(getFastMathFlags());

  RecurKind Kind = getRecurrenceKind();
  Value *ChainVal = State.get(getChainOp(), /*IsScalar*/ true);
  Value *VecVal = State.get(getVecOp());
  Value *EVLVal = State.get(getEVL(), VPLane(0));

  // Without a condition every lane is active.
  VPValue *CondOp = getCondOp();
  Value *Mask = CondOp
                    ? State.get(CondOp)
                    : Builder.CreateVectorSplat(State.VF, Builder.getTrue());

  if (isOrdered()) {
    State.set(this,
              createOrderedReduction(Builder, Kind, VecVal, ChainVal, Mask,
                                     EVLVal),
              /*IsScalar*/ true);
    return;
  }

  // Reduce the vector operand first, then fold the result into the chain.
  Value *Reduced = createSimpleReduction(Builder, VecVal, Kind, Mask, EVLVal);
  Value *Combined;
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
    Combined = createMinMaxOp(Builder, Kind, Reduced, ChainVal);
  else
    Combined = Builder.CreateBinOp(
        (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind), Reduced,
        ChainVal);

  State.set(this, Combined, /*IsScalar*/ true);
}


/// Cost of this reduction for vectorization factor \p VF.
///
/// Partial reductions are costed with TTI's partial-reduction hook, plus the
/// cost of a select when the reduction is conditional. Min/max recurrences
/// use getMinMaxReductionCost and everything else uses
/// getArithmeticReductionCost. Any-of recurrences are not supported by this
/// cost model (asserted unless ForceTargetInstructionCost was given).
InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  RecurKind RdxKind = getRecurrenceKind();
  Type *ElementTy = Ctx.Types.inferScalarType(this);
  auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
  unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);

  if (isPartialReduction()) {
    // A conditional partial reduction additionally pays for the select.
    InstructionCost SelectCost = 0;
    if (isConditional()) {
      auto *CondTy = cast<VectorType>(
          toVectorTy(Ctx.Types.inferScalarType(getCondOp()), VF));
      SelectCost = Ctx.TTI.getCmpSelInstrCost(
          Instruction::Select, VectorTy, CondTy, CmpInst::BAD_ICMP_PREDICATE,
          Ctx.CostKind);
    }
    InstructionCost PartialCost = Ctx.TTI.getPartialReductionCost(
        Opcode, ElementTy, ElementTy, ElementTy, VF,
        TargetTransformInfo::PR_None, TargetTransformInfo::PR_None,
        std::nullopt, Ctx.CostKind);
    return SelectCost + PartialCost;
  }

  // TODO: Support any-of reductions.
  assert(
      (!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) ||
       ForceTargetInstructionCost.getNumOccurrences() > 0) &&
      "Any-of reduction not implemented in VPlan-based cost model currently.");

  FastMathFlags FMFs = getFastMathFlags();

  // Note that TTI should model the cost of moving result to the scalar
  // register and the BinOp cost in the getMinMaxReductionCost().
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind))
    return Ctx.TTI.getMinMaxReductionCost(
        getMinMaxReductionIntrinsicOp(RdxKind), VectorTy, FMFs, Ctx.CostKind);

  // Fast-math flags are only passed on for floating-point element types.
  std::optional<FastMathFlags> OptionalFMF;
  if (ElementTy->isFloatingPointTy())
    OptionalFMF = FMFs;

  // Note that TTI should model the cost of moving result to the scalar
  // register and the BinOp cost in the getArithmeticReductionCost().
  return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF,
                                            Ctx.CostKind);
}


/// Bundle \p ExpressionRecipes into a single expression recipe of kind
/// \p ExpressionType.
///
/// The constructor
///  1. clones every bundled recipe (except the last) that has users outside
///     the bundle and redirects those external users to the clone,
///  2. removes the bundled recipes from their parent,
///  3. adds every operand that is not defined inside the bundle as an operand
///     of this recipe and creates a placeholder value for it, and
///  4. rewires the bundled recipes to use the placeholders.
///
/// \p ExpressionRecipes must be non-empty and must not contain recipes with
/// side-effects (both asserted).
VPExpressionRecipe::VPExpressionRecipe(
    ExpressionTypes ExpressionType,
    ArrayRef<VPSingleDefRecipe *> ExpressionRecipes)
    : VPSingleDefRecipe(VPDef::VPExpressionSC, {}, {}),
      ExpressionRecipes(ExpressionRecipes), ExpressionType(ExpressionType) {
  assert(!ExpressionRecipes.empty() && "Nothing to combine?");
  assert(
      none_of(ExpressionRecipes,
              [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) &&
      "expression cannot contain recipes with side-effects");

  // Maintain a copy of the expression recipes as a set of users.
  SmallPtrSet<VPUser *, 4> ExpressionRecipesAsSetOfUsers;
  for (auto *R : ExpressionRecipes)
    ExpressionRecipesAsSetOfUsers.insert(R);

  // Recipes in the expression, except the last one, must only be used by
  // (other) recipes inside the expression. If there are other users, external
  // to the expression, use a clone of the recipe for external users.
  for (VPSingleDefRecipe *R : reverse(ExpressionRecipes)) {
    if (R != ExpressionRecipes.back() &&
        any_of(R->users(), [&ExpressionRecipesAsSetOfUsers](VPUser *U) {
          return !ExpressionRecipesAsSetOfUsers.contains(U);
        })) {
      // There are users outside of the expression. Clone the recipe and use
      // the clone for those external users.
      VPSingleDefRecipe *CopyForExtUsers = R->clone();
      R->replaceUsesWithIf(CopyForExtUsers, [&ExpressionRecipesAsSetOfUsers](
                                                VPUser &U, unsigned) {
        return !ExpressionRecipesAsSetOfUsers.contains(&U);
      });
      CopyForExtUsers->insertBefore(R);
    }
    // The list may name the same recipe more than once; only detach a recipe
    // that is still attached to a parent.
    if (R->getParent())
      R->removeFromParent();
  }

  // Internalize all external operands to the expression recipes. To do so,
  // create new temporary VPValues for all operands defined by a recipe outside
  // the expression. The original operands are added as operands of the
  // VPExpressionRecipe itself.
  for (auto *R : ExpressionRecipes) {
    for (auto *Op : R->operands()) {
      auto *Def = Op->getDefiningRecipe();
      if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
        continue;
      // Operands and placeholders are appended in lock-step, so they share
      // the same index.
      addOperand(Op);
      LiveInPlaceholders.push_back(new VPSymbolicValue());
    }
  }

  // Replace each external operand with the first one created for it in
  // LiveInPlaceholders.
  for (auto *R : ExpressionRecipes)
    for (auto const &[LiveIn, Tmp] : zip(operands(), LiveInPlaceholders))
      R->replaceUsesOfWith(LiveIn, Tmp);
}


/// Undo the bundling: re-insert the bundled recipes in front of this recipe,
/// replace every placeholder by the matching operand of this recipe, redirect
/// all uses of this recipe to the last bundled recipe, and finally clear the
/// list of bundled recipes.
void VPExpressionRecipe::decompose() {
  for (auto *Bundled : ExpressionRecipes) {
    // Since the list could contain duplicates, skip recipes that have
    // already been inserted.
    if (Bundled->getParent())
      continue;
    Bundled->insertBefore(this);
  }

  // Placeholder I stands for operand I of this recipe.
  for (const auto &[OpIdx, LiveIn] : enumerate(operands()))
    LiveInPlaceholders[OpIdx]->replaceAllUsesWith(LiveIn);

  replaceAllUsesWith(ExpressionRecipes.back());
  ExpressionRecipes.clear();
}


/// Cost of the bundled expression for vectorization factor \p VF, obtained
/// from the TTI hook matching ExpressionType: an extended reduction, a
/// multiply-accumulate reduction, or a partial reduction when the trailing
/// reduction recipe is a partial reduction.
///
/// The reduction opcode is derived from the recurrence kind of the last
/// bundled recipe, which is cast to VPReductionRecipe. The result type must
/// be an integer type (asserted).
InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,

                                                VPCostContext &Ctx) const {

  Type *RedTy = Ctx.Types.inferScalarType(this);

  // Vector type built from the scalar type of the first operand.
  auto *SrcVecTy = cast<VectorType>(

      toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF));

  assert(RedTy->isIntegerTy() &&

         "VPExpressionRecipe only supports integer types currently.");

  unsigned Opcode = RecurrenceDescriptor::getOpcode(

      cast<VPReductionRecipe>(ExpressionRecipes.back())->getRecurrenceKind());

  switch (ExpressionType) {

  case ExpressionTypes::ExtendedReduction: {

    // NOTE(review): this declaration shadows the function-level Opcode and
    // reads the recurrence kind from ExpressionRecipes[1] instead of
    // ExpressionRecipes.back(). Presumably both name the same reduction
    // recipe for this expression type -- confirm before merging the two.
    unsigned Opcode = RecurrenceDescriptor::getOpcode(

        cast<VPReductionRecipe>(ExpressionRecipes[1])->getRecurrenceKind());

    auto *ExtR = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);


    // A partial reduction is costed with the extend kind of the cast; a
    // regular one as an extended reduction, where the first flag says whether
    // the cast is a zext.
    return cast<VPReductionRecipe>(ExpressionRecipes.back())

                   ->isPartialReduction()

               ? Ctx.TTI.getPartialReductionCost(

                     Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr,

                     RedTy, VF,

                     TargetTransformInfo::getPartialReductionExtendKind(

                         ExtR->getOpcode()),

                     TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind)

               : Ctx.TTI.getExtendedReductionCost(

                     Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy,

                     SrcVecTy, std::nullopt, Ctx.CostKind);

  }

  case ExpressionTypes::MulAccReduction:

    return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,

                                          Ctx.CostKind);


  case ExpressionTypes::ExtNegatedMulAccReduction:

    // The negated form is costed with a Sub opcode; apart from that it shares
    // the ExtMulAccReduction path below.
    assert(Opcode == Instruction::Add && "Unexpected opcode");

    Opcode = Instruction::Sub;

    [[fallthrough]];

  case ExpressionTypes::ExtMulAccReduction: {

    auto *RedR = cast<VPReductionRecipe>(ExpressionRecipes.back());

    if (RedR->isPartialReduction()) {

      // Bundle layout used here: [0] and [1] are the casts, [2] the multiply.
      auto *Ext0R = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);

      auto *Ext1R = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);

      auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);

      return Ctx.TTI.getPartialReductionCost(

          Opcode, Ctx.Types.inferScalarType(getOperand(0)),

          Ctx.Types.inferScalarType(getOperand(1)), RedTy, VF,

          TargetTransformInfo::getPartialReductionExtendKind(

              Ext0R->getOpcode()),

          TargetTransformInfo::getPartialReductionExtendKind(

              Ext1R->getOpcode()),

          Mul->getOpcode(), Ctx.CostKind);

    }

    // The first flag says whether the first bundled cast is a zext.
    return Ctx.TTI.getMulAccReductionCost(

        cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==

            Instruction::ZExt,

        Opcode, RedTy, SrcVecTy, Ctx.CostKind);

  }

  }

  llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum");

}


bool VPExpressionRecipe::mayReadOrWriteMemory() const {

  return any_of(ExpressionRecipes, [](VPSingleDefRecipe *R) {

    return R->mayReadFromMemory() || R->mayWriteToMemory();

  });

}


/// Always returns false. In builds with assertions enabled this additionally
/// checks that none of the bundled recipes reports side-effects.
bool VPExpressionRecipe::mayHaveSideEffects() const {

  assert(

      none_of(ExpressionRecipes,

              [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) &&

      "expression cannot contain recipes with side-effects");

  return false;

}


/// Returns true if the last bundled recipe is a VPReductionRecipe that is not
/// a partial reduction.
///
/// vputils::isSingleScalar() cannot be used here, because all external
/// operands of the expression will be live-ins while bundled.
bool VPExpressionRecipe::isSingleScalar() const {
  if (auto *RedR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back()))
    return !RedR->isPartialReduction();
  return false;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the bundled expression as "<Indent>EXPRESSION <result> = ...".
///
/// The text after " = " depends on ExpressionType. Every form starts with one
/// operand of this recipe followed by " + [partial.]reduce.<opcode> (", where
/// <opcode> comes from the recurrence kind of the last bundled recipe (cast to
/// VPReductionRecipe), and ends with the reduction's condition operand if it
/// is conditional.
void VPExpressionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,

                                     VPSlotTracker &SlotTracker) const {

  O << Indent << "EXPRESSION ";

  printAsOperand(O, SlotTracker);

  O << " = ";

  auto *Red = cast<VPReductionRecipe>(ExpressionRecipes.back());

  // Only the ExtendedReduction case uses this local; the other cases
  // recompute the same opcode inline.
  unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());


  switch (ExpressionType) {

  case ExpressionTypes::ExtendedReduction: {

    // Form: <op1> + [partial.]reduce.<opcode> (<op0><flags><ext> to <ty>
    //       [, <cond>])
    getOperand(1)->printAsOperand(O, SlotTracker);

    O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";

    O << Instruction::getOpcodeName(Opcode) << " (";

    getOperand(0)->printAsOperand(O, SlotTracker);

    Red->printFlags(O);


    auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);

    O << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "

      << *Ext0->getResultType();

    if (Red->isConditional()) {

      O << ", ";

      Red->getCondOp()->printAsOperand(O, SlotTracker);

    }

    O << ")";

    break;

  }

  case ExpressionTypes::ExtNegatedMulAccReduction: {

    // Form: <last op> + [partial.]reduce.<opcode> (sub (0, mul<flags>(<op0>
    //       <ext0> to <ty0>), (<op1> <ext1> to <ty1>)[, <cond>]))
    getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);

    O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";

    O << Instruction::getOpcodeName(

             RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))

      << " (sub (0, mul";

    // Bundle layout used here: [0] and [1] are the casts, [2] the multiply.
    auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);

    Mul->printFlags(O);

    O << "(";

    getOperand(0)->printAsOperand(O, SlotTracker);

    auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);

    O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "

      << *Ext0->getResultType() << "), (";

    getOperand(1)->printAsOperand(O, SlotTracker);

    auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);

    O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "

      << *Ext1->getResultType() << ")";

    if (Red->isConditional()) {

      O << ", ";

      Red->getCondOp()->printAsOperand(O, SlotTracker);

    }

    O << "))";

    break;

  }

  case ExpressionTypes::MulAccReduction:

  case ExpressionTypes::ExtMulAccReduction: {

    // Form without extends: <last op> + [partial.]reduce.<opcode>
    //                       (mul<flags><op0>, <op1>[, <cond>])
    // Form with extends:    <last op> + [partial.]reduce.<opcode>
    //                       (mul<flags>(<op0> <ext0> to <ty0>), (<op1> <ext1>
    //                       to <ty1>)[, <cond>])
    getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);

    O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";

    O << Instruction::getOpcodeName(

             RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))

      << " (";

    O << "mul";

    bool IsExtended = ExpressionType == ExpressionTypes::ExtMulAccReduction;

    // The multiply is bundled at index 2 when it is preceded by two casts and
    // at index 0 otherwise.
    auto *Mul = cast<VPWidenRecipe>(IsExtended ? ExpressionRecipes[2]

                                               : ExpressionRecipes[0]);

    Mul->printFlags(O);

    if (IsExtended)

      O << "(";

    getOperand(0)->printAsOperand(O, SlotTracker);

    if (IsExtended) {

      auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);

      O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "

        << *Ext0->getResultType() << "), (";

    } else {

      O << ", ";

    }

    getOperand(1)->printAsOperand(O, SlotTracker);

    if (IsExtended) {

      auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);

      O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "

        << *Ext1->getResultType() << ")";

    }

    if (Red->isConditional()) {

      O << ", ";

      Red->getCondOp()->printAsOperand(O, SlotTracker);

    }

    O << ")";

    break;

  }

  }

}


/// Print this recipe as
/// "<Indent>REDUCE <result> = <chain> +<flags> reduce.<opcode> (<vec>[, <cond>])".
/// Partial reductions use the mnemonic "PARTIAL-REDUCE" instead of "REDUCE".
void VPReductionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent << (isPartialReduction() ? "PARTIAL-REDUCE " : "REDUCE ");
  printAsOperand(O, SlotTracker);

  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  printFlags(O);

  // The opcode name is derived from the recurrence kind.
  const char *OpcodeName = Instruction::getOpcodeName(
      RecurrenceDescriptor::getOpcode(getRecurrenceKind()));
  O << " reduce." << OpcodeName << " (";
  getVecOp()->printAsOperand(O, SlotTracker);

  // The condition is only printed for conditional reductions.
  if (isConditional()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
}


/// Print this recipe as
/// "<Indent>REDUCE <result> = <chain> +<flags> vp.reduce.<opcode> (<vec>, <evl>[, <cond>])".
void VPReductionEVLRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                       VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);

  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  printFlags(O);

  // The opcode name is derived from the recurrence kind.
  const char *OpcodeName = Instruction::getOpcodeName(
      RecurrenceDescriptor::getOpcode(getRecurrenceKind()));
  O << " vp.reduce." << OpcodeName << " (";

  getVecOp()->printAsOperand(O, SlotTracker);
  O << ", ";
  getEVL()->printAsOperand(O, SlotTracker);

  // The condition is only printed for conditional reductions.
  if (isConditional()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
}


#endif


/// A helper function to scalarize a single Instruction in the innermost loop.

/// Generates a sequence of scalar instances for lane \p Lane. Uses the VPValue

/// operands from \p RepRecipe instead of \p Instr's operands.


static void scalarizeInstruction(const Instruction *Instr,

                                 VPReplicateRecipe *RepRecipe,

                                 const VPLane &Lane, VPTransformState &State) {

  assert((!Instr->getType()->isAggregateType() ||

          canVectorizeTy(Instr->getType())) &&

         "Expected vectorizable or non-aggregate type.");


  // Does this instruction return a value ?

  bool IsVoidRetTy = Instr->getType()->isVoidTy();


  Instruction *Cloned = Instr->clone();

  if (!IsVoidRetTy) {

    Cloned->setName(Instr->getName() + ".cloned");

    Type *ResultTy = State.TypeAnalysis.inferScalarType(RepRecipe);

    // The operands of the replicate recipe may have been narrowed, resulting in

    // a narrower result type. Update the type of the cloned instruction to the

    // correct type.

    if (ResultTy != Cloned->getType())

      Cloned->mutateType(ResultTy);

  }


  RepRecipe->applyFlags(*Cloned);

  RepRecipe->applyMetadata(*Cloned);


  if (RepRecipe->hasPredicate())

    cast<CmpInst>(Cloned)->setPredicate(RepRecipe->getPredicate());


  if (auto DL = RepRecipe->getDebugLoc())

    State.setDebugLocFrom(DL);


  // Replace the operands of the cloned instructions with their scalar

  // equivalents in the new loop.

  for (const auto &I : enumerate(RepRecipe->operands())) {

    auto InputLane = Lane;

    VPValue *Operand = I.value();

    if (vputils::isSingleScalar(Operand))

      InputLane = VPLane::getFirstLane();

    Cloned->setOperand(I.index(), State.get(Operand, InputLane));

  }


  // Place the cloned scalar in the new loop.

  State.Builder.Insert(Cloned);


  State.set(RepRecipe, Cloned, Lane);


  // If we just cloned a new assumption, add it the assumption cache.

  if (auto *II = dyn_cast<AssumeInst>(Cloned))

    State.AC->registerAssumption(II);


  assert(

      (RepRecipe->getRegion() ||

       !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||

       all_of(RepRecipe->operands(),

              [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&

      "Expected a recipe is either within a region or all of its operands "

      "are defined outside the vectorized region.");

}


/// Generate the scalar clone(s) of the underlying instruction.
///
/// Without a current lane in \p State the recipe must be a single scalar
/// (asserted) and exactly one clone is generated for lane 0. Otherwise one
/// clone is generated for State.Lane, and if VF is a vector and shouldPack()
/// holds, that clone is additionally packed into the recipe's vector value.
void VPReplicateRecipe::execute(VPTransformState &State) {
  Instruction *UI = getUnderlyingInstr();

  if (!State.Lane) {
    assert(IsSingleScalar && "VPReplicateRecipes outside replicate regions "
                             "must have already been unrolled");
    scalarizeInstruction(UI, this, VPLane(0), State);
    return;
  }

  assert((State.VF.isScalar() || !isSingleScalar()) &&
         "uniform recipe shouldn't be predicated");
  assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
  scalarizeInstruction(UI, this, *State.Lane, State);

  // Nothing more to do unless the scalar instance has to be packed into a
  // vector.
  if (!State.VF.isVector() || !shouldPack())
    return;

  // The first lane starts from a poison vector; later lanes extend the vector
  // already recorded for this recipe.
  Value *WideValue;
  if (State.Lane->isFirstLane())
    WideValue = PoisonValue::get(toVectorizedTy(UI->getType(), State.VF));
  else
    WideValue = State.get(this);

  State.set(this,
            State.packScalarIntoVectorizedValue(this, WideValue, *State.Lane));
}


bool VPReplicateRecipe::shouldPack() const {

  // Find if the recipe is used by a widened recipe via an intervening

  // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.

  return any_of(users(), [](const VPUser *U) {

    if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))

      return !vputils::onlyScalarValuesUsed(PredR);

    return false;

  });

}


/// Returns the SCEV expression for \p Ptr if it is a pointer computation for
/// which the legacy cost model computes a SCEV expression when computing the
/// address cost, and nullptr otherwise.
///
/// Computing SCEVs for VPValues is incomplete and returns
/// SCEVCouldNotCompute in cases the legacy cost model can compute SCEVs. That
/// SCEVCouldNotCompute is returned unchanged, so that callers can fall back
/// to the legacy cost model.
static const SCEV *getAddressAccessSCEV(const VPValue *Ptr,
                                        PredicatedScalarEvolution &PSE,
                                        const Loop *L) {
  const SCEV *PtrSCEV = vputils::getSCEVExprForVPValue(Ptr, PSE, L);

  // Pass SCEVCouldNotCompute through; otherwise only keep expressions that
  // qualify as address SCEVs for costing.
  if (isa<SCEVCouldNotCompute>(PtrSCEV) ||
      vputils::isAddressSCEVForCost(PtrSCEV, *PSE.getSE(), L))
    return PtrSCEV;

  return nullptr;
}


/// Returns true if \p V is used as part of the address of another load or

/// store.


static bool isUsedByLoadStoreAddress(const VPUser *V) {

  SmallPtrSet<const VPUser *, 4> Seen;

  SmallVector<const VPUser *> WorkList = {V};


  while (!WorkList.empty()) {

    auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());

    if (!Cur || !Seen.insert(Cur).second)

      continue;


    auto *Blend = dyn_cast<VPBlendRecipe>(Cur);

    // Skip blends that use V only through a compare by checking if any incoming

    // value was already visited.

    if (Blend && none_of(seq<unsigned>(0, Blend->getNumIncomingValues()),

                         [&](unsigned I) {

                           return Seen.contains(

                               Blend->getIncomingValue(I)->getDefiningRecipe());

                         }))

      continue;


    for (VPUser *U : Cur->users()) {

      if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U))

        if (InterleaveR->getAddr() == Cur)

          return true;

      if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) {

        if (RepR->getOpcode() == Instruction::Load &&

            RepR->getOperand(0) == Cur)

          return true;

        if (RepR->getOpcode() == Instruction::Store &&

            RepR->getOperand(1) == Cur)

          return true;

      }

      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {

        if (MemR->getAddr() == Cur && MemR->isConsecutive())

          return true;

      }

    }


    // The legacy cost model only supports scalarization loads/stores with phi

    // addresses, if the phi is directly used as load/store address. Don't

    // traverse further for Blends.

    if (Blend)

      continue;


    append_range(WorkList, Cur->users());

  }

  return false;

}


/// Computes the cost of this replicate recipe for vectorization factor \p VF.
///
/// The underlying instruction is recorded in Ctx.SkipCostComputation. An
/// invalid cost is returned for scalable \p VF unless the recipe is a single
/// scalar. Opcodes handled by the switch are costed from their scalar cost
/// (multiplied by the fixed VF when the recipe is not a single scalar, plus
/// scalarization overhead where applicable). Everything else, as well as the
/// load/store cases that break out of the switch, is delegated to
/// Ctx.getLegacyCost.
InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  Instruction *UI = cast<Instruction>(getUnderlyingValue());
  // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
  // transform, avoid computing their cost multiple times for now.
  Ctx.SkipCostComputation.insert(UI);

  // Replicating per lane requires a fixed number of lanes.
  if (VF.isScalable() && !isSingleScalar())
    return InstructionCost::getInvalid();

  switch (UI->getOpcode()) {
  case Instruction::Alloca:
    if (VF.isScalable())
      return InstructionCost::getInvalid();
    return Ctx.TTI.getArithmeticInstrCost(
        Instruction::Mul, Ctx.Types.inferScalarType(this), Ctx.CostKind);
  case Instruction::GetElementPtr:
    // We mark this instruction as zero-cost because the cost of GEPs in
    // vectorized code depends on whether the corresponding memory instruction
    // is scalarized or not. Therefore, we handle GEPs with the memory
    // instruction cost.
    return 0;
  case Instruction::Call: {
    // The called function is the last operand; all preceding operands are the
    // call arguments.
    auto *CalledFn =
        cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());

    SmallVector<const VPValue *> ArgOps(drop_end(operands()));
    SmallVector<Type *, 4> Tys;
    for (const VPValue *ArgOp : ArgOps)
      Tys.push_back(Ctx.Types.inferScalarType(ArgOp));

    if (CalledFn->isIntrinsic())
      // Various pseudo-intrinsics with costs of 0 are scalarized instead of
      // vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
      switch (CalledFn->getIntrinsicID()) {
      case Intrinsic::assume:
      case Intrinsic::lifetime_end:
      case Intrinsic::lifetime_start:
      case Intrinsic::sideeffect:
      case Intrinsic::pseudoprobe:
      case Intrinsic::experimental_noalias_scope_decl: {
        assert(getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
                                    ElementCount::getFixed(1), Ctx) == 0 &&
               "scalarizing intrinsic should be free");
        return InstructionCost(0);
      }
      default:
        break;
      }

    Type *ResultTy = Ctx.Types.inferScalarType(this);
    InstructionCost ScalarCallCost =
        Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
    if (isSingleScalar()) {
      // For intrinsics, use the cheaper of the call cost and the intrinsic
      // cost.
      if (CalledFn->isIntrinsic())
        ScalarCallCost = std::min(
            ScalarCallCost,
            getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
                                 ElementCount::getFixed(1), Ctx));
      return ScalarCallCost;
    }

    return ScalarCallCost * VF.getFixedValue() +
           Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF);
  }
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
    return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1),
                                      Ctx) *
           (isSingleScalar() ? 1 : VF.getFixedValue());
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem: {
    InstructionCost ScalarCost =
        getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), Ctx);
    if (isSingleScalar())
      return ScalarCost;

    ScalarCost = ScalarCost * VF.getFixedValue() +
                 Ctx.getScalarizationOverhead(Ctx.Types.inferScalarType(this),
                                              to_vector(operands()), VF);
    // If the recipe is not predicated (i.e. not in a replicate region), return
    // the scalar cost. Otherwise handle predicated cost. A recipe without an
    // enclosing region is treated as not predicated, matching the null check
    // done for loads and stores below.
    const VPRegionBlock *ParentRegion = getRegion();
    if (!ParentRegion || !ParentRegion->isReplicator())
      return ScalarCost;

    // Account for the phi nodes that we will create.
    ScalarCost += VF.getFixedValue() *
                  Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
    // Scale the cost by the probability of executing the predicated blocks.
    // This assumes the predicated block for each vector lane is equally
    // likely.
    ScalarCost /= Ctx.getPredBlockCostDivisor(UI->getParent());
    return ScalarCost;
  }
  case Instruction::Load:
  case Instruction::Store: {
    // TODO: See getMemInstScalarizationCost for how to handle replicating and
    // predicated cases.
    const VPRegionBlock *ParentRegion = getRegion();
    if (ParentRegion && ParentRegion->isReplicator())
      break;

    bool IsLoad = UI->getOpcode() == Instruction::Load;
    // The pointer is operand 0 of a load and operand 1 of a store.
    const VPValue *PtrOp = getOperand(!IsLoad);
    const SCEV *PtrSCEV = getAddressAccessSCEV(PtrOp, Ctx.PSE, Ctx.L);
    // No SCEV could be computed for the address; use the legacy cost instead.
    if (isa_and_nonnull<SCEVCouldNotCompute>(PtrSCEV))
      break;

    Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
    Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
    const Align Alignment = getLoadStoreAlignment(UI);
    unsigned AS = cast<PointerType>(ScalarPtrTy)->getAddressSpace();
    TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
    InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);

    Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
    bool UsedByLoadStoreAddress =
        !PreferVectorizedAddressing && isUsedByLoadStoreAddress(this);
    InstructionCost ScalarCost =
        ScalarMemOpCost +
        Ctx.TTI.getAddressComputationCost(
            PtrTy, UsedByLoadStoreAddress ? nullptr : Ctx.PSE.getSE(), PtrSCEV,
            Ctx.CostKind);
    if (isSingleScalar())
      return ScalarCost;

    SmallVector<const VPValue *> OpsToScalarize;
    Type *ResultTy = Type::getVoidTy(PtrTy->getContext());
    // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we
    // don't assign scalarization overhead in general, if the target prefers
    // vectorized addressing or the loaded value is used as part of an address
    // of another load or store.
    if (!UsedByLoadStoreAddress) {
      bool EfficientVectorLoadStore =
          Ctx.TTI.supportsEfficientVectorElementLoadStore();
      if (!(IsLoad && !PreferVectorizedAddressing) &&
          !(!IsLoad && EfficientVectorLoadStore))
        append_range(OpsToScalarize, operands());

      if (!EfficientVectorLoadStore)
        ResultTy = Ctx.Types.inferScalarType(this);
    }

    return (ScalarCost * VF.getFixedValue()) +
           Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
  }
  case Instruction::SExt:
  case Instruction::ZExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::PtrToAddr:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::AddrSpaceCast: {
    return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1),
                                      Ctx) *
           (isSingleScalar() ? 1 : VF.getFixedValue());
  }
  case Instruction::ExtractValue:
  case Instruction::InsertValue:
    return Ctx.TTI.getInsertExtractValueCost(getOpcode(), Ctx.CostKind);
  }

  // Opcodes not handled above, and the cases that broke out of the switch,
  // are costed by the legacy cost model.
  return Ctx.getLegacyCost(UI, VF);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this recipe to \p O, prefixed by \p Indent. Single-scalar recipes
/// are printed as "CLONE", all others as "REPLICATE". Recipes that also pack
/// their scalar results into a vector get a trailing " (S->V)" marker.
void VPReplicateRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  auto *UI = getUnderlyingInstr();

  O << Indent;
  if (IsSingleScalar)
    O << "CLONE ";
  else
    O << "REPLICATE ";

  // Only instructions producing a value have a result to print.
  if (!UI->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  auto *CB = dyn_cast<CallBase>(UI);
  if (!CB) {
    O << Instruction::getOpcodeName(UI->getOpcode());
    printFlags(O);
    printOperands(O, SlotTracker);
  } else {
    O << "call";
    printFlags(O);
    O << "@" << CB->getCalledFunction()->getName() << "(";
    // The last operand is left out of the printed argument list.
    auto ArgsEnd = op_begin() + (getNumOperands() - 1);
    interleaveComma(make_range(op_begin(), ArgsEnd), O,
                    [&O, &SlotTracker](VPValue *Op) {
                      Op->printAsOperand(O, SlotTracker);
                    });
    O << ")";
  }

  if (shouldPack())
    O << " (S->V)";
}


#endif


/// Emits the conditional branch guarding a predicated block for the lane in
/// State.Lane. The branch condition is the per-lane value of the block-in
/// mask (operand 0).
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  assert(State.Lane && "Branch on Mask works only on single instance.");

  Value *LaneCond = State.get(getOperand(0), *State.Lane);

  // The block currently ends in a placeholder unreachable. Swap it for a
  // conditional branch; both successors are filled in later, once the
  // destination blocks have been created.
  auto *OldTerm = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(OldTerm) &&
         "Expected to replace unreachable terminator with conditional branch.");
  // The branch is created with PrevBB as first successor, which is cleared
  // again right away.
  auto *NewBr = State.Builder.CreateCondBr(LaneCond, State.CFG.PrevBB, nullptr);
  NewBr->setSuccessor(0, nullptr);
  OldTerm->eraseFromParent();
}


/// Branches on mask are free: the legacy cost model assigns no cost to the
/// branches of individual replicate regions, and the VPlan-based cost model
/// mirrors that behavior for now.
InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
                                                  VPCostContext &Ctx) const {
  return InstructionCost(0);
}


/// Creates the phi that merges the result of a predicated instruction
/// (operand 0) with the value flowing around the predicated block, for the
/// lane in State.Lane.
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Lane && "Predicated instruction PHI works per instance.");
  VPValue *PredInstOp = getOperand(0);
  auto *ScalarInst = cast<Instruction>(State.get(PredInstOp, *State.Lane));
  BasicBlock *PredicatedBB = ScalarInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(PredInstOp) &&
         "operand must be VPReplicateRecipe");

  // With the current pack/unpack logic only a single phi is generated. If a
  // vector value exists for the predicated instruction at this point, the
  // instruction has vector users only and its recipe was marked to also do
  // the packing (thereby "hoisting" the insert-element sequence); a phi for
  // the vector value is then needed. Otherwise a phi for the scalar value is
  // needed, which is handled first.
  if (!State.hasVectorValue(PredInstOp)) {
    // Lanes other than the first need no phi if only the first lane is used.
    if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
      return;

    Type *ScalarTy = State.TypeAnalysis.inferScalarType(PredInstOp);
    PHINode *ScalarPhi = State.Builder.CreatePHI(ScalarTy, 2);
    ScalarPhi->addIncoming(PoisonValue::get(ScalarInst->getType()),
                           PredicatingBB);
    ScalarPhi->addIncoming(ScalarInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Lane))
      State.reset(this, ScalarPhi, *State.Lane);
    else
      State.set(this, ScalarPhi, *State.Lane);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(PredInstOp, ScalarPhi, *State.Lane);
    return;
  }

  auto *Packed = cast<Instruction>(State.get(PredInstOp));
  assert((isa<InsertElementInst, InsertValueInst>(Packed)) &&
         "Packed operands must generate an insertelement or insertvalue");

  // If the packed value is a struct, it is built by a sequence like:
  // %1      = insertvalue %unmodified, %x, 0
  // %2      = insertvalue %1, %y, 1
  // %Packed = insertvalue %2, %z, 2
  // Walk up the chain so that operand 0 of the instruction reached is the
  // unmodified value.
  if (auto *StructTy = dyn_cast<StructType>(Packed->getType())) {
    const unsigned NumHops = StructTy->getNumContainedTypes() - 1;
    for (unsigned Hop = 0; Hop < NumHops; ++Hop)
      Packed = cast<InsertValueInst>(Packed->getOperand(0));
  }

  PHINode *VecPhi = State.Builder.CreatePHI(Packed->getType(), 2);
  // Unmodified vector when the predicated block is bypassed.
  VecPhi->addIncoming(Packed->getOperand(0), PredicatingBB);
  // New vector with the inserted element otherwise.
  VecPhi->addIncoming(Packed, PredicatedBB);
  if (State.hasVectorValue(this))
    State.reset(this, VecPhi);
  else
    State.set(this, VecPhi);
  // NOTE: Currently we need to update the value of the operand, so the next
  // predicated iteration inserts its generated value in the correct vector.
  State.reset(PredInstOp, VecPhi);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this recipe to \p O as
/// "<Indent>PHI-PREDICATED-INSTRUCTION <result> = <operands>".
void VPPredInstPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "PHI-PREDICATED-INSTRUCTION ";
  // Result first, then the operands it is defined from.
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}


#endif


/// Computes the cost of this widened memory recipe for vectorization factor
/// \p VF.
///
/// Non-consecutive accesses are costed as a gather/scatter intrinsic plus the
/// address computation. Consecutive masked accesses are costed as a masked
/// load/store intrinsic, and consecutive unmasked accesses as a plain wide
/// memory operation.
InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
                                                 VPCostContext &Ctx) const {
  Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
  unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
                    ->getAddressSpace();
  // Both the plain and the EVL-based load recipes are loads; everything else
  // is a store. This single classification is used for all decisions below.
  const bool IsLoad = isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this);
  unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;

  if (!Consecutive) {
    // TODO: Using the original IR may not be accurate.
    // Currently, ARM will use the underlying IR to calculate gather/scatter
    // instruction cost.
    assert(!Reverse &&
           "Inconsecutive memory access should not have the order.");

    const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
    Type *PtrTy = Ptr->getType();

    // If the address value is uniform across all lanes, then the address can be
    // calculated with scalar type and broadcast.
    if (!vputils::isSingleScalar(getAddr()))
      PtrTy = toVectorTy(PtrTy, VF);

    unsigned IID = isa<VPWidenLoadRecipe>(this)      ? Intrinsic::masked_gather
                   : isa<VPWidenStoreRecipe>(this)   ? Intrinsic::masked_scatter
                   : isa<VPWidenLoadEVLRecipe>(this) ? Intrinsic::vp_gather
                                                     : Intrinsic::vp_scatter;
    return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
                                             Ctx.CostKind) +
           Ctx.TTI.getMemIntrinsicInstrCost(
               MemIntrinsicCostAttributes(IID, Ty, Ptr, IsMasked, Alignment,
                                          &Ingredient),
               Ctx.CostKind);
  }

  InstructionCost Cost = 0;
  if (IsMasked) {
    // Select the masked intrinsic from IsLoad, so that a masked
    // VPWidenLoadEVLRecipe delegating to this function is costed as a masked
    // load rather than as a masked store.
    unsigned IID = IsLoad ? Intrinsic::masked_load : Intrinsic::masked_store;
    Cost += Ctx.TTI.getMemIntrinsicInstrCost(
        MemIntrinsicCostAttributes(IID, Ty, Alignment, AS), Ctx.CostKind);
  } else {
    // Operand info is taken from operand 0 for loads and operand 1 for
    // stores.
    TTI::OperandValueInfo OpInfo =
        Ctx.getOperandInfo(IsLoad ? getOperand(0) : getOperand(1));
    Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind,
                                    OpInfo, &Ingredient);
  }
  return Cost;
}


/// Emits the wide load for this recipe: a masked gather for non-consecutive
/// accesses, a masked load for consecutive accesses with a mask, and a plain
/// aligned load otherwise. The result is recorded as the vector value of this
/// recipe.
void VPWidenLoadRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  const bool IsGather = !isConsecutive();
  auto *VecTy = VectorType::get(getLoadStoreType(&Ingredient), State.VF);

  // A missing mask operand leaves Mask null. A null mask needs no reversal,
  // so only an actual mask value is reversed for reverse accesses.
  Value *Mask = nullptr;
  if (VPValue *MaskOp = getMask()) {
    Mask = State.get(MaskOp);
    if (isReverse())
      Mask = Builder.CreateVectorReverse(Mask, "reverse");
  }

  // Gathers take the address as a vector, all other loads as a scalar.
  Value *Addr = State.get(getAddr(), /*IsScalar*/ !IsGather);

  Value *WideLoad;
  if (IsGather)
    WideLoad = Builder.CreateMaskedGather(VecTy, Addr, Alignment, Mask, nullptr,
                                          "wide.masked.gather");
  else if (Mask)
    WideLoad = Builder.CreateMaskedLoad(VecTy, Addr, Alignment, Mask,
                                        PoisonValue::get(VecTy),
                                        "wide.masked.load");
  else
    WideLoad = Builder.CreateAlignedLoad(VecTy, Addr, Alignment, "wide.load");

  applyMetadata(*cast<Instruction>(WideLoad));
  State.set(this, WideLoad);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this recipe to \p O as "<Indent>WIDEN <result> = load <operands>".
void VPWidenLoadRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN ";
  // Result first, then the operands of the load.
  printAsOperand(O, SlotTracker);
  O << " = load ";
  printOperands(O, SlotTracker);
}


#endif


/// Emits an experimental_vp_reverse of \p Operand under explicit vector
/// length \p EVL, naming the result \p Name. An all-true mask is passed to
/// the reverse rather than the actual mask, as this avoids a dependence
/// without affecting the result.
static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
                                     Value *EVL, const Twine &Name) {
  auto *VecTy = cast<VectorType>(Operand->getType());
  // Splat of "true" with as many elements as the reversed vector.
  Value *AllOnes =
      Builder.CreateVectorSplat(VecTy->getElementCount(), Builder.getTrue());
  return Builder.CreateIntrinsic(VecTy, Intrinsic::experimental_vp_reverse,
                                 {Operand, AllOnes, EVL}, nullptr, Name);
}


/// Emits the EVL-based wide load for this recipe: vp.gather for
/// non-consecutive accesses and vp.load otherwise. The alignment is attached
/// as a parameter attribute to the pointer argument, and the result is
/// recorded as the vector value of this recipe.
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  const bool IsGather = !isConsecutive();
  auto *VecTy = VectorType::get(getLoadStoreType(&Ingredient), State.VF);

  Value *EVL = State.get(getEVL(), VPLane(0));
  // Gathers take the address as a vector, vp.load as a scalar.
  Value *Addr = State.get(getAddr(), !IsGather);

  // The VP intrinsics always take a mask operand; use an all-true splat when
  // the recipe has no mask.
  Value *Mask = nullptr;
  if (VPValue *MaskOp = getMask()) {
    Mask = State.get(MaskOp);
    if (isReverse())
      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
  } else {
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  }

  CallInst *WideLoad;
  if (IsGather)
    WideLoad =
        Builder.CreateIntrinsic(VecTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                nullptr, "wide.masked.gather");
  else
    WideLoad = Builder.CreateIntrinsic(VecTy, Intrinsic::vp_load,
                                       {Addr, Mask, EVL}, nullptr,
                                       "vp.op.load");

  // Argument 0 is the pointer operand of both intrinsics.
  WideLoad->addParamAttr(
      0, Attribute::getWithAlignment(WideLoad->getContext(), Alignment));
  applyMetadata(*WideLoad);
  State.set(this, WideLoad);
}


/// Computes the cost of this EVL-based load for vectorization factor \p VF.
/// Non-consecutive or masked loads use the generic widened-memory cost;
/// consecutive unmasked loads are costed as a vp.load intrinsic.
InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
                                                  VPCostContext &Ctx) const {
  const bool UseGenericCost = !Consecutive || IsMasked;
  if (UseGenericCost)
    return VPWidenMemoryRecipe::computeCost(VF, Ctx);

  // getMemIntrinsicInstrCost() is used here instead of getMemoryOpCost()
  // because the EVL recipes use EVL to replace the tail mask, whereas the
  // legacy model always calculates the cost of the mask.
  // TODO: Use getMemoryOpCost() instead of getMemIntrinsicInstrCost() when we
  // don't need to compare to the legacy cost model.
  Type *VecTy = toVectorTy(getLoadStoreType(&Ingredient), VF);
  auto *PtrTy = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()));
  unsigned AddrSpace = PtrTy->getAddressSpace();
  return Ctx.TTI.getMemIntrinsicInstrCost(
      MemIntrinsicCostAttributes(Intrinsic::vp_load, VecTy, Alignment,
                                 AddrSpace),
      Ctx.CostKind);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this recipe to \p O as
/// "<Indent>WIDEN <result> = vp.load <operands>".
void VPWidenLoadEVLRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                       VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN ";
  // Result first, then the operands of the load.
  printAsOperand(O, SlotTracker);
  O << " = vp.load ";
  printOperands(O, SlotTracker);
}


#endif


/// Emits the wide store for this recipe: a masked scatter for
/// non-consecutive accesses, a masked store for consecutive accesses with a
/// mask, and a plain aligned store otherwise.
void VPWidenStoreRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  const bool IsScatter = !isConsecutive();
  VPValue *StoredOp = getStoredValue();

  // A missing mask operand leaves Mask null. A null mask needs no reversal,
  // so only an actual mask value is reversed for reverse accesses.
  Value *Mask = nullptr;
  if (VPValue *MaskOp = getMask()) {
    Mask = State.get(MaskOp);
    if (isReverse())
      Mask = Builder.CreateVectorReverse(Mask, "reverse");
  }

  Value *Data = State.get(StoredOp);
  // Scatters take the address as a vector, all other stores as a scalar.
  Value *Addr = State.get(getAddr(), /*IsScalar*/ !IsScatter);

  Instruction *WideStore = nullptr;
  if (IsScatter) {
    WideStore = Builder.CreateMaskedScatter(Data, Addr, Alignment, Mask);
  } else if (Mask) {
    WideStore = Builder.CreateMaskedStore(Data, Addr, Alignment, Mask);
  } else {
    WideStore = Builder.CreateAlignedStore(Data, Addr, Alignment);
  }
  applyMetadata(*WideStore);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this recipe to \p O as "<Indent>WIDEN store <operands>".
void VPWidenStoreRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN store ";
  printOperands(O, SlotTracker);
}


#endif


/// Emits the EVL-based wide store for this recipe: vp.scatter for
/// non-consecutive accesses and vp.store otherwise. The alignment is attached
/// as a parameter attribute to the pointer argument.
void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  const bool IsScatter = !isConsecutive();
  VPValue *StoredOp = getStoredValue();

  Value *Data = State.get(StoredOp);
  Value *EVL = State.get(getEVL(), VPLane(0));

  // The VP intrinsics always take a mask operand; use an all-true splat when
  // the recipe has no mask.
  Value *Mask = nullptr;
  if (VPValue *MaskOp = getMask()) {
    Mask = State.get(MaskOp);
    if (isReverse())
      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
  } else {
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  }

  // Scatters take the address as a vector, vp.store as a scalar.
  Value *Addr = State.get(getAddr(), !IsScatter);

  // Both intrinsics return void and share the same operand list.
  Type *VoidTy = Type::getVoidTy(EVL->getContext());
  CallInst *WideStore = nullptr;
  if (IsScatter)
    WideStore = Builder.CreateIntrinsic(VoidTy, Intrinsic::vp_scatter,
                                        {Data, Addr, Mask, EVL});
  else
    WideStore = Builder.CreateIntrinsic(VoidTy, Intrinsic::vp_store,
                                        {Data, Addr, Mask, EVL});

  // Argument 1 is the pointer operand of both intrinsics.
  WideStore->addParamAttr(
      1, Attribute::getWithAlignment(WideStore->getContext(), Alignment));
  applyMetadata(*WideStore);
}


/// Computes the cost of this EVL-based store for vectorization factor \p VF.
/// Non-consecutive or masked stores use the generic widened-memory cost;
/// consecutive unmasked stores are costed as a vp.store intrinsic.
InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
                                                   VPCostContext &Ctx) const {
  const bool UseGenericCost = !Consecutive || IsMasked;
  if (UseGenericCost)
    return VPWidenMemoryRecipe::computeCost(VF, Ctx);

  // getMemIntrinsicInstrCost() is used here instead of getMemoryOpCost()
  // because the EVL recipes use EVL to replace the tail mask, whereas the
  // legacy model always calculates the cost of the mask.
  // TODO: Use getMemoryOpCost() instead of getMemIntrinsicInstrCost() when we
  // don't need to compare to the legacy cost model.
  Type *VecTy = toVectorTy(getLoadStoreType(&Ingredient), VF);
  auto *PtrTy = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()));
  unsigned AddrSpace = PtrTy->getAddressSpace();
  return Ctx.TTI.getMemIntrinsicInstrCost(
      MemIntrinsicCostAttributes(Intrinsic::vp_store, VecTy, Alignment,
                                 AddrSpace),
      Ctx.CostKind);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this recipe to \p O as "<Indent>WIDEN vp.store <operands>".
void VPWidenStoreEVLRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                        VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN vp.store ";
  printOperands(O, SlotTracker);
}


#endif


/// Casts the vector \p V to the vector type \p DstVTy, which must have the
/// same element count and the same element size according to \p DL. If the
/// element types cannot be cast directly, the cast goes through an
/// intermediate vector of integers of the same element size.
static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V,
                                     VectorType *DstVTy, const DataLayout &DL) {
  // V must be a vector with as many elements as DstVTy.
  auto *SrcVTy = cast<VectorType>(V->getType());
  const ElementCount NumElts = DstVTy->getElementCount();
  assert(NumElts == SrcVTy->getElementCount() &&
         "Vector dimensions do not match");

  Type *SrcEltTy = SrcVTy->getElementType();
  Type *DstEltTy = DstVTy->getElementType();
  assert((DL.getTypeSizeInBits(SrcEltTy) == DL.getTypeSizeInBits(DstEltTy)) &&
         "Vector elements must have same size");

  // Element types that are directly castable need a single cast only.
  if (CastInst::isBitOrNoopPointerCastable(SrcEltTy, DstEltTy, DL))
    return Builder.CreateBitOrPointerCast(V, DstVTy);

  // Otherwise V cannot be cast to the desired vector type in one step. This
  // may happen when V is a floating point vector but DstVTy is a vector of
  // pointers, or vice-versa. Use a two-step cast through an intermediate
  // integer type, i.e. Ptr <-> Int <-> Float.
  assert((DstEltTy->isPointerTy() != SrcEltTy->isPointerTy()) &&
         "Only one type should be a pointer type");
  assert((DstEltTy->isFloatingPointTy() != SrcEltTy->isFloatingPointTy()) &&
         "Only one type should be a floating point type");

  Type *IntEltTy =
      IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcEltTy));
  auto *IntVTy = VectorType::get(IntEltTy, NumElts);
  Value *AsInts = Builder.CreateBitOrPointerCast(V, IntVTy);
  return Builder.CreateBitOrPointerCast(AsInts, DstVTy);
}


/// Builds a single vector holding the interleaved elements of the input
/// vectors \p Vals, which must all have the same type. The resulting value is
/// named \p Name.
static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
                                const Twine &Name) {
  const unsigned NumVecs = Vals.size();
  assert(NumVecs > 1 && "Tried to interleave invalid number of vectors");

  auto *InputTy = cast<VectorType>(Vals[0]->getType());
  assert(all_of(Vals,
                [InputTy](Value *Val) { return Val->getType() == InputTy; }) &&
         "Tried to interleave mismatched types");

  // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
  // they are interleaved with intrinsics.
  if (InputTy->isScalableTy()) {
    assert(NumVecs <= 8 && "Unsupported interleave factor for scalable vectors");
    return Builder.CreateVectorInterleave(Vals, Name);
  }

  // Fixed length: concatenate all inputs into one wide vector first, then
  // shuffle its elements into interleaved order.
  Value *Concat = concatenateVectors(Builder, Vals);
  const unsigned EltsPerVec = InputTy->getElementCount().getFixedValue();
  return Builder.CreateShuffleVector(
      Concat, createInterleaveMask(EltsPerVec, NumVecs), Name);
}


// Try to vectorize the interleave group that \p Instr belongs to.

//

// E.g. Translate following interleaved load group (factor = 3):

//   for (i = 0; i < N; i+=3) {

//     R = Pic[i];             // Member of index 0

//     G = Pic[i+1];           // Member of index 1

//     B = Pic[i+2];           // Member of index 2

//     ... // do something to R, G, B

//   }

// To:

//   %wide.vec = load <12 x i32>                       ; Read 4 tuples of R,G,B

//   %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9>   ; R elements

//   %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10>  ; G elements

//   %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11>  ; B elements

//

// Or translate following interleaved store group (factor = 3):

//   for (i = 0; i < N; i+=3) {

//     ... do something to R, G, B

//     Pic[i]   = R;           // Member of index 0

//     Pic[i+1] = G;           // Member of index 1

//     Pic[i+2] = B;           // Member of index 2

//   }

// To:

//   %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>

//   %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>

//   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,

//        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements

//   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B


// Generate the wide memory access for this interleave group.
//
// Load group: emit one (optionally masked) wide load of VF * factor elements,
// extract one strided vector per existing member and set it as the value of
// the corresponding defined VPValue.
// Store group: collect the stored vector of every member (poison for gaps),
// interleave them into one wide vector and emit one (optionally masked) wide
// store.
void VPInterleaveRecipe::execute(VPTransformState &State) {
  assert(!State.Lane && "Interleave group being replicated.");
  assert((!needsMaskForGaps() || !State.VF.isScalable()) &&
         "Masking gaps for scalable vectors is not yet supported.");
  const InterleaveGroup<Instruction> *Group = getInterleaveGroup();
  Instruction *Instr = Group->getInsertPos();

  // Prepare for the vector type of the interleaved load/store. The element
  // type is taken from the group's insert position; the wide vector holds
  // VF * InterleaveFactor elements.
  Type *ScalarTy = getLoadStoreType(Instr);
  unsigned InterleaveFactor = Group->getFactor();
  auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);

  VPValue *BlockInMask = getMask();
  VPValue *Addr = getAddr();
  // Only lane 0 of the address is requested for the wide access.
  Value *ResAddr = State.get(Addr, VPLane(0));

  // Builds the mask for the wide access from the block mask (if any) and
  // \p MaskForGaps (if any).
  auto CreateGroupMask = [&BlockInMask, &State,
                          &InterleaveFactor](Value *MaskForGaps) -> Value * {
    if (State.VF.isScalable()) {
      // Scalable: interleave InterleaveFactor copies of the block mask.
      // NOTE(review): BlockInMask is passed to State.get without a null check
      // here; both call sites only invoke this lambda when BlockInMask or
      // MaskForGaps is set, and MaskForGaps is asserted to be null below.
      assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
      assert(InterleaveFactor <= 8 &&
             "Unsupported deinterleave factor for scalable vectors");
      auto *ResBlockInMask = State.get(BlockInMask);
      SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
      return interleaveVectors(State.Builder, Ops, "interleaved.mask");
    }

    // Fixed length without a block mask: the gap mask (possibly null) is the
    // whole group mask.
    if (!BlockInMask)
      return MaskForGaps;

    // Fixed length with a block mask: widen the block mask with a replicated
    // shuffle mask and, if a gap mask exists, AND the two together.
    Value *ResBlockInMask = State.get(BlockInMask);
    Value *ShuffledMask = State.Builder.CreateShuffleVector(
        ResBlockInMask,
        createReplicatedMask(InterleaveFactor, State.VF.getFixedValue()),
        "interleaved.mask");
    return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
                                                   ShuffledMask, MaskForGaps)
                       : ShuffledMask;
  };

  const DataLayout &DL = Instr->getDataLayout();
  // Vectorize the interleaved load group.
  if (isa<LoadInst>(Instr)) {
    // The gap mask is only built when the recipe says one is needed.
    Value *MaskForGaps = nullptr;
    if (needsMaskForGaps()) {
      MaskForGaps =
          createBitMaskForGaps(State.Builder, State.VF.getFixedValue(), *Group);
      assert(MaskForGaps && "Mask for Gaps is required but it is null");
    }

    // Emit a masked load when any mask is present, a plain aligned load
    // otherwise. Masked-off lanes are filled from a poison pass-through.
    Instruction *NewLoad;
    if (BlockInMask || MaskForGaps) {
      Value *GroupMask = CreateGroupMask(MaskForGaps);
      Value *PoisonVec = PoisonValue::get(VecTy);
      NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
                                               Group->getAlign(), GroupMask,
                                               PoisonVec, "wide.masked.vec");
    } else
      NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
                                                Group->getAlign(), "wide.vec");
    // Metadata is attached to the wide load itself, before NewLoad may be
    // re-pointed at the deinterleave intrinsic below.
    applyMetadata(*NewLoad);
    // TODO: Also manage existing metadata using VPIRMetadata.
    Group->addMetadata(NewLoad);

    ArrayRef<VPRecipeValue *> VPDefs = definedValues();
    if (VecTy->isScalableTy()) {
      // Scalable vectors cannot use arbitrary shufflevectors (only splats),
      // so must use intrinsics to deinterleave.
      assert(InterleaveFactor <= 8 &&
             "Unsupported deinterleave factor for scalable vectors");
      NewLoad = State.Builder.CreateIntrinsic(
          Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
          NewLoad->getType(), NewLoad,
          /*FMFSource=*/nullptr, "strided.vec");
    }

    // Returns the sub-vector for member \p Index. NewLoad is captured by
    // reference, so on the scalable path it refers to the deinterleave
    // intrinsic result assigned just above.
    auto CreateStridedVector = [&InterleaveFactor, &State,
                                &NewLoad](unsigned Index) -> Value * {
      assert(Index < InterleaveFactor && "Illegal group index");
      if (State.VF.isScalable())
        return State.Builder.CreateExtractValue(NewLoad, Index);

      // For fixed length VF, use shuffle to extract the sub-vectors from the
      // wide load.
      auto StrideMask =
          createStrideMask(Index, InterleaveFactor, State.VF.getFixedValue());
      return State.Builder.CreateShuffleVector(NewLoad, StrideMask,
                                               "strided.vec");
    };

    // I walks all member slots of the group; J counts only existing members
    // and indexes the values defined by this recipe.
    for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
      Instruction *Member = Group->getMember(I);

      // Skip the gaps in the group.
      if (!Member)
        continue;

      Value *StridedVec = CreateStridedVector(I);

      // If this member has different type, cast the result type.
      if (Member->getType() != ScalarTy) {
        VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
        StridedVec =
            createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
      }

      // Reversed groups get each extracted sub-vector reversed.
      if (Group->isReverse())
        StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");

      State.set(VPDefs[J], StridedVec);
      ++J;
    }
    return;
  }

  // The sub vector type for current instruction.
  auto *SubVT = VectorType::get(ScalarTy, State.VF);

  // Vectorize the interleaved store group.
  // Unlike the load path, the gap mask is requested unconditionally; the
  // assert checks that it is non-null exactly when needsMaskForGaps() is true.
  Value *MaskForGaps =
      createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
  assert(((MaskForGaps != nullptr) == needsMaskForGaps()) &&
         "Mismatch between NeedsMaskForGaps and MaskForGaps");
  ArrayRef<VPValue *> StoredValues = getStoredValues();
  // Collect the stored vector from each member.
  SmallVector<Value *, 4> StoredVecs;
  // StoredIdx indexes StoredValues and advances only for existing members.
  unsigned StoredIdx = 0;
  for (unsigned i = 0; i < InterleaveFactor; i++) {
    assert((Group->getMember(i) || MaskForGaps) &&
           "Fail to get a member from an interleaved store group");
    Instruction *Member = Group->getMember(i);

    // A gap contributes a poison sub-vector; the assert above guarantees a
    // gap mask exists whenever a gap is encountered.
    if (!Member) {
      Value *Undef = PoisonValue::get(SubVT);
      StoredVecs.push_back(Undef);
      continue;
    }

    Value *StoredVec = State.get(StoredValues[StoredIdx]);
    ++StoredIdx;

    // Reversed groups get each stored sub-vector reversed before interleaving.
    if (Group->isReverse())
      StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");

    // If this member has different type, cast it to a unified type.

    if (StoredVec->getType() != SubVT)
      StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);

    StoredVecs.push_back(StoredVec);
  }

  // Interleave all the smaller vectors into one wider vector.
  Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
  // Emit a masked store when any mask is present, a plain aligned store
  // otherwise.
  Instruction *NewStoreInstr;
  if (BlockInMask || MaskForGaps) {
    Value *GroupMask = CreateGroupMask(MaskForGaps);
    NewStoreInstr = State.Builder.CreateMaskedStore(
        IVec, ResAddr, Group->getAlign(), GroupMask);
  } else
    NewStoreInstr =
        State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());

  applyMetadata(*NewStoreInstr);
  // TODO: Also manage existing metadata using VPIRMetadata.
  Group->addMetadata(NewStoreInstr);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print the interleave group: a header line with the factor, the insert
// position, the address and the optional mask, followed by one line per
// existing member (gaps are not printed).
void VPInterleaveRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  const InterleaveGroup<Instruction> *Group = getInterleaveGroup();
  const unsigned Factor = Group->getFactor();
  // A recipe with stored operands is printed as a store group, otherwise as
  // a load group.
  const bool IsStoreGroup = getNumStoreOperands() > 0;

  O << Indent << "INTERLEAVE-GROUP with factor " << Factor << " at ";
  Group->getInsertPos()->printAsOperand(O, false);
  O << ", ";
  getAddr()->printAsOperand(O, SlotTracker);
  if (VPValue *Mask = getMask()) {
    O << ", ";
    Mask->printAsOperand(O, SlotTracker);
  }

  // MemberIdx walks every slot of the group; ValueIdx counts existing members
  // only and selects the matching stored operand / defined value.
  unsigned ValueIdx = 0;
  for (unsigned MemberIdx = 0; MemberIdx < Factor; ++MemberIdx) {
    if (!Group->getMember(MemberIdx))
      continue;

    O << "\n" << Indent;
    if (IsStoreGroup) {
      O << "  store ";
      // Stored values follow the address operand, hence the offset of 1.
      getOperand(1 + ValueIdx)->printAsOperand(O, SlotTracker);
      O << " to index ";
    } else {
      O << "  ";
      getVPValue(ValueIdx)->printAsOperand(O, SlotTracker);
      O << " = load from index ";
    }
    O << MemberIdx;
    ++ValueIdx;
  }
}


#endif


// Generate the wide memory access for this interleave group using the
// vp.load / vp.store intrinsics with an explicit vector length.
//
// Load group: emit one vp.load of VF * factor elements, deinterleave it with
// the deinterleave intrinsic and set one extracted vector per existing member
// as the value of the corresponding defined VPValue.
// Store group: collect the stored vector of every member (poison for gaps),
// interleave them and emit one vp.store.
// NOTE(review): unlike VPInterleaveRecipe::execute, Group->isReverse() is not
// consulted anywhere in this function.
void VPInterleaveEVLRecipe::execute(VPTransformState &State) {
  assert(!State.Lane && "Interleave group being replicated.");
  assert(State.VF.isScalable() &&
         "Only support scalable VF for EVL tail-folding.");
  assert(!needsMaskForGaps() &&
         "Masking gaps for scalable vectors is not yet supported.");
  const InterleaveGroup<Instruction> *Group = getInterleaveGroup();
  Instruction *Instr = Group->getInsertPos();

  // Prepare for the vector type of the interleaved load/store.
  Type *ScalarTy = getLoadStoreType(Instr);
  unsigned InterleaveFactor = Group->getFactor();
  assert(InterleaveFactor <= 8 &&
         "Unsupported deinterleave/interleave factor for scalable vectors");
  ElementCount WideVF = State.VF * InterleaveFactor;
  auto *VecTy = VectorType::get(ScalarTy, WideVF);

  VPValue *Addr = getAddr();
  // Only lane 0 of the address and of the EVL is requested.
  Value *ResAddr = State.get(Addr, VPLane(0));
  Value *EVL = State.get(getEVL(), VPLane(0));
  // The wide access covers InterleaveFactor elements per active lane, so the
  // vector length is scaled by the factor (multiply flagged nuw and nsw).
  Value *InterleaveEVL = State.Builder.CreateMul(
      EVL, ConstantInt::get(EVL->getType(), InterleaveFactor), "interleave.evl",
      /* NUW= */ true, /* NSW= */ true);
  LLVMContext &Ctx = State.Builder.getContext();

  // With a block mask, interleave InterleaveFactor copies of it to form the
  // wide mask; without one, use an all-true splat of the wide width.
  Value *GroupMask = nullptr;
  if (VPValue *BlockInMask = getMask()) {
    SmallVector<Value *> Ops(InterleaveFactor, State.get(BlockInMask));
    GroupMask = interleaveVectors(State.Builder, Ops, "interleaved.mask");
  } else {
    GroupMask =
        State.Builder.CreateVectorSplat(WideVF, State.Builder.getTrue());
  }

  // Vectorize the interleaved load group.
  if (isa<LoadInst>(Instr)) {
    CallInst *NewLoad = State.Builder.CreateIntrinsic(
        VecTy, Intrinsic::vp_load, {ResAddr, GroupMask, InterleaveEVL}, nullptr,
        "wide.vp.load");
    // The alignment is carried as an attribute on the pointer argument
    // (argument 0 of vp.load).
    NewLoad->addParamAttr(0,
                          Attribute::getWithAlignment(Ctx, Group->getAlign()));

    // Metadata is attached to the vp.load itself, before NewLoad is re-pointed
    // at the deinterleave intrinsic below.
    applyMetadata(*NewLoad);
    // TODO: Also manage existing metadata using VPIRMetadata.
    Group->addMetadata(NewLoad);

    // Scalable vectors cannot use arbitrary shufflevectors (only splats),
    // so must use intrinsics to deinterleave.
    NewLoad = State.Builder.CreateIntrinsic(
        Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
        NewLoad->getType(), NewLoad,
        /*FMFSource=*/nullptr, "strided.vec");

    const DataLayout &DL = Instr->getDataLayout();
    // I walks all member slots of the group; J counts only existing members
    // and indexes the values defined by this recipe.
    for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
      Instruction *Member = Group->getMember(I);
      // Skip the gaps in the group.
      if (!Member)
        continue;

      Value *StridedVec = State.Builder.CreateExtractValue(NewLoad, I);
      // If this member has different type, cast the result type.
      if (Member->getType() != ScalarTy) {
        VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
        StridedVec =
            createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
      }

      State.set(getVPValue(J), StridedVec);
      ++J;
    }
    return;
  } // End for interleaved load.

  // The sub vector type for current instruction.
  auto *SubVT = VectorType::get(ScalarTy, State.VF);
  // Vectorize the interleaved store group.
  ArrayRef<VPValue *> StoredValues = getStoredValues();
  // Collect the stored vector from each member.
  SmallVector<Value *, 4> StoredVecs;
  const DataLayout &DL = Instr->getDataLayout();
  // StoredIdx indexes StoredValues and advances only for existing members.
  for (unsigned I = 0, StoredIdx = 0; I < InterleaveFactor; I++) {
    Instruction *Member = Group->getMember(I);
    // A gap contributes a poison sub-vector.
    if (!Member) {
      StoredVecs.push_back(PoisonValue::get(SubVT));
      continue;
    }

    Value *StoredVec = State.get(StoredValues[StoredIdx]);
    // If this member has different type, cast it to a unified type.
    if (StoredVec->getType() != SubVT)
      StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);

    StoredVecs.push_back(StoredVec);
    ++StoredIdx;
  }

  // Interleave all the smaller vectors into one wider vector.
  Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
  CallInst *NewStore =
      State.Builder.CreateIntrinsic(Type::getVoidTy(Ctx), Intrinsic::vp_store,
                                    {IVec, ResAddr, GroupMask, InterleaveEVL});
  // The alignment is carried as an attribute on the pointer argument
  // (argument 1 of vp.store; argument 0 is the stored value).
  NewStore->addParamAttr(1,
                         Attribute::getWithAlignment(Ctx, Group->getAlign()));

  applyMetadata(*NewStore);
  // TODO: Also manage existing metadata using VPIRMetadata.
  Group->addMetadata(NewStore);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print the EVL interleave group: a header line with the factor, the insert
// position, the address, the EVL and the optional mask, followed by one line
// per existing member (gaps are not printed).
void VPInterleaveEVLRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                        VPSlotTracker &SlotTracker) const {
  const InterleaveGroup<Instruction> *Group = getInterleaveGroup();
  const unsigned Factor = Group->getFactor();
  // A recipe with stored operands is printed as a store group, otherwise as
  // a load group.
  const bool IsStoreGroup = getNumStoreOperands() > 0;

  O << Indent << "INTERLEAVE-GROUP with factor " << Factor << " at ";
  Group->getInsertPos()->printAsOperand(O, false);
  O << ", ";
  getAddr()->printAsOperand(O, SlotTracker);
  O << ", ";
  getEVL()->printAsOperand(O, SlotTracker);
  if (VPValue *Mask = getMask()) {
    O << ", ";
    Mask->printAsOperand(O, SlotTracker);
  }

  // MemberIdx walks every slot of the group; ValueIdx counts existing members
  // only and selects the matching stored operand / defined value.
  unsigned ValueIdx = 0;
  for (unsigned MemberIdx = 0; MemberIdx < Factor; ++MemberIdx) {
    if (!Group->getMember(MemberIdx))
      continue;

    O << "\n" << Indent;
    if (IsStoreGroup) {
      O << "  vp.store ";
      // Stored values follow the address and EVL operands, hence the offset
      // of 2.
      getOperand(2 + ValueIdx)->printAsOperand(O, SlotTracker);
      O << " to index ";
    } else {
      O << "  ";
      getVPValue(ValueIdx)->printAsOperand(O, SlotTracker);
      O << " = vp.load from index ";
    }
    O << MemberIdx;
    ++ValueIdx;
  }
}


#endif


// Compute the cost of the whole interleave group for vectorization factor
// \p VF: the target's interleaved memory-op cost for the wide access, plus
// one reverse shuffle per member when the group is reversed.
InstructionCost VPInterleaveBase::computeCost(ElementCount VF,
                                              VPCostContext &Ctx) const {
  Instruction *InsertPos = getInsertPos();
  const unsigned InterleaveFactor = IG->getFactor();

  // Walk the member slots once. Indices collects the slots that hold a member
  // (gaps are skipped); InsertPosIdx is the position of the insert position
  // among the existing members, i.e. the index of its defined/stored VPValue.
  // The loop is bounded by the factor: the previous form used the factor
  // itself as the loop condition and could only terminate through `break`.
  SmallVector<unsigned, 4> Indices;
  unsigned InsertPosIdx = 0;
  bool FoundInsertPos = false;
  for (unsigned Idx = 0; Idx < InterleaveFactor; ++Idx) {
    auto *Member = IG->getMember(Idx);
    if (!Member)
      continue;
    if (!FoundInsertPos && Member == InsertPos) {
      InsertPosIdx = Indices.size();
      FoundInsertPos = true;
    }
    Indices.push_back(Idx);
  }
  assert(FoundInsertPos &&
         "Insert position must be a member of the interleave group");

  // The scalar type is inferred from the VPValue that corresponds to the
  // insert position: a defined value for loads, a stored value for stores.
  Type *ValTy = Ctx.Types.inferScalarType(
      getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
                                : getStoredValues()[InsertPosIdx]);
  auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
  unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
                    ->getAddressSpace();

  auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);

  // Calculate the cost of the whole interleaved group.
  InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost(
      InsertPos->getOpcode(), WideVecTy, InterleaveFactor, Indices,
      IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);

  if (!IG->isReverse())
    return Cost;

  // Reversed groups additionally pay one reverse shuffle per member.
  return Cost + IG->getNumMembers() *
                    Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
                                           VectorTy, VectorTy, {}, Ctx.CostKind,
                                           0);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print this recipe as "<Indent>EMIT <result> = CANONICAL-INDUCTION
// <operands>" to \p O, using \p SlotTracker when printing the result and the
// operands.
void VPCanonicalIVPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                         VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}


#endif


// Returns true if only scalar values of this recipe are used and, when
// \p IsScalable is set, additionally only its first lane is used.
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
  if (!vputils::onlyScalarValuesUsed(this))
    return false;
  // For the non-scalable case, scalar-only use is sufficient.
  if (!IsScalable)
    return true;
  return vputils::onlyFirstLaneUsed(this);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print this recipe as "<Indent>EMIT <result> = WIDEN-POINTER-INDUCTION"
// followed by the start value, the step value, operand 2 and, when the recipe
// has five operands, operands 3 and 4, all comma separated.
void VPWidenPointerInductionRecipe::printRecipe(
    raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
  const unsigned NumOps = getNumOperands();
  assert((NumOps == 3 || NumOps == 5) &&
         "unexpected number of operands");

  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);

  // Operand 2 is always printed; operands 3 and 4 only for the five-operand
  // form.
  const unsigned EndIdx = NumOps == 5 ? 5 : 3;
  for (unsigned Idx = 2; Idx < EndIdx; ++Idx) {
    O << ", ";
    getOperand(Idx)->printAsOperand(O, SlotTracker);
  }
}


// Print this recipe as "<Indent>EMIT <result> = EXPAND SCEV <expr>" to \p O,
// where <expr> is the streamed form of the Expr member.
void VPExpandSCEVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}


#endif


// Materialize the widened canonical IV as "vec.iv" = start + step, where the
// start is the scalar canonical IV (splatted unless VF is scalar) and the step
// comes from createStepForVF for this recipe's unroll part (for vector VFs,
// splatted and added to a step vector). All instructions are inserted before
// the terminator of State.CFG.PrevBB.
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  const ElementCount VF = State.VF;
  Value *ScalarIV = State.get(getOperand(0), /*IsScalar*/ true);
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());

  // Start value: the scalar IV itself, or its broadcast for non-scalar VFs.
  Value *Start = ScalarIV;
  if (!VF.isScalar())
    Start = Builder.CreateVectorSplat(VF, ScalarIV, "broadcast");

  // Step for this unroll part; for vector VFs it is splatted and added to a
  // step vector of the same type.
  Value *Step =
      createStepForVF(Builder, ScalarIV->getType(), VF, getUnrollPart(*this));
  if (VF.isVector()) {
    Value *StepSplat = Builder.CreateVectorSplat(VF, Step);
    Value *LaneOffsets = Builder.CreateStepVector(StepSplat->getType());
    Step = Builder.CreateAdd(StepSplat, LaneOffsets);
  }

  State.set(this, Builder.CreateAdd(Start, Step, "vec.iv"));
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print this recipe as "<Indent>EMIT <result> = WIDEN-CANONICAL-INDUCTION
// <operands>" to \p O, using \p SlotTracker when printing the result and the
// operands.
void VPWidenCanonicalIVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}


#endif


// Create the "vector.recur" phi for this first-order recurrence and register
// it as the value of this recipe. Only the incoming value from the block
// mapped to this recipe's first CFG predecessor is added here; the phi is
// created with room for two incoming values.
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  // For a scalar VF the phi has the start value's own type, otherwise a
  // vector of that type with VF elements.
  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  // IR block corresponding to the first CFG predecessor of this recipe's
  // parent block.
  BasicBlock *VectorPH =
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    // The guard restores the builder's insert point when this scope ends; the
    // instructions below are emitted before the terminator of VectorPH.
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    // Insert the start value into the last lane (index RuntimeVF - 1) of a
    // poison vector.
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
  Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  Phi->addIncoming(VectorInit, VectorPH);
  State.set(this, Phi);
}


// Cost of the recurrence phi: zero for any non-scalar \p VF, the target's
// control-flow cost of a PHI instruction for a scalar VF.
InstructionCost
VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
                                             VPCostContext &Ctx) const {
  if (!VF.isScalar())
    return 0;

  return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print this recipe as "<Indent>FIRST-ORDER-RECURRENCE-PHI <result> = phi
// <operands>" to \p O, using \p SlotTracker when printing the result and the
// operands.
void VPFirstOrderRecurrencePHIRecipe::printRecipe(
    raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}


#endif


// Create the "vec.phi" node for this reduction in the vector loop header and
// register it as the value of this recipe.
//
// Phi nodes take part in cycles, so they are generated in two stages. This is
// stage one: the phi is created with only its incoming start value (from the
// block mapped to this recipe's first CFG predecessor) so that the recipes
// using the phi can be generated. The start value may be any loop-invariant
// value; it does not have to be zero.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  VPValue *StartVPV = getStartValue();
  BasicBlock *Preheader =
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));

  // A scalar phi is used for a scalar VF and for in-loop reductions.
  const bool UseScalarPhi = State.VF.isScalar() || isInLoop();
  Value *InitValue = State.get(StartVPV, UseScalarPhi);

  BasicBlock *Header = State.CFG.PrevBB;
  assert(State.CurrentParentLoop->getHeader() == Header &&
         "recipe must be in the vector loop header");

  PHINode *NewPhi = PHINode::Create(InitValue->getType(), 2, "vec.phi");
  NewPhi->insertBefore(Header->getFirstInsertionPt());
  State.set(this, NewPhi, isInLoop());

  NewPhi->addIncoming(InitValue, Preheader);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print this recipe as "<Indent>WIDEN-REDUCTION-PHI <result> = phi
// <operands>", followed by " (VF scaled by 1/<factor>)" when the VF scale
// factor is greater than one.
void VPReductionPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                       VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);

  // The scale-factor suffix is omitted for an unscaled VF.
  const auto ScaleFactor = getVFScaleFactor();
  if (ScaleFactor <= 1)
    return;
  O << " (VF scaled by 1/" << ScaleFactor << ")";
}


#endif


// Create an empty phi whose type matches the generated value of operand 0,
// named after the Name member, and register it as the value of this recipe.
// No incoming values are added here.
void VPWidenPHIRecipe::execute(VPTransformState &State) {
  Type *PhiTy = State.get(getOperand(0))->getType();
  Instruction *NewPhi = State.Builder.CreatePHI(PhiTy, 2, Name);
  State.set(this, NewPhi);
}


// Returns the target's control-flow cost of a PHI instruction for the cost
// kind in \p Ctx. \p VF is not used by this computation.
InstructionCost VPWidenPHIRecipe::computeCost(ElementCount VF,
                                              VPCostContext &Ctx) const {
  return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print this recipe as "<Indent>WIDEN-PHI <result> = phi <phi operands>" to
// \p O. The operands are printed with printPhiOperands rather than
// printOperands.
void VPWidenPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printPhiOperands(O, SlotTracker);
}


#endif


// Create the "active.lane.mask" phi at the builder's current insert point,
// with the generated value of operand 0 incoming from the block mapped to
// this recipe's first CFG predecessor, and register it as the value of this
// recipe.
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *Preheader =
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
  Value *InitialMask = State.get(getOperand(0));

  // Room is reserved for two incoming values; only the first is added here.
  PHINode *MaskPhi =
      State.Builder.CreatePHI(InitialMask->getType(), 2, "active.lane.mask");
  MaskPhi->addIncoming(InitialMask, Preheader);
  State.set(this, MaskPhi);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print this recipe as "<Indent>ACTIVE-LANE-MASK-PHI <result> = phi
// <operands>" to \p O, using \p SlotTracker when printing the result and the
// operands.
void VPActiveLaneMaskPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}


#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


// Print this recipe as "<Indent>EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI <result>
// = phi <operands>" to \p O, using \p SlotTracker when printing the result
// and the operands.
void VPEVLBasedIVPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                        VPSlotTracker &SlotTracker) const {
  O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}


#endif


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

addOperand
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Definition AMDGPUDisassembler.cpp:81

Arguments
AMDGPU Lower Kernel Arguments
Definition AMDGPULowerKernelArguments.cpp:253

Select
AMDGPU Register Bank Select
Definition AMDGPURegBankSelect.cpp:68

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

AssumptionCache.h

BasicBlockUtils.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Casting.h

CommandLine.h

IntrinsicCostStrategy::InstructionCost
@ InstructionCost
Definition CostModel.cpp:52

IRBuilder.h

BasicBlock.h

Instruction.h

Type.h

Value.h

IVDescriptors.h

users
iv users
Definition IVUsers.cpp:48

Instructions.h

getMask
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
Definition InterleavedAccessPass.cpp:588

Intrinsics.h

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3370

LoopInfo.h

LoopUtils.h

LoopVectorizationPlanner.h
This file provides a LoopVectorizationPlanner class.

getAddressAccessSCEV
static const SCEV * getAddressAccessSCEV(Value *Ptr, LoopVectorizationLegality *Legal, PredicatedScalarEvolution &PSE, const Loop *TheLoop)
Gets Address Access SCEV after verifying that the access pattern is loop invariant except the inducti...
Definition LoopVectorize.cpp:5217

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

isOrdered
static bool isOrdered(const Instruction *I)
Definition MemorySSA.cpp:1745

OpIdx
MachineInstr unsigned OpIdx
Definition NVPTXPrologEpilogPass.cpp:56

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition RISCVRedundantCopyElimination.cpp:73

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

ScalarEvolutionExpressions.h

SmallVector.h
This file defines the SmallVector class.

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

X
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39

Twine.h

VPlanAnalysis.h

VPlanHelpers.h
This file contains the declarations of different VPlan-related auxiliary helpers.

VPlanPatternMatch.h

createReverseEVL
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
Definition VPlanRecipes.cpp:3707

interleaveVectors
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
Definition VPlanRecipes.cpp:3902

getCostForIntrinsics
static InstructionCost getCostForIntrinsics(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost for the intrinsic ID with Operands, produced by R.
Definition VPlanRecipes.cpp:1901

createBitOrPointerCast
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
Definition VPlanRecipes.cpp:3870

VectorParts
SmallVector< Value *, 2 > VectorParts
Definition VPlanRecipes.cpp:45

isUsedByLoadStoreAddress
static bool isUsedByLoadStoreAddress(const VPUser *V)
Returns true if V is used as part of the address of another load or store.
Definition VPlanRecipes.cpp:3252

scalarizeInstruction
static void scalarizeInstruction(const Instruction *Instr, VPReplicateRecipe *RepRecipe, const VPLane &Lane, VPTransformState &State)
A helper function to scalarize a single Instruction in the innermost loop.
Definition VPlanRecipes.cpp:3142

getOpcode
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247

VPlanUtils.h

VPlan.h
This file contains the declarations of the Vectorization Plan base classes:

IV
static const uint32_t IV[8]
Definition blake3_impl.h:83

Node
Definition ItaniumDemangle.h:166

Node::printAsOperand
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
Definition ItaniumDemangle.h:275

VectorType
Definition ItaniumDemangle.h:1189

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142

llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137

llvm::Attribute::getWithAlignment
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition Attributes.cpp:234

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicBlock::getFirstInsertionPt
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition BasicBlock.cpp:393

llvm::BasicBlock::getFirstNonPHIIt
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition BasicBlock.cpp:337

llvm::BasicBlock::getSinglePredecessor
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition BasicBlock.cpp:437

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233

llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition Instructions.h:3151

llvm::CallBase::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition InstrTypes.h:1504

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition Instructions.h:1511

llvm::CastInst::isBitOrNoopPointerCastable
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Definition Instructions.cpp:3189

llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:982

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676

llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition InstrTypes.h:709

llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699

llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition InstrTypes.h:697

llvm::CmpInst::getPredicateName
static LLVM_ABI StringRef getPredicateName(Predicate P)
Definition Instructions.cpp:3625

llvm::CmpPredicate
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition CmpPredicate.h:23

llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::Constant::getNullValue
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition Constants.cpp:390

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64

llvm::DebugLoc
A debug info location.
Definition DebugLoc.h:123

llvm::ElementCount
Definition TypeSize.h:298

llvm::ElementCount::isVector
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324

llvm::ElementCount::getScalable
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312

llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309

llvm::ElementCount::isScalar
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22

llvm::FastMathFlags::print
LLVM_ABI void print(raw_ostream &O) const
Print fast-math flags to O.
Definition Operator.cpp:283

llvm::FastMathFlags::setAllowContract
void setAllowContract(bool B=true)
Definition FMF.h:90

llvm::FastMathFlags::noSignedZeros
bool noSignedZeros() const
Definition FMF.h:67

llvm::FastMathFlags::noInfs
bool noInfs() const
Definition FMF.h:66

llvm::FastMathFlags::setAllowReciprocal
void setAllowReciprocal(bool B=true)
Definition FMF.h:87

llvm::FastMathFlags::allowReciprocal
bool allowReciprocal() const
Definition FMF.h:68

llvm::FastMathFlags::setNoSignedZeros
void setNoSignedZeros(bool B=true)
Definition FMF.h:84

llvm::FastMathFlags::allowReassoc
bool allowReassoc() const
Flag queries.
Definition FMF.h:64

llvm::FastMathFlags::approxFunc
bool approxFunc() const
Definition FMF.h:70

llvm::FastMathFlags::setNoNaNs
void setNoNaNs(bool B=true)
Definition FMF.h:78

llvm::FastMathFlags::setAllowReassoc
void setAllowReassoc(bool B=true)
Flag setters.
Definition FMF.h:75

llvm::FastMathFlags::noNaNs
bool noNaNs() const
Definition FMF.h:65

llvm::FastMathFlags::setApproxFunc
void setApproxFunc(bool B=true)
Definition FMF.h:93

llvm::FastMathFlags::setNoInfs
void setNoInfs(bool B=true)
Definition FMF.h:81

llvm::FastMathFlags::allowContract
bool allowContract() const
Definition FMF.h:69

llvm::FunctionType
Class to represent function types.
Definition DerivedTypes.h:105

llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition DerivedTypes.h:137

llvm::Function
Definition Function.h:64

llvm::Function::willReturn
bool willReturn() const
Determine if the function will return.
Definition Function.h:661

llvm::Function::doesNotThrow
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:594

llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214

llvm::IRBuilderBase::FastMathFlagGuard
Definition IRBuilder.h:431

llvm::IRBuilderBase::InsertPointGuard
Definition IRBuilder.h:409

llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114

llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2553

llvm::IRBuilderBase::getInt1Ty
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition IRBuilder.h:547

llvm::IRBuilderBase::CreateInsertValue
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2607

llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2541

llvm::IRBuilderBase::CreateVectorSplice
LLVM_ABI Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition IRBuilder.cpp:1110

llvm::IRBuilderBase::CreateVectorSplat
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
Definition IRBuilder.cpp:1141

llvm::IRBuilderBase::CreateExtractValue
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2600

llvm::IRBuilderBase::CreateSelect
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition IRBuilder.cpp:1028

llvm::IRBuilderBase::CreateFreeze
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2619

llvm::IRBuilderBase::getInt32Ty
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:562

llvm::IRBuilderBase::CreatePtrAdd
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2007

llvm::IRBuilderBase::setFastMathFlags
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition IRBuilder.h:345

llvm::IRBuilderBase::getInt64Ty
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition IRBuilder.h:567

llvm::IRBuilderBase::CreateVectorReverse
LLVM_ABI Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition IRBuilder.cpp:1094

llvm::IRBuilderBase::CreateICmpNE
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2304

llvm::IRBuilderBase::getInt64
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition IRBuilder.h:527

llvm::IRBuilderBase::CreateOrReduce
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition IRBuilder.cpp:381

llvm::IRBuilderBase::CreateLogicalAnd
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition IRBuilder.h:1725

llvm::IRBuilderBase::CreateIntrinsic
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition IRBuilder.cpp:847

llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:522

llvm::IRBuilderBase::CreateCmp
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2434

llvm::IRBuilderBase::CreateNot
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1808

llvm::IRBuilderBase::CreateICmpEQ
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2300

llvm::IRBuilderBase::CreateCountTrailingZeroElems
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Definition IRBuilder.h:1134

llvm::IRBuilderBase::CreateSub
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420

llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197

llvm::IRBuilderBase::CreateZExt
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2053

llvm::IRBuilderBase::CreateIntMaxReduce
LLVM_ABI CallInst * CreateIntMaxReduce(Value *Src, bool IsSigned=false)
Create a vector integer max reduction intrinsic of the source vector.
Definition IRBuilder.cpp:389

llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403

llvm::IRBuilderBase::getFalse
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507

llvm::IRBuilderBase::CreateBinOp
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:1708

llvm::IRBuilderBase::CreateICmpUGE
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2312

llvm::IRBuilderBase::CreateIntMinReduce
LLVM_ABI CallInst * CreateIntMinReduce(Value *Src, bool IsSigned=false)
Create a vector integer min reduction intrinsic of the source vector.
Definition IRBuilder.cpp:395

llvm::IRBuilderBase::CreateICmp
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2410

llvm::IRBuilderBase::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573

llvm::IRBuilderBase::CreateMul
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2762

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition InstructionCost.h:74

llvm::InstructionCost::isValid
bool isValid() const
Definition InstructionCost.h:80

llvm::Instruction
Definition Instruction.h:69

llvm::Instruction::isCast
bool isCast() const
Definition Instruction.h:321

llvm::Instruction::isBinaryOp
bool isBinaryOp() const
Definition Instruction.h:317

llvm::Instruction::eraseFromParent
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition Instruction.cpp:108

llvm::Instruction::getOpcodeName
const char * getOpcodeName() const
Definition Instruction.h:314

llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition Instruction.h:312

llvm::Instruction::BinaryOps
BinaryOps
Definition Instruction.h:1004

llvm::Instruction::isUnaryOp
bool isUnaryOp() const
Definition Instruction.h:316

llvm::Instruction::CastOps
CastOps
Definition Instruction.h:1018

llvm::IntegerType::get
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318

llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition VectorUtils.h:524

llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition VectorUtils.h:540

llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition VectorUtils.h:594

llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition VectorUtils.h:539

llvm::InterleaveGroup::getInsertPos
InstTy * getInsertPos() const
Definition VectorUtils.h:610

llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g. alias info) from the instructions in this group to NewInst.
Definition VectorUtils.cpp:1765

llvm::InterleaveGroup::getAlign
Align getAlign() const
Definition VectorUtils.h:541

llvm::IntrinsicCostAttributes
Definition TargetTransformInfo.h:181

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::MemIntrinsicCostAttributes
Information for memory intrinsic cost model.
Definition TargetTransformInfo.h:128

llvm::Metadata
Root of the metadata hierarchy.
Definition Metadata.h:64

llvm::Metadata::print
LLVM_ABI void print(raw_ostream &OS, const Module *M=nullptr, bool IsForDebug=false) const
Print.
Definition AsmWriter.cpp:5415

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67

llvm::PHINode
Definition Instructions.h:2641

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition Instructions.h:2775

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition Instructions.h:2675

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition Constants.cpp:1905

llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition ScalarEvolution.h:2443

llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition ScalarEvolution.h:2490

llvm::RecurrenceDescriptor::getOpcode
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Definition IVDescriptors.cpp:1271

llvm::RecurrenceDescriptor::getOpcode
unsigned getOpcode() const
Definition IVDescriptors.h:233

llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
Definition IVDescriptors.h:281

llvm::RecurrenceDescriptor::isFindIVRecurrenceKind
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
Definition IVDescriptors.h:309

llvm::RecurrenceDescriptor::isMinMaxRecurrenceKind
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
Definition IVDescriptors.h:275

llvm::SCEV
This class represents an analyzed expression in the program.
Definition ScalarEvolution.h:72

llvm::SlotTracker
This class provides computation of slot numbers for LLVM Assembly writing.
Definition AsmWriter.cpp:787

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:389

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition SmallPtrSet.h:527

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition SmallVector.h:676

llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition SmallVector.h:946

llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition SmallVector.h:686

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:419

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:83

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1205

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55

llvm::TargetTransformInfo::getCmpSelInstrCost
LLVM_ABI InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
Definition TargetTransformInfo.cpp:1113

llvm::TargetTransformInfo::getCastInstrCost
LLVM_ABI InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition TargetTransformInfo.cpp:1084

llvm::TargetTransformInfo::getPartialReductionExtendKind
static LLVM_ABI PartialReductionExtendKind getPartialReductionExtendKind(Instruction *I)
Get the kind of extension that an instruction represents.
Definition TargetTransformInfo.cpp:1020

llvm::TargetTransformInfo::getOperandInfo
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition TargetTransformInfo.cpp:903

llvm::TargetTransformInfo::getArithmeticInstrCost
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition TargetTransformInfo.cpp:968

llvm::TargetTransformInfo::OP_None
@ OP_None
Definition TargetTransformInfo.h:1242

llvm::TargetTransformInfo::PR_None
@ PR_None
Definition TargetTransformInfo.h:280

llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition TargetTransformInfo.h:359

llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition TargetTransformInfo.h:1226

llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition TargetTransformInfo.h:1214

llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition TargetTransformInfo.h:1518

llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.
Definition TargetTransformInfo.h:1524

llvm::TargetTransformInfo::CastContextHint::Masked
@ Masked
The cast is used with a masked load/store.
Definition TargetTransformInfo.h:1521

llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
Definition TargetTransformInfo.h:1519

llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
Definition TargetTransformInfo.h:1520

llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.
Definition TargetTransformInfo.h:1523

llvm::TargetTransformInfo::CastContextHint::GatherScatter
@ GatherScatter
The cast is used with a gather/scatter.
Definition TargetTransformInfo.h:1522

llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition TargetTransformInfo.h:1235

llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition TargetTransformInfo.h:1234

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45

llvm::Type::getInt64Ty
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297

llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273

llvm::Type::getInt32Ty
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296

llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267

llvm::Type::getVoidTy
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280

llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352

llvm::Type::isStructTy
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261

llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128

llvm::Type::getScalarSizeInBits
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230

llvm::Type::getInt1Ty
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:293

llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240

llvm::Type::getIntNTy
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300

llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139

llvm::User::value_op_end
value_op_iterator value_op_end()
Definition User.h:288

llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition User.h:212

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:207

llvm::User::value_op_begin
value_op_iterator value_op_begin()
Definition User.h:285

llvm::VPActiveLaneMaskPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
Definition VPlanRecipes.cpp:4524

llvm::VPActiveLaneMaskPHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4535

llvm::VPBasicBlock::getRecipeList
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4062

llvm::VPBasicBlock::end
iterator end()
Definition VPlan.h:4046

llvm::VPBasicBlock::insert
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4075

llvm::VPBlendRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBlendRecipe.
Definition VPlanRecipes.cpp:2676

llvm::VPBlendRecipe::getIncomingValue
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2587

llvm::VPBlendRecipe::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2582

llvm::VPBlendRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2691

llvm::VPBlockBase
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81

llvm::VPBlockBase::getPredecessors
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204

llvm::VPBlockBase::getPlan
VPlan * getPlan()
Definition VPlan.cpp:173

llvm::VPBlockBase::printAsOperand
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:349

llvm::VPBranchOnMaskRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
Definition VPlanRecipes.cpp:3535

llvm::VPBranchOnMaskRecipe::execute
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Definition VPlanRecipes.cpp:3518

llvm::VPBuilder
VPlan-based builder utility analogous to IRBuilder.
Definition LoopVectorizationPlanner.h:54

llvm::VPCanonicalIVPHIRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4344

llvm::VPDef
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:356

llvm::VPDef::dump
LLVM_ABI_FOR_TEST void dump() const
Dump the VPDef to stderr (for debugging).
Definition VPlan.cpp:110

llvm::VPDef::getNumDefinedValues
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition VPlanValue.h:477

llvm::VPDef::getVPSingleValue
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition VPlanValue.h:450

llvm::VPDef::getVPValue
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition VPlanValue.h:462

llvm::VPDef::definedValues
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition VPlanValue.h:472

llvm::VPDef::getVPDefID
unsigned getVPDefID() const
Definition VPlanValue.h:482

llvm::VPDerivedIVRecipe::getStartValue
VPIRValue * getStartValue() const
Definition VPlan.h:3810

llvm::VPDerivedIVRecipe::getStepValue
VPValue * getStepValue() const
Definition VPlan.h:3811

llvm::VPDerivedIVRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2439

llvm::VPEVLBasedIVPHIRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4546

llvm::VPExpandSCEVRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4379

llvm::VPExpressionRecipe::decompose
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
Definition VPlanRecipes.cpp:2908

llvm::VPExpressionRecipe::isSingleScalar
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
Definition VPlanRecipes.cpp:2995

llvm::VPExpressionRecipe::mayHaveSideEffects
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
Definition VPlanRecipes.cpp:2987

llvm::VPExpressionRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
Definition VPlanRecipes.cpp:2922

llvm::VPExpressionRecipe::mayReadOrWriteMemory
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
Definition VPlanRecipes.cpp:2981

llvm::VPExpressionRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3004

llvm::VPHeaderPHIRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
Definition VPlanRecipes.cpp:2409

llvm::VPHeaderPHIRecipe::getStartValue
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2107

llvm::VPHistogramRecipe::execute
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
Definition VPlanRecipes.cpp:1985

llvm::VPHistogramRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
Definition VPlanRecipes.cpp:2014

llvm::VPHistogramRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2048

llvm::VPHistogramRecipe::getMask
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1858

llvm::VPIRFlags
Class to record and manage LLVM IR flags.
Definition VPlan.h:608

llvm::VPIRFlags::FMFs
FastMathFlagsTy FMFs
Definition VPlan.h:695

llvm::VPIRFlags::ReductionFlags
ReductionFlagsTy ReductionFlags
Definition VPlan.h:697

llvm::VPIRFlags::flagsValidForOpcode
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
Definition VPlanRecipes.cpp:2079

llvm::VPIRFlags::WrapFlags
WrapFlagsTy WrapFlags
Definition VPlan.h:689

llvm::VPIRFlags::CmpPredicate
CmpInst::Predicate CmpPredicate
Definition VPlan.h:688

llvm::VPIRFlags::printFlags
void printFlags(raw_ostream &O) const
Definition VPlanRecipes.cpp:2121

llvm::VPIRFlags::GEPFlags
GEPNoWrapFlags GEPFlags
Definition VPlan.h:693

llvm::VPIRFlags::hasFastMathFlags
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:882

llvm::VPIRFlags::getFastMathFlags
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
Definition VPlanRecipes.cpp:364

llvm::VPIRFlags::isReductionOrdered
bool isReductionOrdered() const
Definition VPlan.h:932

llvm::VPIRFlags::TruncFlags
TruncFlagsTy TruncFlags
Definition VPlan.h:690

llvm::VPIRFlags::getPredicate
CmpInst::Predicate getPredicate() const
Definition VPlan.h:859

llvm::VPIRFlags::ExactFlags
ExactFlagsTy ExactFlags
Definition VPlan.h:692

llvm::VPIRFlags::hasNoSignedWrap
bool hasNoSignedWrap() const
Definition VPlan.h:909

llvm::VPIRFlags::intersectFlags
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
Definition VPlanRecipes.cpp:314

llvm::VPIRFlags::VPIRFlags
VPIRFlags()
Definition VPlan.h:702

llvm::VPIRFlags::getGEPNoWrapFlags
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:874

llvm::VPIRFlags::hasPredicate
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:877

llvm::VPIRFlags::DisjointFlags
DisjointFlagsTy DisjointFlags
Definition VPlan.h:691

llvm::VPIRFlags::AllFlags
unsigned AllFlags
Definition VPlan.h:698

llvm::VPIRFlags::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Definition VPlan.h:898

llvm::VPIRFlags::FCmpFlags
FCmpFlagsTy FCmpFlags
Definition VPlan.h:696

llvm::VPIRFlags::NonNegFlags
NonNegFlagsTy NonNegFlags
Definition VPlan.h:694

llvm::VPIRFlags::isReductionInLoop
bool isReductionInLoop() const
Definition VPlan.h:938

llvm::VPIRFlags::applyFlags
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:816

llvm::VPIRFlags::getRecurKind
RecurKind getRecurKind() const
Definition VPlan.h:926

llvm::VPIRInstruction::getInstruction
Instruction & getInstruction() const
Definition VPlan.h:1512

llvm::VPIRInstruction::extractLastLaneOfLastPartOfFirstOperand
void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the last part of the operand using Builder.
Definition VPlanRecipes.cpp:1655

llvm::VPIRInstruction::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlanRecipes.cpp:1640

llvm::VPIRInstruction::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
Definition VPlanRecipes.cpp:1648

llvm::VPIRInstruction::VPIRInstruction
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1487

llvm::VPIRInstruction::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1670

llvm::VPIRMetadata::intersect
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
Definition VPlanRecipes.cpp:1748

llvm::VPIRMetadata::VPIRMetadata
VPIRMetadata()=default

llvm::VPIRMetadata::print
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
Definition VPlanRecipes.cpp:1762

llvm::VPIRMetadata::applyMetadata
void applyMetadata(Instruction &I) const
Add all metadata to I.
Definition VPlanRecipes.cpp:1743

llvm::VPInstructionWithType::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1580

llvm::VPInstructionWithType::execute
void execute(VPTransformState &State) override
Generate the instruction.
Definition VPlanRecipes.cpp:1552

llvm::VPInstruction::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlanRecipes.cpp:1118

llvm::VPInstruction::getNumOperandsForOpcode
static unsigned getNumOperandsForOpcode(unsigned Opcode)
Return the number of operands determined by the opcode of the VPInstruction.
Definition VPlanRecipes.cpp:430

llvm::VPInstruction::doesGeneratePerAllLanes
bool doesGeneratePerAllLanes() const
Returns true if this VPInstruction generates scalar values for all lanes.
Definition VPlanRecipes.cpp:497

llvm::VPInstruction::BranchOnCond
@ BranchOnCond
Definition VPlan.h:1110

llvm::VPInstruction::ExtractLastActive
@ ExtractLastActive
Extracts the lane from the first operand corresponding to the last active (non-zero) lane in the mask...
Definition VPlan.h:1195

llvm::VPInstruction::PtrAdd
@ PtrAdd
Definition VPlan.h:1148

llvm::VPInstruction::Reverse
@ Reverse
Definition VPlan.h:1172

llvm::VPInstruction::ExtractLane
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1188

llvm::VPInstruction::Broadcast
@ Broadcast
Definition VPlan.h:1118

llvm::VPInstruction::LastActiveLane
@ LastActiveLane
Definition VPlan.h:1170

llvm::VPInstruction::ComputeAnyOfResult
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1133

llvm::VPInstruction::BranchOnCount
@ BranchOnCount
Definition VPlan.h:1109

llvm::VPInstruction::BranchOnTwoConds
@ BranchOnTwoConds
Definition VPlan.h:1117

llvm::VPInstruction::WideIVStep
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1178

llvm::VPInstruction::ExtractLastPart
@ ExtractLastPart
Definition VPlan.h:1137

llvm::VPInstruction::ExtractPenultimateElement
@ ExtractPenultimateElement
Definition VPlan.h:1143

llvm::VPInstruction::ResumeForEpilogue
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1191

llvm::VPInstruction::Unpack
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1130

llvm::VPInstruction::ActiveLaneMask
@ ActiveLaneMask
Definition VPlan.h:1102

llvm::VPInstruction::FirstActiveLane
@ FirstActiveLane
Definition VPlan.h:1163

llvm::VPInstruction::FirstOrderRecurrenceSplice
@ FirstOrderRecurrenceSplice
Definition VPlan.h:1092

llvm::VPInstruction::ExplicitVectorLength
@ ExplicitVectorLength
Definition VPlan.h:1103

llvm::VPInstruction::ReductionStartVector
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1182

llvm::VPInstruction::SLPStore
@ SLPStore
Definition VPlan.h:1097

llvm::VPInstruction::BuildVector
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1125

llvm::VPInstruction::WidePtrAdd
@ WidePtrAdd
Definition VPlan.h:1151

llvm::VPInstruction::LogicalAnd
@ LogicalAnd
Definition VPlan.h:1144

llvm::VPInstruction::BuildStructVector
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1122

llvm::VPInstruction::VScale
@ VScale
Returns the value for vscale.
Definition VPlan.h:1198

llvm::VPInstruction::CanonicalIVIncrementForPart
@ CanonicalIVIncrementForPart
Definition VPlan.h:1106

llvm::VPInstruction::SLPLoad
@ SLPLoad
Definition VPlan.h:1096

llvm::VPInstruction::ComputeReductionResult
@ ComputeReductionResult
Definition VPlan.h:1135

llvm::VPInstruction::Not
@ Not
Definition VPlan.h:1095

llvm::VPInstruction::CalculateTripCountMinusVF
@ CalculateTripCountMinusVF
Definition VPlan.h:1104

llvm::VPInstruction::StepVector
@ StepVector
Definition VPlan.h:1184

llvm::VPInstruction::ExtractLastLane
@ ExtractLastLane
Definition VPlan.h:1139

llvm::VPInstruction::AnyOf
@ AnyOf
Definition VPlan.h:1157

llvm::VPInstruction::ComputeFindIVResult
@ ComputeFindIVResult
Definition VPlan.h:1134

llvm::VPInstruction::hasResult
bool hasResult() const
Definition VPlan.h:1262

llvm::VPInstruction::opcodeMayReadOrWriteFromMemory
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
Definition VPlanRecipes.cpp:1314

llvm::VPInstruction::dump
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
Definition VPlanRecipes.cpp:1432

llvm::VPInstruction::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
Definition VPlanRecipes.cpp:1437

llvm::VPInstruction::getName
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1303

llvm::VPInstruction::getOpcode
unsigned getOpcode() const
Definition VPlan.h:1246

llvm::VPInstruction::VPInstruction
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlanRecipes.cpp:417

llvm::VPInstruction::usesFirstLaneOnly
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlanRecipes.cpp:1360

llvm::VPInstruction::isVectorToScalar
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
Definition VPlanRecipes.cpp:1265

llvm::VPInstruction::isSingleScalar
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
Definition VPlanRecipes.cpp:1279

llvm::VPInstruction::execute
void execute(VPTransformState &State) override
Generate the instruction.
Definition VPlanRecipes.cpp:1291

llvm::VPInstruction::usesFirstPartOnly
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlanRecipes.cpp:1410

llvm::VPInterleaveBase::needsMaskForGaps
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2699

llvm::VPInterleaveBase::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this recipe.
Definition VPlanRecipes.cpp:4302

llvm::VPInterleaveBase::getInsertPos
Instruction * getInsertPos() const
Definition VPlan.h:2703

llvm::VPInterleaveBase::getInterleaveGroup
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2701

llvm::VPInterleaveBase::getMask
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2693

llvm::VPInterleaveBase::getStoredValues
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2722

llvm::VPInterleaveBase::getAddr
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2687

llvm::VPInterleaveEVLRecipe::getEVL
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2797

llvm::VPInterleaveEVLRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4270

llvm::VPInterleaveEVLRecipe::getNumStoreOperands
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2810

llvm::VPInterleaveEVLRecipe::execute
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
Definition VPlanRecipes.cpp:4158

llvm::VPInterleaveRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4127

llvm::VPInterleaveRecipe::getNumStoreOperands
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2760

llvm::VPInterleaveRecipe::execute
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
Definition VPlanRecipes.cpp:3957

llvm::VPLane
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition VPlanHelpers.h:110

llvm::VPLane::getLastLaneForVF
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition VPlanHelpers.h:151

llvm::VPLane::getLaneFromEnd
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition VPlanHelpers.h:137

llvm::VPLane::getFirstLane
static VPLane getFirstLane()
Definition VPlanHelpers.h:135

llvm::VPPhiAccessors::getAsRecipe
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.

llvm::VPPhiAccessors::getNumIncoming
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1402

llvm::VPPhiAccessors::removeIncomingValueFor
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
Definition VPlanRecipes.cpp:1703

llvm::VPPhiAccessors::getIncomingBlock
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4153

llvm::VPPhiAccessors::incoming_values_and_blocks
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1427

llvm::VPPhiAccessors::getIncomingValue
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1394

llvm::VPPhiAccessors::printPhiOperands
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
Definition VPlanRecipes.cpp:1712

llvm::VPPredInstPHIRecipe::execute
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
Definition VPlanRecipes.cpp:3543

llvm::VPPredInstPHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3603

llvm::VPRecipeBase
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387

llvm::VPRecipeBase::mayReadFromMemory
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
Definition VPlanRecipes.cpp:109

llvm::VPRecipeBase::mayHaveSideEffects
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
Definition VPlanRecipes.cpp:159

llvm::VPRecipeBase::printRecipe
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...

llvm::VPRecipeBase::getRegion
VPRegionBlock * getRegion()
Definition VPlan.h:4314

llvm::VPRecipeBase::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe, delegating to printRecipe().
Definition VPlanRecipes.cpp:383

llvm::VPRecipeBase::isPhi
bool isPhi() const
Returns true for PHI-like recipes.
Definition VPlanRecipes.cpp:304

llvm::VPRecipeBase::mayWriteToMemory
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
Definition VPlanRecipes.cpp:50

llvm::VPRecipeBase::computeCost
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
Definition VPlanRecipes.cpp:299

llvm::VPRecipeBase::getParent
VPBasicBlock * getParent()
Definition VPlan.h:408

llvm::VPRecipeBase::getDebugLoc
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479

llvm::VPRecipeBase::moveBefore
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
Definition VPlanRecipes.cpp:259

llvm::VPRecipeBase::insertBefore
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
Definition VPlanRecipes.cpp:222

llvm::VPRecipeBase::insertAfter
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
Definition VPlanRecipes.cpp:236

llvm::VPRecipeBase::eraseFromParent
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition VPlanRecipes.cpp:249

llvm::VPRecipeBase::cost
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
Definition VPlanRecipes.cpp:265

llvm::VPRecipeBase::isScalarCast
bool isScalarCast() const
Return true if the recipe is a scalar cast.
Definition VPlanRecipes.cpp:309

llvm::VPRecipeBase::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition VPlanRecipes.cpp:243

llvm::VPRecipeBase::moveAfter
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Definition VPlanRecipes.cpp:254

llvm::VPRecipeBase::VPRecipeBase
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:398

llvm::VPRecipeValue::VPValue
friend class VPValue
Definition VPlanValue.h:236

llvm::VPReductionEVLRecipe::execute
void execute(VPTransformState &State) override
Generate the reduction in the loop.
Definition VPlanRecipes.cpp:2771

llvm::VPReductionEVLRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3115

llvm::VPReductionEVLRecipe::getEVL
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2960

llvm::VPReductionPHIRecipe::getVFScaleFactor
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2504

llvm::VPReductionPHIRecipe::isInLoop
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2528

llvm::VPReductionPHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4489

llvm::VPReductionPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Definition VPlanRecipes.cpp:4463

llvm::VPReductionRecipe::isConditional
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2902

llvm::VPReductionRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
Definition VPlanRecipes.cpp:2805

llvm::VPReductionRecipe::getVecOp
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2913

llvm::VPReductionRecipe::getCondOp
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2915

llvm::VPReductionRecipe::getRecurrenceKind
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2898

llvm::VPReductionRecipe::isPartialReduction
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2904

llvm::VPReductionRecipe::getChainOp
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2911

llvm::VPReductionRecipe::isInLoop
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:2906

llvm::VPReductionRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3092

llvm::VPReductionRecipe::execute
void execute(VPTransformState &State) override
Generate the reduction in the loop.
Definition VPlanRecipes.cpp:2714

llvm::VPRegionBlock
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4197

llvm::VPRegionBlock::isReplicator
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4265

llvm::VPReplicateRecipe
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2982

llvm::VPReplicateRecipe::execute
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
Definition VPlanRecipes.cpp:3200

llvm::VPReplicateRecipe::isSingleScalar
bool isSingleScalar() const
Definition VPlan.h:3023

llvm::VPReplicateRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
Definition VPlanRecipes.cpp:3300

llvm::VPReplicateRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3490

llvm::VPReplicateRecipe::getOpcode
unsigned getOpcode() const
Definition VPlan.h:3052

llvm::VPReplicateRecipe::shouldPack
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
Definition VPlanRecipes.cpp:3225

llvm::VPScalarIVStepsRecipe::getStepValue
VPValue * getStepValue() const
Definition VPlan.h:3877

llvm::VPScalarIVStepsRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2531

llvm::VPScalarIVStepsRecipe::execute
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Definition VPlanRecipes.cpp:2452

llvm::VPSingleDefRecipe
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531

llvm::VPSingleDefRecipe::getUnderlyingInstr
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:594

llvm::VPSingleDefRecipe::dump
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
Definition VPlanRecipes.cpp:381

llvm::VPSingleDefRecipe::VPSingleDefRecipe
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:533

llvm::VPSlotTracker
This class can be used to assign names to VPValues.
Definition VPlanHelpers.h:383

llvm::VPTypeAnalysis::inferScalarType
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
Definition VPlanAnalysis.cpp:256

llvm::VPUnrollPartAccessor
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1020

llvm::VPUnrollPartAccessor::getUnrollPartOperand
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
Definition VPlanRecipes.cpp:398

llvm::VPUnrollPartAccessor::getUnrollPart
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
Definition VPlanRecipes.cpp:405

llvm::VPUser
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:253

llvm::VPUser::printOperands
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1428

llvm::VPUser::operands
operand_range operands()
Definition VPlanValue.h:321

llvm::VPUser::setOperand
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:297

llvm::VPUser::getNumOperands
unsigned getNumOperands() const
Definition VPlanValue.h:291

llvm::VPUser::op_begin
operand_iterator op_begin()
Definition VPlanValue.h:317

llvm::VPUser::getOperand
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:292

llvm::VPUser::usesFirstLaneOnly
virtual bool usesFirstLaneOnly(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlanValue.h:336

llvm::VPValue
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:47

llvm::VPValue::getLiveInIRValue
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:133

llvm::VPValue::isDefinedOutsideLoopRegions
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1382

llvm::VPValue::getDefiningRecipe
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:119

llvm::VPValue::printAsOperand
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1424

llvm::VPValue::getUnderlyingValue
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:74

llvm::VPValue::replaceAllUsesWith
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1385

llvm::VPVectorEndPointerRecipe::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlanRecipes.cpp:2615

llvm::VPVectorEndPointerRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2644

llvm::VPVectorEndPointerRecipe::getVFValue
VPValue * getVFValue()
Definition VPlan.h:1952

llvm::VPVectorPointerRecipe::getSourceElementType
Type * getSourceElementType() const
Definition VPlan.h:2012

llvm::VPVectorPointerRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2666

llvm::VPVectorPointerRecipe::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlanRecipes.cpp:2654

llvm::VPWidenCallRecipe::args
operand_range args()
Definition VPlan.h:1814

llvm::VPWidenCallRecipe::getCalledScalarFunction
Function * getCalledScalarFunction() const
Definition VPlan.h:1810

llvm::VPWidenCallRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
Definition VPlanRecipes.cpp:1813

llvm::VPWidenCallRecipe::execute
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
Definition VPlanRecipes.cpp:1780

llvm::VPWidenCallRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1821

llvm::VPWidenCanonicalIVRecipe::execute
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
Definition VPlanRecipes.cpp:4387

llvm::VPWidenCanonicalIVRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4406

llvm::VPWidenCastRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2398

llvm::VPWidenCastRecipe::getResultType
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1664

llvm::VPWidenCastRecipe::execute
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
Definition VPlanRecipes.cpp:2372

llvm::VPWidenCastRecipe::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
Definition VPlanRecipes.cpp:2387

llvm::VPWidenGEPRecipe::execute
void execute(VPTransformState &State) override
Generate the gep nodes.
Definition VPlanRecipes.cpp:2545

llvm::VPWidenGEPRecipe::getSourceElementType
Type * getSourceElementType() const
Definition VPlan.h:1910

llvm::VPWidenGEPRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2600

llvm::VPWidenGEPRecipe::usesFirstLaneOnly
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlanRecipes.cpp:2540

llvm::VPWidenInductionRecipe::getStartValue
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2170

llvm::VPWidenInductionRecipe::getStepValue
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2173

llvm::VPWidenIntOrFpInductionRecipe::getStartValue
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2268

llvm::VPWidenIntOrFpInductionRecipe::getTruncInst
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2283

llvm::VPWidenIntOrFpInductionRecipe::getScalarType
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2292

llvm::VPWidenIntOrFpInductionRecipe::isCanonical
bool isCanonical() const
Returns true if the induction is canonical, i.e.
Definition VPlanRecipes.cpp:2428

llvm::VPWidenIntOrFpInductionRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2415

llvm::VPWidenIntrinsicRecipe::getVectorIntrinsicID
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1746

llvm::VPWidenIntrinsicRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1964

llvm::VPWidenIntrinsicRecipe::getIntrinsicName
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
Definition VPlanRecipes.cpp:1950

llvm::VPWidenIntrinsicRecipe::usesFirstLaneOnly
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlanRecipes.cpp:1954

llvm::VPWidenIntrinsicRecipe::getResultType
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1749

llvm::VPWidenIntrinsicRecipe::execute
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
Definition VPlanRecipes.cpp:1848

llvm::VPWidenIntrinsicRecipe::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
Definition VPlanRecipes.cpp:1944

llvm::VPWidenMemoryRecipe::IsMasked
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3307

llvm::VPWidenMemoryRecipe::Reverse
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3304

llvm::VPWidenMemoryRecipe::isConsecutive
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3347

llvm::VPWidenMemoryRecipe::Ingredient
Instruction & Ingredient
Definition VPlan.h:3295

llvm::VPWidenMemoryRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
Definition VPlanRecipes.cpp:3612

llvm::VPWidenMemoryRecipe::Consecutive
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3301

llvm::VPWidenMemoryRecipe::getMask
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3361

llvm::VPWidenMemoryRecipe::Alignment
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3298

llvm::VPWidenMemoryRecipe::getAddr
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3354

llvm::VPWidenMemoryRecipe::isReverse
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3351

llvm::VPWidenPHIRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
Definition VPlanRecipes.cpp:4508

llvm::VPWidenPHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4514

llvm::VPWidenPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Definition VPlanRecipes.cpp:4501

llvm::VPWidenPointerInductionRecipe::onlyScalarsGenerated
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
Definition VPlanRecipes.cpp:4353

llvm::VPWidenPointerInductionRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4359

llvm::VPWidenRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
Definition VPlanRecipes.cpp:2324

llvm::VPWidenRecipe::execute
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
Definition VPlanRecipes.cpp:2218

llvm::VPWidenRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2362

llvm::VPlan::getUF
unsigned getUF() const
Definition VPlan.h:4544

llvm::VPlan::getVectorLoopRegion
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1022

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::setName
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397

llvm::Value::getContext
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1106

llvm::Value::mutateType
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:838

llvm::Value::getName
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322

llvm::VectorType
Base class of all SIMD vector types.
Definition DerivedTypes.h:430

llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition DerivedTypes.h:697

llvm::VectorType::get
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.

llvm::VectorType::getElementType
Type * getElementType() const
Definition DerivedTypes.h:463

llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200

llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168

llvm::details::FixedOrScalableQuantity::multiplyCoefficientBy
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256

llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165

llvm::details::FixedOrScalableQuantity::divideCoefficientBy
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition ilist_node.h:34

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition ilist_node.h:123

llvm::iplist_impl< simple_ilist< T, Options... >, ilist_traits< T > >::iterator
typename base_list_type::iterator iterator
Definition ilist.h:121

llvm::iplist_impl::erase
iterator erase(iterator where)
Definition ilist.h:204

llvm::iplist_impl::remove
pointer remove(iterator &IT)
Definition ilist.h:188

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:126

llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::GVNExpression::ExpressionType
ExpressionType
Definition GVNExpression.h:42

llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81

llvm::Intrinsic::getOrInsertDeclaration
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition Intrinsics.cpp:755

llvm::Intrinsic::getDeinterleaveIntrinsicID
LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor)
Returns the corresponding llvm.vector.deinterleaveN intrinsic for factor N.
Definition Intrinsics.cpp:1186

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::Intrinsic::getBaseName
LLVM_ABI StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition Intrinsics.cpp:45

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition PatternMatch.h:49

llvm::PatternMatch::m_LogicalOr
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
Definition PatternMatch.h:3297

llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
Definition PatternMatch.h:112

llvm::PatternMatch::m_LogicalAnd
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
Definition PatternMatch.h:3279

llvm::RISCVExceptFlags::UF
@ UF
Definition RISCVBaseInfo.h:586

llvm::RISCVFenceField::R
@ R
Definition RISCVBaseInfo.h:475

llvm::SIEncodingFamily::SI
@ SI
Definition SIDefines.h:36

llvm::VPlanPatternMatch
Definition VPlanPatternMatch.h:20

llvm::VPlanPatternMatch::m_VPValue
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
Definition VPlanPatternMatch.h:51

llvm::VPlanPatternMatch::m_Reverse
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
Definition VPlanPatternMatch.h:491

llvm::codeview::CompileSym2Flags::EC
@ EC
Definition CodeView.h:432

llvm::logicalview::LVAttributeKind::Zero
@ Zero
Definition LVOptions.h:130

llvm::memprof::Meta::Start
@ Start
Definition MemProf.h:69

llvm::rdf::Def
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384

llvm::vputils::isSingleScalar
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
Definition VPlanUtils.cpp:344

llvm::vputils::isAddressSCEVForCost
bool isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, const Loop *L)
Returns true if Addr is an address SCEV that can be passed to TTI::getAddressComputationCost,...
Definition VPlanUtils.cpp:308

llvm::vputils::onlyFirstPartUsed
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition VPlanUtils.cpp:28

llvm::vputils::onlyFirstLaneUsed
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition VPlanUtils.cpp:23

llvm::vputils::onlyScalarValuesUsed
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
Definition VPlanUtils.cpp:33

llvm::vputils::getSCEVExprForVPValue
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
Definition VPlanUtils.cpp:140

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316

llvm::createSimpleReduction
LLVM_ABI Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition LoopUtils.cpp:1442

llvm::Offset
@ Offset
Definition DWP.cpp:532

llvm::zip
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737

llvm::getMinMaxReductionIntrinsicOp
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition LoopUtils.cpp:1168

llvm::Cost
InstructionCost Cost
Definition FunctionSpecialization.h:103

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2544

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition Instructions.h:5103

llvm::getRuntimeVF
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
Definition LoopVectorize.cpp:786

llvm::dyn_cast_if_present
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition iterator_range.h:70

llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198

llvm::interleaveComma
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2303

llvm::cast_or_null
auto cast_or_null(const Y &Val)
Definition Casting.h:714

llvm::concatenateVectors
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition VectorUtils.cpp:1227

llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Definition Instructions.h:5129

llvm::isa_and_nonnull
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676

llvm::createMinMaxOp
LLVM_ABI Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition LoopUtils.cpp:1261

llvm::dyn_cast_or_null
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753

llvm::getOffset
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
Definition RuntimeDyld.cpp:172

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744

llvm::createBitMaskForGaps
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition VectorUtils.cpp:1124

llvm::createStrideMask
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition VectorUtils.cpp:1164

llvm::reverse
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406

llvm::createReplicatedMask
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition VectorUtils.cpp:1144

llvm::ComplexDeinterleavingOperation::Splat
@ Splat
Definition ComplexDeinterleavingPass.h:42

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751

llvm::to_vector
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition SmallVector.h:1307

llvm::toVectorizedTy
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
Definition VectorTypeUtils.h:55

llvm::CaptureComponents::Address
@ Address
Definition ModRef.h:313

llvm::ForceTargetInstructionCost
cl::opt< unsigned > ForceTargetInstructionCost

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::drop_end
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323

llvm::isVectorIntrinsicWithStructReturnOverloadAtField
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
Definition VectorUtils.cpp:215

llvm::IRMemLocation::Other
@ Other
Any other memory.
Definition ModRef.h:68

llvm::canVectorizeTy
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
Definition VectorTypeUtils.h:85

llvm::Data
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189

llvm::createInterleaveMask
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition VectorUtils.cpp:1153

llvm::RecurKind
RecurKind
These are the kinds of recurrences that we support.
Definition IVDescriptors.h:34

llvm::RecurKind::UMin
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
Definition IVDescriptors.h:46

llvm::RecurKind::FMinimumNum
@ FMinimumNum
FP min with llvm.minimumnum semantics.
Definition IVDescriptors.h:56

llvm::RecurKind::FMinimum
@ FMinimum
FP min with llvm.minimum semantics.
Definition IVDescriptors.h:54

llvm::RecurKind::FMaxNum
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
Definition IVDescriptors.h:53

llvm::RecurKind::Mul
@ Mul
Product of integers.
Definition IVDescriptors.h:40

llvm::RecurKind::FMaximum
@ FMaximum
FP max with llvm.maximum semantics.
Definition IVDescriptors.h:55

llvm::RecurKind::SMax
@ SMax
Signed integer max implemented in terms of select(cmp()).
Definition IVDescriptors.h:45

llvm::RecurKind::SMin
@ SMin
Signed integer min implemented in terms of select(cmp()).
Definition IVDescriptors.h:44

llvm::RecurKind::FMinNum
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
Definition IVDescriptors.h:52

llvm::RecurKind::Sub
@ Sub
Subtraction of integers.
Definition IVDescriptors.h:38

llvm::RecurKind::Add
@ Add
Sum of integers.
Definition IVDescriptors.h:37

llvm::RecurKind::FMaximumNum
@ FMaximumNum
FP max with llvm.maximumnum semantics.
Definition IVDescriptors.h:57

llvm::RecurKind::UMax
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
Definition IVDescriptors.h:47

llvm::isVectorIntrinsicWithScalarOpAtArg
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition VectorUtils.cpp:143

llvm::getRecurrenceIdentity
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
Definition LoopUtils.cpp:1434

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:22

llvm::createStepForVF
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
Definition LoopVectorize.cpp:772

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945

llvm::getLoadStoreType
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
Definition Instructions.h:5158

llvm::createOrderedReduction
LLVM_ABI Value * createOrderedReduction(IRBuilderBase &B, RecurKind RdxKind, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence kind RdxKind.
Definition LoopUtils.cpp:1495

llvm::getContainedTypes
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
Definition VectorTypeUtils.h:93

llvm::seq
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305

llvm::toVectorTy
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
Definition VectorTypeUtils.h:20

llvm::InstructionUniformity::Default
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20

llvm::PseudoProbeAttributes::Sentinel
@ Sentinel
Definition PseudoProbe.h:34

llvm::createAnyOfReduction
LLVM_ABI Value * createAnyOfReduction(IRBuilderBase &B, Value *Src, Value *InitVal, PHINode *OrigPhi)
Create a reduction of the given vector Src for a reduction of kind RecurKind::AnyOf.
Definition LoopUtils.cpp:1362

llvm::isVectorIntrinsicWithOverloadTypeAtArg
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Definition VectorUtils.cpp:178

raw_ostream.h

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::TargetTransformInfo::OperandValueInfo
Definition TargetTransformInfo.h:1250

llvm::TargetTransformInfo::OperandValueInfo::Kind
OperandValueKind Kind
Definition TargetTransformInfo.h:1251

llvm::VPCostContext
Struct to hold various analysis needed for cost computations.
Definition VPlanHelpers.h:329

llvm::VPCostContext::getOperandInfo
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1737

llvm::VPCostContext::CostKind
TargetTransformInfo::TargetCostKind CostKind
Definition VPlanHelpers.h:336

llvm::VPCostContext::Types
VPTypeAnalysis Types
Definition VPlanHelpers.h:332

llvm::VPCostContext::TTI
const TargetTransformInfo & TTI
Definition VPlanHelpers.h:330

llvm::VPFirstOrderRecurrencePHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlanRecipes.cpp:4415

llvm::VPFirstOrderRecurrencePHIRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
Definition VPlanRecipes.cpp:4445

llvm::VPFirstOrderRecurrencePHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4454

llvm::VPIRPhi
An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use of cast/dyn_cast/isa and exec...
Definition VPlan.h:1550

llvm::VPIRPhi::getIRPhi
PHINode & getIRPhi()
Definition VPlan.h:1558

llvm::VPIRPhi::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1726

llvm::VPIRPhi::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlanRecipes.cpp:1676

llvm::VPPhi::execute
void execute(VPTransformState &State) override
Generate the instruction.
Definition VPlanRecipes.cpp:1606

llvm::VPPhi::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1625

llvm::VPRecipeWithIRFlags
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:974

llvm::VPRecipeWithIRFlags::getCostForRecipeWithOpcode
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
Definition VPlanRecipes.cpp:928

llvm::VPRecipeWithIRFlags::VPRecipeWithIRFlags
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:975

llvm::VPSymbolicValue
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:226

llvm::VPTransformState::CFGState::VPBB2IRBB
SmallDenseMap< const VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition VPlanHelpers.h:296

llvm::VPTransformState
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition VPlanHelpers.h:190

llvm::VPTransformState::TypeAnalysis
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition VPlanHelpers.h:322

llvm::VPTransformState::CFG
struct llvm::VPTransformState::CFGState CFG

llvm::VPTransformState::get
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:275

llvm::VPTransformState::Builder
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition VPlanHelpers.h:313

llvm::VPTransformState::VF
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition VPlanHelpers.h:199

llvm::VPWidenLoadEVLRecipe::execute
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
Definition VPlanRecipes.cpp:3716

llvm::VPWidenLoadEVLRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3768

llvm::VPWidenLoadEVLRecipe::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
Definition VPlanRecipes.cpp:3749

llvm::VPWidenLoadEVLRecipe::getEVL
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3439

llvm::VPWidenLoadRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3696

llvm::VPWidenLoadRecipe::execute
void execute(VPTransformState &State) override
Generate a wide load or gather.
Definition VPlanRecipes.cpp:3664

llvm::VPWidenStoreEVLRecipe::getStoredValue
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3522

llvm::VPWidenStoreEVLRecipe::execute
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
Definition VPlanRecipes.cpp:3812

llvm::VPWidenStoreEVLRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3863

llvm::VPWidenStoreEVLRecipe::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
Definition VPlanRecipes.cpp:3844

llvm::VPWidenStoreEVLRecipe::getEVL
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3525

llvm::VPWidenStoreRecipe::execute
void execute(VPTransformState &State) override
Generate a wide store or scatter.
Definition VPlanRecipes.cpp:3777

llvm::VPWidenStoreRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3805

llvm::VPWidenStoreRecipe::getStoredValue
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3485