docs/doxygen/VPlanTransforms_8cpp_source.html

//===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// This file implements a set of utility VPlan to VPlan transformations.

///

//===----------------------------------------------------------------------===//


#include "VPlanTransforms.h"

#include "VPRecipeBuilder.h"

#include "VPlan.h"

#include "VPlanAnalysis.h"

#include "VPlanCFG.h"

#include "VPlanDominatorTree.h"

#include "VPlanPatternMatch.h"

#include "VPlanUtils.h"

#include "llvm/ADT/PostOrderIterator.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SetVector.h"

#include "llvm/ADT/TypeSwitch.h"

#include "llvm/Analysis/IVDescriptors.h"

#include "llvm/Analysis/VectorUtils.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/IR/PatternMatch.h"


using namespace llvm;


/// Replace every non-terminator ingredient (a VPWidenPHIRecipe or a
/// VPInstruction) in the blocks of \p Plan's vector loop region with the
/// widening recipe matching its underlying IR instruction.
///
/// \p GetIntOrFpInductionDescriptor is queried for each widened phi; phis for
/// which it returns null are left untouched. \p SE is used to obtain a VPValue
/// for the induction step and \p TLI to look up the vector intrinsic ID of
/// calls.
void VPlanTransforms::VPInstructionsToVPRecipes(
    VPlanPtr &Plan,
    function_ref<const InductionDescriptor *(PHINode *)>
        GetIntOrFpInductionDescriptor,
    ScalarEvolution &SE, const TargetLibraryInfo &TLI) {
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan->getVectorLoopRegion());
  for (VPBasicBlock *Block : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    // Stop at the first block that has no parent region.
    if (!Block->getParent())
      break;

    // The block's terminator, if there is one, is not converted.
    VPRecipeBase *Terminator = Block->getTerminator();
    auto Stop = Terminator ? Terminator->getIterator() : Block->end();

    // Early-inc iteration: each ingredient is erased once it is replaced.
    for (VPRecipeBase &Old :
         make_early_inc_range(make_range(Block->begin(), Stop))) {
      VPValue *OldV = Old.getVPSingleValue();
      Instruction *IRInst = cast<Instruction>(OldV->getUnderlyingValue());

      VPRecipeBase *Replacement = nullptr;
      if (auto *WidenPhi = dyn_cast<VPWidenPHIRecipe>(&Old)) {
        auto *IRPhi = cast<PHINode>(WidenPhi->getUnderlyingValue());
        const InductionDescriptor *IndDesc =
            GetIntOrFpInductionDescriptor(IRPhi);
        // Not an int/fp induction: keep the widened phi as it is.
        if (!IndDesc)
          continue;

        VPValue *StartV = Plan->getOrAddLiveIn(IndDesc->getStartValue());
        VPValue *StepV = vputils::getOrCreateVPValueForSCEVExpr(
            *Plan, IndDesc->getStep(), SE);
        Replacement = new VPWidenIntOrFpInductionRecipe(
            IRPhi, StartV, StepV, &Plan->getVF(), *IndDesc, Old.getDebugLoc());
      } else {
        assert(isa<VPInstruction>(&Old) &&
               "only VPInstructions expected here");
        assert(!isa<PHINode>(IRInst) && "phis should be handled above");

        // Pick the widening recipe from the kind of the underlying IR
        // instruction; anything without a dedicated recipe becomes a
        // VPWidenRecipe.
        if (auto *Load = dyn_cast<LoadInst>(IRInst)) {
          Replacement = new VPWidenLoadRecipe(
              *Load, Old.getOperand(0), nullptr /*Mask*/,
              false /*Consecutive*/, false /*Reverse*/, Old.getDebugLoc());
        } else if (auto *Store = dyn_cast<StoreInst>(IRInst)) {
          // Operands 1 and 0 of the ingredient are passed in that order.
          Replacement = new VPWidenStoreRecipe(
              *Store, Old.getOperand(1), Old.getOperand(0), nullptr /*Mask*/,
              false /*Consecutive*/, false /*Reverse*/, Old.getDebugLoc());
        } else if (auto *GEP = dyn_cast<GetElementPtrInst>(IRInst)) {
          Replacement = new VPWidenGEPRecipe(GEP, Old.operands());
        } else if (auto *Call = dyn_cast<CallInst>(IRInst)) {
          // All operands except the last one are forwarded to the intrinsic
          // recipe.
          Replacement = new VPWidenIntrinsicRecipe(
              *Call, getVectorIntrinsicIDForCall(Call, &TLI),
              {Old.op_begin(), Old.op_end() - 1}, Call->getType(),
              Call->getDebugLoc());
        } else if (auto *Select = dyn_cast<SelectInst>(IRInst)) {
          Replacement = new VPWidenSelectRecipe(*Select, Old.operands());
        } else if (auto *Cast = dyn_cast<CastInst>(IRInst)) {
          Replacement = new VPWidenCastRecipe(
              Cast->getOpcode(), Old.getOperand(0), Cast->getType(), *Cast);
        } else {
          Replacement = new VPWidenRecipe(*IRInst, Old.operands());
        }
      }

      // Put the new recipe in place, redirect users of the old value to it
      // and drop the ingredient.
      Replacement->insertBefore(&Old);
      if (Replacement->getNumDefinedValues() == 1)
        OldV->replaceAllUsesWith(Replacement->getVPSingleValue());
      else
        assert(Replacement->getNumDefinedValues() == 0 &&
               "Only recpies with zero or one defined values expected");
      Old.eraseFromParent();
    }
  }
}


/// Try to sink VPReplicateRecipes and VPScalarIVStepsRecipes that define
/// operands of recipes in the conditional block of a replicate region into
/// that block. Returns true if at least one recipe was moved.
static bool sinkScalarOperands(VPlan &Plan) {
  auto AllBlocks = vp_depth_first_deep(Plan.getEntry());
  bool MadeChange = false;

  // Seed the worklist: for every replicate region whose entry has two
  // successors, and whose first successor leads straight to the region's
  // exiting block, record the defining recipe of each operand used in that
  // first successor.
  SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
  for (VPRegionBlock *Region :
       VPBlockUtils::blocksOnly<VPRegionBlock>(AllBlocks)) {
    VPBasicBlock *RegionEntry = Region->getEntryBasicBlock();
    if (!Region->isReplicator() || RegionEntry->getSuccessors().size() != 2)
      continue;
    auto *ThenBB = dyn_cast<VPBasicBlock>(RegionEntry->getSuccessors()[0]);
    if (!ThenBB ||
        ThenBB->getSingleSuccessor() != Region->getExitingBasicBlock())
      continue;
    for (VPRecipeBase &R : *ThenBB)
      for (VPValue *Op : R.operands())
        if (auto *OpDef =
                dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
          WorkList.insert(std::make_pair(ThenBB, OpDef));
  }

  const bool ScalarVFOnly = Plan.hasScalarVFOnly();
  // Try to sink each replicate or scalar IV steps recipe in the worklist. The
  // worklist grows while it is processed, hence the index-based loop.
  for (unsigned Idx = 0; Idx != WorkList.size(); ++Idx) {
    VPBasicBlock *SinkTo = WorkList[Idx].first;
    VPSingleDefRecipe *SinkCandidate = WorkList[Idx].second;

    // Nothing to do if the candidate already lives in the target block; never
    // move recipes with side effects or memory accesses.
    if (SinkCandidate->getParent() == SinkTo ||
        SinkCandidate->mayHaveSideEffects() ||
        SinkCandidate->mayReadOrWriteMemory())
      continue;

    // Only replicate recipes and scalar IV steps are candidates. Uniform
    // replicate recipes are skipped unless the plan has a scalar VF only.
    auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate);
    if (!RepR && !isa<VPScalarIVStepsRecipe>(SinkCandidate))
      continue;
    if (RepR && !ScalarVFOnly && RepR->isUniform())
      continue;

    // All recipe users of the sink candidate must be in the same block SinkTo
    // or all users outside of SinkTo must be uniform-after-vectorization
    // (i.e., only the first lane is used). In the latter case, SinkCandidate
    // needs to be duplicated. We only know how to duplicate
    // VPReplicateRecipes for now.
    bool NeedsDuplicating = false;
    bool AllUsersOK = true;
    for (VPUser *U : SinkCandidate->users()) {
      auto *UserR = cast<VPRecipeBase>(U);
      if (UserR->getParent() == SinkTo)
        continue;
      NeedsDuplicating = UserR->onlyFirstLaneUsed(SinkCandidate);
      if (!NeedsDuplicating || !RepR) {
        AllUsersOK = false;
        break;
      }
    }
    if (!AllUsersOK)
      continue;

    if (NeedsDuplicating) {
      if (ScalarVFOnly)
        continue;
      // Leave a uniform clone behind for the users outside of SinkTo.
      Instruction *UnderlyingI = SinkCandidate->getUnderlyingInstr();
      auto *Clone =
          new VPReplicateRecipe(UnderlyingI, SinkCandidate->operands(), true);
      // TODO: add ".cloned" suffix to name of Clone's VPValue.

      Clone->insertBefore(SinkCandidate);
      SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) {
        return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
      });
    }

    SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
    // The operands of the recipe that was just sunk become candidates too.
    for (VPValue *Op : SinkCandidate->operands())
      if (auto *OpDef =
              dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
        WorkList.insert(std::make_pair(SinkTo, OpDef));
    MadeChange = true;
  }
  return MadeChange;
}


/// If \p R is a region whose entry block consists of a single
/// VPBranchOnMaskRecipe, return the mask that recipe branches on (its first
/// operand). Returns nullptr if the entry is not a VPBasicBlock, does not
/// contain exactly one recipe, or that recipe is not a VPBranchOnMaskRecipe.
///
/// The helper is file-local, so it is given internal linkage like the other
/// helpers in this file.
static VPValue *getPredicatedMask(VPRegionBlock *R) {
  auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
  if (!EntryBB || EntryBB->size() != 1)
    return nullptr;

  // The size check above guarantees that begin() is dereferenceable.
  auto *BranchOnMask = dyn_cast<VPBranchOnMaskRecipe>(&*EntryBB->begin());
  return BranchOnMask ? BranchOnMask->getOperand(0) : nullptr;
}


/// Return the 'then' block of \p R if \p R is a triangle region, i.e. its
/// entry block has two VPBasicBlock successors and one of them has the other
/// as its single successor. Returns nullptr otherwise.
static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
  auto *Entry = cast<VPBasicBlock>(R->getEntry());
  if (Entry->getNumSuccessors() != 2)
    return nullptr;

  auto *First = dyn_cast<VPBasicBlock>(Entry->getSuccessors()[0]);
  auto *Second = dyn_cast<VPBasicBlock>(Entry->getSuccessors()[1]);
  if (!(First && Second))
    return nullptr;

  // Between them, the two successors must have exactly one outgoing edge.
  if (First->getNumSuccessors() + Second->getNumSuccessors() != 1)
    return nullptr;

  // The block that flows into the other one is the 'then' block.
  if (First->getSingleSuccessor() == Second)
    return First;
  return Second->getSingleSuccessor() == First ? Second : nullptr;
}


// Merge a replicate region into its successor replicate region when the two
// are connected by a single, empty VPBasicBlock and are guarded by the same
// mask. Returns true if at least one region was merged.
static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
  SmallPtrSet<VPRegionBlock *, 4> Merged;

  // Collect the candidate regions up front, so that rewiring blocks below
  // cannot invalidate the traversal of the plan.
  SmallVector<VPRegionBlock *, 8> Candidates;
  for (VPRegionBlock *First : VPBlockUtils::blocksOnly<VPRegionBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    if (!First->isReplicator())
      continue;

    auto *Middle = dyn_cast_or_null<VPBasicBlock>(First->getSingleSuccessor());
    if (!Middle || !Middle->empty())
      continue;

    auto *Second =
        dyn_cast_or_null<VPRegionBlock>(Middle->getSingleSuccessor());
    if (!Second || !Second->isReplicator())
      continue;

    VPValue *FirstMask = getPredicatedMask(First);
    VPValue *SecondMask = getPredicatedMask(Second);
    if (!FirstMask || FirstMask != SecondMask)
      continue;

    assert(FirstMask && SecondMask && "both region must have conditions");
    Candidates.push_back(First);
  }

  // Move recipes from Region1 to its successor region, if both are triangles.
  for (VPRegionBlock *Region1 : Candidates) {
    if (Merged.contains(Region1))
      continue;
    auto *Middle = cast<VPBasicBlock>(Region1->getSingleSuccessor());
    auto *Region2 = cast<VPRegionBlock>(Middle->getSingleSuccessor());

    VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
    VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
    if (!Then1 || !Then2)
      continue;

    // Note: No fusion-preventing memory dependencies are expected in either
    // region. Such dependencies should be rejected during earlier dependence
    // checks, which guarantee accesses can be re-ordered for vectorization.
    //
    // Move the recipes of the first 'then' block to the front of the second
    // one. Walking in reverse and always inserting at the first non-phi
    // position keeps their relative order.
    for (VPRecipeBase &R : make_early_inc_range(reverse(*Then1)))
      R.moveBefore(*Then2, Then2->getFirstNonPhi());

    auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
    auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());

    // Move VPPredInstPHIRecipes from the first merge block to the merge block
    // of the successor region. Users inside the successor's 'then' block are
    // updated to use the original (un-phi'd) values.
    for (VPRecipeBase &PhiR : make_early_inc_range(reverse(*Merge1))) {
      VPValue *PredInst = cast<VPPredInstPHIRecipe>(&PhiR)->getOperand(0);
      VPValue *PhiV = PhiR.getVPSingleValue();
      PhiV->replaceUsesWithIf(PredInst, [Then2](VPUser &U, unsigned) {
        return cast<VPRecipeBase>(&U)->getParent() == Then2;
      });

      // Phi recipes left without users after the merge are removed; the
      // others move to the successor's merge block.
      if (PhiV->getNumUsers() == 0)
        PhiR.eraseFromParent();
      else
        PhiR.moveBefore(*Merge2, Merge2->begin());
    }

    // Finally, unlink the first region: its predecessors now feed the middle
    // block directly.
    for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
      VPBlockUtils::disconnectBlocks(Pred, Region1);
      VPBlockUtils::connectBlocks(Pred, Middle);
    }
    VPBlockUtils::disconnectBlocks(Region1, Middle);
    Merged.insert(Region1);
  }

  return !Merged.empty();
}


/// Build a triangular if-then replicate region in \p Plan for the predicated
/// recipe \p PredRecipe and return it. \p PredRecipe is replaced by an
/// unmasked copy placed in the region's ".if" block and is erased; if it had
/// users they are redirected to a VPPredInstPHIRecipe in the region's
/// ".continue" block.
static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
                                            VPlan &Plan) {
  Instruction *Instr = PredRecipe->getUnderlyingInstr();
  std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
  assert(Instr->getParent() && "Predicated instruction not in any basic block");

  // Entry block: branch on the mask of the predicated recipe.
  auto *BranchOnMask = new VPBranchOnMaskRecipe(PredRecipe->getMask());
  auto *Entry =
      Plan.createVPBasicBlock(Twine(RegionName) + ".entry", BranchOnMask);

  // 'If' block: a copy of the recipe with its last operand (the mask)
  // dropped.
  auto *Unmasked = new VPReplicateRecipe(
      Instr,
      make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
      PredRecipe->isUniform());
  auto *Pred = Plan.createVPBasicBlock(Twine(RegionName) + ".if", Unmasked);

  // 'Continue' block: holds a phi for the predicated value, which is only
  // created when the original recipe has users.
  VPPredInstPHIRecipe *PHIRecipe = nullptr;
  if (PredRecipe->getNumUsers() != 0) {
    PHIRecipe = new VPPredInstPHIRecipe(Unmasked, Unmasked->getDebugLoc());
    PredRecipe->replaceAllUsesWith(PHIRecipe);
    PHIRecipe->setOperand(0, Unmasked);
  }
  PredRecipe->eraseFromParent();
  auto *Exiting =
      Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);

  VPRegionBlock *Region =
      Plan.createVPRegionBlock(Entry, Exiting, RegionName, true);

  // Note: first set Entry as region entry and then connect successors starting
  // from it in order, to propagate the "parent" of each VPBasicBlock.
  VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
  VPBlockUtils::connectBlocks(Pred, Exiting);

  return Region;
}


/// Wrap every predicated VPReplicateRecipe of \p Plan in its own replicate
/// region. The block containing the recipe is split at the recipe and the new
/// region is inserted on the edge between the two halves.
static void addReplicateRegions(VPlan &Plan) {
  // Collect first; the plan's CFG is modified while the regions are created.
  SmallVector<VPReplicateRecipe *> Predicated;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (RepR && RepR->isPredicated())
        Predicated.push_back(RepR);
    }
  }

  unsigned SplitCount = 0;
  for (VPReplicateRecipe *RepR : Predicated) {
    VPBasicBlock *Before = RepR->getParent();
    VPBasicBlock *After = Before->splitAt(RepR->getIterator());

    // Name the tail block after the original IR block plus a running number,
    // or leave it unnamed if the IR block has no name.
    BasicBlock *IRBB = RepR->getUnderlyingInstr()->getParent();
    After->setName(
        IRBB->hasName() ? IRBB->getName() + "." + Twine(SplitCount++) : "");

    // Build the region for the recipe and place it between the two halves.
    VPBlockBase *Region = createReplicateRegion(RepR, Plan);
    Region->setParent(Before->getParent());
    VPBlockUtils::insertOnEdge(Before, After, Region);
  }
}


/// Fold VPBasicBlocks into their single predecessor when that predecessor is
/// a (non-IR) VPBasicBlock with exactly one successor. Returns true if any
/// block was merged.
static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
  SmallVector<VPBasicBlock *> ToMerge;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    // Don't fold the blocks in the skeleton of the Plan into their single
    // predecessors for now.
    // TODO: Remove restriction once more of the skeleton is modeled in VPlan.
    if (!VPBB->getParent())
      continue;

    auto *Pred = dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
    const bool CanMerge =
        Pred && Pred->getNumSuccessors() == 1 && !isa<VPIRBasicBlock>(Pred);
    if (CanMerge)
      ToMerge.push_back(VPBB);
  }

  for (VPBasicBlock *Dead : ToMerge) {
    auto *Pred = cast<VPBasicBlock>(Dead->getSinglePredecessor());

    // Append all recipes to the end of the predecessor.
    for (VPRecipeBase &R : make_early_inc_range(*Dead))
      R.moveBefore(*Pred, Pred->end());
    VPBlockUtils::disconnectBlocks(Pred, Dead);

    // If the merged block was the exiting block of its region, the
    // predecessor takes over that role.
    auto *Region = cast_or_null<VPRegionBlock>(Dead->getParent());
    if (Region && Region->getExiting() == Dead)
      Region->setExiting(Pred);

    // Hand the successors over to the predecessor. Iterate over a copy, as
    // the successor list is modified in the loop.
    for (auto *Succ : to_vector(Dead->successors())) {
      VPBlockUtils::disconnectBlocks(Dead, Succ);
      VPBlockUtils::connectBlocks(Pred, Succ);
    }
    // The merged block is now dead and will be cleaned up when the plan gets
    // destroyed.
  }
  return !ToMerge.empty();
}


/// Convert masked VPReplicateRecipes of \p Plan to if-then region blocks and
/// then simplify the result until nothing changes any more.
void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) {
  addReplicateRegions(Plan);

  // Each round runs all three simplifications unconditionally; another round
  // is started as long as at least one of them reported a change.
  bool Changed;
  do {
    Changed = sinkScalarOperands(Plan);
    Changed |= mergeReplicateRegionsIntoSuccessors(Plan);
    Changed |= mergeBlocksIntoPredecessors(Plan);
  } while (Changed);
}


/// Remove redundant casts of inductions.
///
/// Such redundant casts are casts of induction variables that can be ignored,
/// because we already proved that the casted phi is equal to the uncasted phi
/// in the vectorized loop. There is no need to vectorize the cast - the same
/// value can be used for both the phi and casts in the vector loop.
///
/// Only VPWidenIntOrFpInductionRecipes in the header of \p Plan's vector loop
/// region that have no truncate instruction are considered.
static void removeRedundantInductionCasts(VPlan &Plan) {
  for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!IV || IV->getTruncInst())
      continue;

    // A sequence of IR Casts has potentially been recorded for IV, which
    // *must be bypassed* when the IV is vectorized, because the vectorized IV
    // will produce the desired casted value. This sequence forms a def-use
    // chain and is provided in reverse order, ending with the cast that uses
    // the IV phi. Search for the recipe of the last cast in the chain and
    // replace it with the original IV. Note that only the final cast is
    // expected to have users outside the cast-chain and the dead casts left
    // over will be cleaned up later.
    auto &Casts = IV->getInductionDescriptor().getCastInsts();
    VPValue *FindMyCast = IV;
    for (Instruction *IRCast : reverse(Casts)) {
      VPSingleDefRecipe *FoundUserCast = nullptr;
      for (auto *U : FindMyCast->users()) {
        auto *UserCast = dyn_cast<VPSingleDefRecipe>(U);
        if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
          FoundUserCast = UserCast;
          break;
        }
      }
      FindMyCast = FoundUserCast;
      // No user recipe corresponds to this cast, so the chain cannot be
      // followed any further. Stop here instead of dereferencing a null
      // pointer in the next iteration or in the replacement below.
      if (!FindMyCast)
        break;
    }

    // Only bypass the casts if the complete chain was found.
    if (FindMyCast)
      FindMyCast->replaceAllUsesWith(IV);
  }
}


/// If the canonical IV of \p Plan is used by a VPWidenCanonicalIVRecipe, try
/// to replace that recipe with an existing canonical
/// VPWidenIntOrFpInductionRecipe from the loop header.
static void removeRedundantCanonicalIVs(VPlan &Plan) {
  VPCanonicalIVPHIRecipe *CanIV = Plan.getCanonicalIV();

  // Look for the first widened-canonical-IV user of the canonical IV.
  VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
  for (VPUser *U : CanIV->users())
    if ((WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U)))
      break;
  if (!WidenNewIV)
    return;

  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  for (VPRecipeBase &Phi : Header->phis()) {
    auto *OrigIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!OrigIV || !OrigIV->isCanonical())
      continue;

    // OrigIV can stand in for WidenNewIV if it provides everything the users
    // of WidenNewIV need: either OrigIV will generate a vector phi (it has a
    // user that does not use scalars), or all users of WidenNewIV demand the
    // first lane only.
    const bool HasVectorUser = any_of(OrigIV->users(), [OrigIV](VPUser *U) {
      return !U->usesScalars(OrigIV);
    });
    if (!HasVectorUser && !vputils::onlyFirstLaneUsed(WidenNewIV))
      continue;

    WidenNewIV->replaceAllUsesWith(OrigIV);
    WidenNewIV->eraseFromParent();
    return;
  }
}


/// Returns true if \p R is dead and can be removed: it is either a predicated
/// replicate recipe of an assume intrinsic, or it has no side effects and none
/// of the values it defines has a user.
static bool isDeadRecipe(VPRecipeBase &R) {
  using namespace llvm::PatternMatch;

  // Do remove conditional assume instructions as their conditions may be
  // flattened.
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R))
    if (RepR->isPredicated() &&
        match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>()))
      return true;

  if (R.mayHaveSideEffects())
    return false;

  // The recipe is dead if no defined value is kept alive by a user.
  return none_of(R.definedValues(),
                 [](VPValue *V) { return V->getNumUsers() != 0; });
}


/// Erase all recipes of \p Plan for which isDeadRecipe() returns true.
void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());

  // Walk the blocks in the reverse of the RPO order, and the recipes of each
  // block from last to first, to catch chains of dead recipes.
  for (VPBasicBlock *Block :
       reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)))
    for (VPRecipeBase &Candidate : make_early_inc_range(reverse(*Block)))
      if (isDeadRecipe(Candidate))
        Candidate.eraseFromParent();
}


/// Use \p Builder to create a VPScalarIVStepsRecipe for an induction of kind
/// \p Kind with opcode \p InductionOpcode (and \p FPBinOp, if any), starting
/// at \p StartV and advancing by \p Step. The steps are based on an IV derived
/// from the canonical IV of \p Plan. If \p TruncI is non-null, the derived IV
/// is truncated to the type of \p TruncI. If the type of \p Step differs from
/// the resulting type, a truncate of \p Step is created in the single
/// hierarchical predecessor of the loop header. \p DL is attached to the
/// created casts.
static VPScalarIVStepsRecipe *
createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
                    Instruction::BinaryOps InductionOpcode,
                    FPMathOperator *FPBinOp, Instruction *TruncI,
                    VPValue *StartV, VPValue *Step, DebugLoc DL,
                    VPBuilder &Builder) {
  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  VPCanonicalIVPHIRecipe *CanIV = Plan.getCanonicalIV();
  VPSingleDefRecipe *DerivedIV = Builder.createDerivedIV(
      Kind, FPBinOp, StartV, CanIV, Step, "offset.idx");

  // Narrow the derived IV to the type of the truncate, if there is one.
  VPTypeAnalysis TypeInfo(CanIV->getScalarType());
  Type *IVTy = TypeInfo.inferScalarType(DerivedIV);
  if (TruncI) {
    Type *NarrowTy = TruncI->getType();
    assert(IVTy->getScalarSizeInBits() > NarrowTy->getScalarSizeInBits() &&
           "Not truncating.");
    assert(IVTy->isIntegerTy() && "Truncation requires an integer type");
    DerivedIV =
        Builder.createScalarCast(Instruction::Trunc, DerivedIV, NarrowTy, DL);
    IVTy = NarrowTy;
  }

  // Narrow the step as well if its type does not match the IV's type.
  Type *StepTy = TypeInfo.inferScalarType(Step);
  if (IVTy != StepTy) {
    assert(StepTy->getScalarSizeInBits() > IVTy->getScalarSizeInBits() &&
           "Not truncating.");
    assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
    auto *Preheader =
        cast<VPBasicBlock>(Header->getSingleHierarchicalPredecessor());
    // The truncate of the step is emitted into the header's predecessor. The
    // guard is scoped to this block (presumably restoring the builder's
    // previous insert point on exit).
    VPBuilder::InsertPointGuard Guard(Builder);
    Builder.setInsertPoint(Preheader);
    Step = Builder.createScalarCast(Instruction::Trunc, Step, IVTy, DL);
  }
  return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, DerivedIV, Step);
}


/// Return the users of \p V together with, transitively, the users of every
/// value defined by those users. Users that are VPHeaderPHIRecipes are
/// included in the result but are not looked through.
static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
  SetVector<VPUser *> Seen(V->user_begin(), V->user_end());
  // The set doubles as the worklist: it grows while it is being scanned.
  for (unsigned Idx = 0; Idx != Seen.size(); ++Idx) {
    auto *UserR = cast<VPRecipeBase>(Seen[Idx]);
    if (!isa<VPHeaderPHIRecipe>(UserR))
      for (VPValue *Def : UserR->definedValues())
        Seen.insert(Def->user_begin(), Def->user_end());
  }
  return Seen.takeVector();
}


/// Legalize and optimize the wide inductions in the header of \p Plan's vector
/// loop region. For each VPWidenInductionRecipe:
///  * VPReplicateRecipe and VPWidenRecipe (transitive) users that are
///    uniform-after-vectorization, or of which only the first lane is used,
///    are replaced by uniform VPReplicateRecipes;
///  * a VPWidenPointerInductionRecipe that only generates scalars is replaced
///    by PtrAdd(IndStart, ScalarIVSteps(0, Step));
///  * for a VPWidenIntOrFpInductionRecipe, scalar steps built on the canonical
///    IV are created and the users that use scalars (or all users, if the
///    plan has a scalar VF) are updated to use them. This is an optional
///    optimization to reduce the need for vector extracts.
static void legalizeAndOptimizeInductions(VPlan &Plan) {
  using namespace llvm::VPlanPatternMatch;
  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  const bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
  VPBuilder Builder(Header, Header->getFirstNonPhi());
  for (VPRecipeBase &Phi : Header->phis()) {
    auto *WidenInd = dyn_cast<VPWidenInductionRecipe>(&Phi);
    if (!WidenInd)
      continue;

    // Try to narrow wide and replicating recipes to uniform recipes, based on
    // VPlan analysis.
    // TODO: Apply to all recipes in the future, to replace legacy uniformity
    // analysis.
    auto AllUsers = collectUsersRecursively(WidenInd);
    for (VPUser *U : reverse(AllUsers)) {
      auto *Def = dyn_cast<VPSingleDefRecipe>(U);
      auto *RepR = dyn_cast<VPReplicateRecipe>(U);

      // Skip recipes that shouldn't be narrowed: anything other than a used
      // replicate/widen recipe with an underlying value, and replicate
      // recipes that are already uniform or are predicated.
      if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def))
        continue;
      if (Def->getNumUsers() == 0 || !Def->getUnderlyingValue())
        continue;
      if (RepR && (RepR->isUniform() || RepR->isPredicated()))
        continue;

      // Skip recipes that may have other lanes than their first used.
      if (!vputils::isUniformAfterVectorization(Def) &&
          !vputils::onlyFirstLaneUsed(Def))
        continue;

      auto *UniformClone = new VPReplicateRecipe(
          Def->getUnderlyingInstr(), Def->operands(), /*IsUniform*/ true);
      UniformClone->insertAfter(Def);
      Def->replaceAllUsesWith(UniformClone);
    }

    // Replace wide pointer inductions which have only their scalars used by
    // PtrAdd(IndStart, ScalarIVSteps (0, Step)).
    if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
      if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
        continue;

      const InductionDescriptor &PtrID = PtrIV->getInductionDescriptor();
      // The scalar steps start at zero, in the type of the induction step.
      VPValue *ZeroV =
          Plan.getOrAddLiveIn(ConstantInt::get(PtrID.getStep()->getType(), 0));
      VPValue *PtrStepV = PtrIV->getOperand(1);
      VPScalarIVStepsRecipe *PtrSteps = createScalarIVSteps(
          Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
          nullptr, ZeroV, PtrStepV, PtrIV->getDebugLoc(), Builder);

      VPValue *NextGEP = Builder.createPtrAdd(
          PtrIV->getStartValue(), PtrSteps, PtrIV->getDebugLoc(), "next.gep");

      PtrIV->replaceAllUsesWith(NextGEP);
      continue;
    }

    // Replace widened induction with scalar steps for users that only use
    // scalars. Nothing to do if there are only vector VFs and no user uses
    // scalars.
    auto *WideIV = cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
          return U->usesScalars(WideIV);
        }))
      continue;

    const InductionDescriptor &ID = WideIV->getInductionDescriptor();
    VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
        Plan, ID.getKind(), ID.getInductionOpcode(),
        dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
        WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
        WideIV->getDebugLoc(), Builder);

    // Update the users of the IV to use the scalar steps instead: only the
    // users of scalars if all VFs are vector VFs, every user otherwise.
    if (HasOnlyVectorVFs)
      WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
        return U.usesScalars(WideIV);
      });
    else
      WideIV->replaceAllUsesWith(Steps);
  }
}


/// Check whether \p VPV is an untruncated wide induction, either the header
/// IV itself or its increment by the induction step. Returns the header IV
/// (the value before the increment) if so and nullptr otherwise.
static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
  if (auto *HeaderIV = dyn_cast<VPWidenInductionRecipe>(VPV)) {
    // VPV itself is a wide induction, separately compute the end value for
    // exit users if it is not a truncated IV.
    auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(HeaderIV);
    if (IntOrFpIV && IntOrFpIV->getTruncInst())
      return nullptr;
    return HeaderIV;
  }

  // Otherwise VPV must be defined by a two-operand recipe with a wide
  // induction as one of its operands.
  VPRecipeBase *Def = VPV->getDefiningRecipe();
  if (!Def || Def->getNumOperands() != 2)
    return nullptr;
  auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(0));
  if (!WideIV)
    WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(1));
  if (!WideIV)
    return nullptr;

  // Returns true if VPV increments WideIV by the induction step.
  auto IncrementsByStep = [&]() {
    using namespace VPlanPatternMatch;
    auto &ID = WideIV->getInductionDescriptor();
    VPValue *IVStep = WideIV->getStepValue();

    switch (ID.getInductionOpcode()) {
    case Instruction::Add:
      return match(VPV, m_c_Binary<Instruction::Add>(m_Specific(WideIV),
                                                     m_Specific(IVStep)));
    case Instruction::FAdd:
      return match(VPV, m_c_Binary<Instruction::FAdd>(m_Specific(WideIV),
                                                      m_Specific(IVStep)));
    case Instruction::FSub:
      return match(VPV, m_Binary<Instruction::FSub>(m_Specific(WideIV),
                                                    m_Specific(IVStep)));
    case Instruction::Sub: {
      // IVStep will be the negated step of the subtraction. Check if
      // Step == -1 * IVStep, which requires both to be live-in constants.
      VPValue *SubStep;
      if (!match(VPV, m_Binary<Instruction::Sub>(m_VPValue(),
                                                 m_VPValue(SubStep))) ||
          !SubStep->isLiveIn() || !IVStep->isLiveIn())
        return false;
      auto *SubStepCI = dyn_cast<ConstantInt>(SubStep->getLiveInIRValue());
      auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue());
      return SubStepCI && IVStepCI &&
             SubStepCI->getValue() == (-1 * IVStepCI->getValue());
    }
    default:
      // For any other opcode, only a pointer induction incremented through a
      // GEP by the induction step qualifies.
      return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
             match(VPV, m_GetElementPtr(m_Specific(WideIV),
                                        m_Specific(WideIV->getStepValue())));
    }
    llvm_unreachable("should have been covered by switch above");
  };

  if (!IncrementsByStep())
    return nullptr;
  return WideIV;
}


/// Rewrite the operands of the phi-wrapping recipes in \p Plan's exit block
/// that extract (from the end, with offset 1) an optimizable wide induction or
/// the value getOptimizableIVOf() associates with one. \p EndValues must map
/// each such wide induction to its pre-computed end value.
///
/// Nothing is changed unless \p Plan has exactly one exit block and that block
/// has a single predecessor. New recipes are created at the terminator of the
/// middle block.
void VPlanTransforms::optimizeInductionExitUsers(
    VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues) {
  using namespace VPlanPatternMatch;

  SmallVector<VPIRBasicBlock *> Exits(Plan.getExitBlocks());
  if (Exits.size() != 1)
    return;

  VPIRBasicBlock *ExitBlock = Exits.front();
  VPBlockBase *ExitPred = ExitBlock->getSinglePredecessor();
  if (!ExitPred)
    return;
  assert(ExitPred == Plan.getMiddleBlock() &&
         "predecessor must be the middle block");

  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
  VPBuilder MiddleBuilder(Plan.getMiddleBlock()->getTerminator());
  for (VPRecipeBase &ExitR : *ExitBlock) {
    auto *ExitIRI = cast<VPIRInstruction>(&ExitR);
    // Stop at the first recipe that does not wrap a phi.
    if (!isa<PHINode>(ExitIRI->getInstruction()))
      break;

    // Only operands of the form ExtractFromEnd(Incoming, 1) are of interest.
    VPValue *Incoming;
    bool ExtractsFromEnd =
        match(ExitIRI->getOperand(0),
              m_VPInstruction<VPInstruction::ExtractFromEnd>(
                  m_VPValue(Incoming), m_SpecificInt(1)));
    if (!ExtractsFromEnd)
      continue;

    auto *WideIV = getOptimizableIVOf(Incoming);
    if (!WideIV)
      continue;

    VPValue *EndValue = EndValues.lookup(WideIV);
    assert(EndValue && "end value must have been pre-computed");

    // The exiting value is not the wide induction itself: the pre-computed
    // end value can be used as is.
    if (Incoming != WideIV) {
      ExitIRI->setOperand(0, EndValue);
      continue;
    }

    // The wide induction itself escapes: undo one step starting from the end
    // value, using the operation that fits the induction's scalar type.
    VPValue *Step = WideIV->getStepValue();
    Type *IVTy = TypeInfo.inferScalarType(WideIV);
    VPValue *Escape = nullptr;
    if (IVTy->isIntegerTy()) {
      Escape = MiddleBuilder.createNaryOp(Instruction::Sub, {EndValue, Step},
                                          {}, "ind.escape");
    } else if (IVTy->isPointerTy()) {
      // Pointers are stepped back by adding the negated step (0 - Step).
      auto *Zero = Plan.getOrAddLiveIn(
          ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
      auto *NegatedStep =
          MiddleBuilder.createNaryOp(Instruction::Sub, {Zero, Step});
      Escape =
          MiddleBuilder.createPtrAdd(EndValue, NegatedStep, {}, "ind.escape");
    } else if (IVTy->isFloatingPointTy()) {
      // Apply the inverse of the induction's own FP operation, re-using its
      // fast-math flags.
      const auto &ID = WideIV->getInductionDescriptor();
      auto InverseOpcode =
          ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
              ? Instruction::FSub
              : Instruction::FAdd;
      Escape = MiddleBuilder.createNaryOp(
          InverseOpcode, {EndValue, Step},
          {ID.getInductionBinOp()->getFastMathFlags()});
    } else {
      llvm_unreachable("all possible induction types must be handled");
    }
    ExitIRI->setOperand(0, Escape);
  }
}


/// Remove redundant VPExpandSCEVRecipes in \p Plan's entry block: every recipe
/// expanding a SCEV that an earlier recipe in the block already expands is
/// replaced by that earlier recipe and erased.
static void removeRedundantExpandSCEVRecipes(VPlan &Plan) {
  // Maps each SCEV to the first value seen that expands it.
  DenseMap<const SCEV *, VPValue *> FirstExpansion;

  for (VPRecipeBase &R :
       make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) {
    if (auto *Expand = dyn_cast<VPExpandSCEVRecipe>(&R)) {
      auto [Slot, IsFirst] =
          FirstExpansion.insert({Expand->getSCEV(), Expand});
      // The first expansion of a SCEV is kept.
      if (IsFirst)
        continue;
      Expand->replaceAllUsesWith(Slot->second);
      Expand->eraseFromParent();
    }
  }
}


/// Erase the recipe defining \p V if isDeadRecipe() reports it as dead, then
/// repeat the check for the values it used as operands, and so on. Values
/// without a defining recipe and values already visited are skipped.
static void recursivelyDeleteDeadRecipes(VPValue *V) {
  SmallPtrSet<VPValue *, 8> Visited;
  SmallVector<VPValue *> Pending;
  Pending.push_back(V);

  // Pending starts out non-empty, so checking the condition at the bottom of
  // the loop is equivalent to checking it at the top.
  do {
    VPValue *Val = Pending.pop_back_val();
    if (!Visited.insert(Val).second)
      continue;

    VPRecipeBase *Def = Val->getDefiningRecipe();
    if (!Def || !isDeadRecipe(*Def))
      continue;

    // Queue the operands before erasing the recipe that references them.
    Pending.append(Def->op_begin(), Def->op_end());
    Def->eraseFromParent();
  } while (!Pending.empty());
}


/// Try to simplify recipe \p R. \p TypeInfo is used to query the scalar types
/// of the VPValues involved.
///
/// The patterns handled, in the order they are tried, are:
///  * blends with a single unique incoming value, and blends that are not yet
///    normalized,
///  * trunc (zext/sext A),
///  * (X && Y) || (X && !Y),
///  * A * 1,
///  * not (not A),
///  * derived IVs of the form 0 + A * 1 and 0 + 0 * x.
///
/// Except for blends and the logical-or pattern, \p R is not erased here; only
/// the uses of its result are replaced.
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
  using namespace llvm::VPlanPatternMatch;

  if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
    // Try to remove redundant blend recipes.
    // Collect the distinct incoming values, ignoring those whose mask matches
    // m_False(). The first incoming value of a normalized blend is always
    // counted, without querying a mask for it.
    SmallPtrSet<VPValue *, 4> UniqueValues;
    if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
      UniqueValues.insert(Blend->getIncomingValue(0));
    for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
      if (!match(Blend->getMask(I), m_False()))
        UniqueValues.insert(Blend->getIncomingValue(I));

    // A single unique value means the blend always produces that value.
    if (UniqueValues.size() == 1) {
      Blend->replaceAllUsesWith(*UniqueValues.begin());
      Blend->eraseFromParent();
      return;
    }

    if (Blend->isNormalized())
      return;

    // Normalize the blend so its first incoming value is used as the initial
    // value with the others blended into it.

    unsigned StartIndex = 0;
    for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
      // If a value's mask is used only by the blend then it can be dead-coded.
      // TODO: Find the most expensive mask that can be deadcoded, or a mask
      // that's used by multiple blends where it can be removed from them all.
      VPValue *Mask = Blend->getMask(I);
      if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
        StartIndex = I;
        break;
      }
    }

    // Build the operand list of the normalized blend: the start value without
    // a mask, followed by (value, mask) pairs for all other incoming values.
    SmallVector<VPValue *, 4> OperandsWithMask;
    OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));

    for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
      if (I == StartIndex)
        continue;
      OperandsWithMask.push_back(Blend->getIncomingValue(I));
      OperandsWithMask.push_back(Blend->getMask(I));
    }

    auto *NewBlend = new VPBlendRecipe(
        cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
    NewBlend->insertBefore(&R);

    // Remember the mask of the start value before erasing the old blend; the
    // new blend does not use it, so it (and its operands) may now be dead.
    VPValue *DeadMask = Blend->getMask(StartIndex);
    Blend->replaceAllUsesWith(NewBlend);
    Blend->eraseFromParent();
    recursivelyDeleteDeadRecipes(DeadMask);
    return;
  }

  // Simplify trunc (zext/sext A), depending on how the type of A relates to
  // the type produced by the truncate.
  VPValue *A;
  if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
    VPValue *Trunc = R.getVPSingleValue();
    Type *TruncTy = TypeInfo.inferScalarType(Trunc);
    Type *ATy = TypeInfo.inferScalarType(A);
    if (TruncTy == ATy) {
      // The casts cancel out; use A directly.
      Trunc->replaceAllUsesWith(A);
    } else {
      // Don't replace a scalarizing recipe with a widened cast.
      if (isa<VPReplicateRecipe>(&R))
        return;
      if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {

        // A is narrower than the result: a single extend of the same kind as
        // the original one, directly to the result type, is sufficient.
        unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
                                 ? Instruction::SExt
                                 : Instruction::ZExt;
        auto *VPC =
            new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
        if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
          // UnderlyingExt has distinct return type, used to retain legacy cost.
          VPC->setUnderlyingValue(UnderlyingExt);
        }
        VPC->insertBefore(&R);
        Trunc->replaceAllUsesWith(VPC);
      } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
        // A is wider than the result: truncate A directly.
        auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
        VPC->insertBefore(&R);
        Trunc->replaceAllUsesWith(VPC);
      }
    }
#ifndef NDEBUG
    // Verify that the cached type info for both A and its users is still
    // accurate by comparing it to freshly computed types.
    VPTypeAnalysis TypeInfo2(
        R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
    assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
    for (VPUser *U : A->users()) {
      auto *R = cast<VPRecipeBase>(U);
      for (VPValue *VPV : R->definedValues())
        assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
    }
#endif
  }

  // Simplify (X && Y) || (X && !Y) -> X.
  // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
  // && (Y || Z) and (X || !X) into true. This requires queuing newly created
  // recipes to be visited during simplification.
  VPValue *X, *Y, *X1, *Y1;
  if (match(&R,
            m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
                         m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
      X == X1 && Y == Y1) {
    R.getVPSingleValue()->replaceAllUsesWith(X);
    R.eraseFromParent();
    return;
  }

  // Simplify A * 1 (in either operand order) -> A.
  if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
    return R.getVPSingleValue()->replaceAllUsesWith(A);

  // Simplify not (not A) -> A.
  if (match(&R, m_Not(m_Not(m_VPValue(A)))))
    return R.getVPSingleValue()->replaceAllUsesWith(A);

  // Remove redundant DerivedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
  // In both cases the replacement is operand 1 of R, and it is only used if
  // its scalar type equals the scalar type of R's result.
  if ((match(&R,
             m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) ||
       match(&R,
             m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
      TypeInfo.inferScalarType(R.getOperand(1)) ==
          TypeInfo.inferScalarType(R.getVPSingleValue()))
    return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
}


/// Run simplifyRecipe() on every recipe of \p Plan, visiting the basic blocks
/// reachable from the plan's entry in reverse post-order (including blocks
/// nested in regions). \p CanonicalIVTy is the type VPTypeAnalysis uses for
/// all un-typed live-ins.
static void simplifyRecipes(VPlan &Plan, Type *CanonicalIVTy) {
  VPTypeAnalysis TypeInfo(CanonicalIVTy);
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());

  // Recipes may be erased while being simplified, hence the early-increment
  // range over each block.
  for (VPBasicBlock *Block : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
    for (VPRecipeBase &Recipe : make_early_inc_range(*Block))
      simplifyRecipe(Recipe, TypeInfo);
}


/// Specialize \p Plan for the chosen \p BestVF and \p BestUF. If the trip
/// count is known (via \p PSE's ScalarEvolution) to be non-zero and at most
/// VF * UF, the vector loop executes only once: the loop region is then either
/// dissolved entirely or its latch terminator is replaced by an unconditional
/// exit. In that case the plan's VF and UF are also set to \p BestVF and
/// \p BestUF. Otherwise \p Plan is left untouched.
void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                         unsigned BestUF,
                                         PredicatedScalarEvolution &PSE) {
  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");

  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  VPBasicBlock *Latch = LoopRegion->getExitingBasicBlock();
  auto *Term = &Latch->back();

  // Try to simplify the branch condition if TC <= VF * UF when preparing to
  // execute the plan for the main vector loop. This is only done for the
  // following terminators:
  //  1. BranchOnCount, or
  //  2. BranchOnCond where the input is Not(ActiveLaneMask).
  using namespace llvm::VPlanPatternMatch;
  bool IsSupportedTerminator =
      match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
      match(Term,
            m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue()))));
  if (!IsSupportedTerminator)
    return;

  ScalarEvolution &SE = *PSE.getSE();
  const SCEV *TC = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
  assert(!isa<SCEVCouldNotCompute>(TC) &&
         "Trip count SCEV must be computable");
  const SCEV *VFTimesUF =
      SE.getElementCount(TC->getType(), BestVF.multiplyCoefficientBy(BestUF));
  if (TC->isZero())
    return;
  if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, TC, VFTimesUF))
    return;

  // The vector loop region only executes once. If possible, completely remove
  // the region, otherwise replace the terminator controlling the latch with
  // (BranchOnCond true).
  auto *Header = cast<VPBasicBlock>(LoopRegion->getEntry());
  // Read the canonical IV type up front: the canonical IV phi is among the
  // header phis erased below.
  auto *CanIVTy = Plan.getCanonicalIV()->getScalarType();
  bool CanRemoveRegion = all_of(
      Header->phis(),
      IsaPred<VPCanonicalIVPHIRecipe, VPFirstOrderRecurrencePHIRecipe>);
  if (!CanRemoveRegion) {
    // The vector region contains header phis for which we cannot remove the
    // loop region yet.
    LLVMContext &Ctx = SE.getContext();
    auto *True = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
    auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, {True},
                                  Term->getDebugLoc());
    Latch->appendRecipe(BOC);
  } else {
    // With a single iteration, every header phi equals its start value.
    for (VPRecipeBase &PhiR : make_early_inc_range(Header->phis())) {
      auto *HeaderPhi = cast<VPHeaderPHIRecipe>(&PhiR);
      HeaderPhi->replaceAllUsesWith(HeaderPhi->getStartValue());
      HeaderPhi->eraseFromParent();
    }

    // Cut the region out of the CFG ...
    VPBlockBase *Pred = LoopRegion->getSinglePredecessor();
    VPBlockBase *Succ = LoopRegion->getSingleSuccessor();
    VPBlockUtils::disconnectBlocks(Pred, LoopRegion);
    VPBlockUtils::disconnectBlocks(LoopRegion, Succ);

    // ... detach its top-level blocks from it ...
    for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry()))
      Block->setParent(nullptr);

    // ... and link them in where the region used to be.
    VPBlockUtils::connectBlocks(Pred, Header);
    VPBlockUtils::connectBlocks(Latch, Succ);
    simplifyRecipes(Plan, CanIVTy);
  }

  // In both cases the original latch terminator is no longer needed.
  Term->eraseFromParent();
  VPlanTransforms::removeDeadRecipes(Plan);

  Plan.setVF(BestVF);
  Plan.setUF(BestUF);
  // TODO: Further simplifications are possible
  //      1. Replace inductions with constants.
  //      2. Replace vector loop region with VPBasicBlock.
}


/// Sink the (transitive) users of \p FOR after \p Previous, the recipe
/// defining the previous value of the recurrence, using \p VPDT for dominance
/// queries. \returns true if all users could be re-arranged as needed, or
/// false if that is not possible; nothing is moved in the latter case.
static bool
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
                                 VPRecipeBase *Previous,
                                 VPDominatorTree &VPDT) {
  // Recipes whose users still need to be inspected; apart from FOR itself,
  // these are also the recipes that will be sunk.
  SmallVector<VPRecipeBase *> ToSink;
  SmallPtrSet<VPRecipeBase *, 8> Visited;
  Visited.insert(Previous);

  // Decide what to do with a user: returns false if it makes sinking
  // impossible, otherwise queues it when it has to be moved and returns true.
  auto Enqueue = [&](VPRecipeBase *UserR) {
    // The previous value must not depend on the users of the recurrence phi.
    // In that case, FOR is not a fixed order recurrence.
    if (UserR == Previous)
      return false;

    // Header phis never need to be moved.
    if (isa<VPHeaderPHIRecipe>(UserR))
      return true;
    // Already handled.
    if (!Visited.insert(UserR).second)
      return true;
    // Already placed after Previous.
    if (VPDT.properlyDominates(Previous, UserR))
      return true;

    if (UserR->mayHaveSideEffects())
      return false;

    ToSink.push_back(UserR);
    return true;
  };

  // Recursively collect the users of FOR that need sinking. The list grows
  // while it is being walked, so iterate by index.
  ToSink.push_back(FOR);
  for (unsigned Idx = 0; Idx != ToSink.size(); ++Idx) {
    VPRecipeBase *Def = ToSink[Idx];
    assert(Def->getNumDefinedValues() == 1 &&
           "only recipes with a single defined value expected");

    for (VPUser *U : Def->getVPSingleValue()->users())
      if (!Enqueue(cast<VPRecipeBase>(U)))
        return false;
  }

  // Keep recipes to sink ordered by dominance so earlier instructions are
  // processed first.
  sort(ToSink, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
    return VPDT.properlyDominates(A, B);
  });

  // Chain the collected recipes one after another behind Previous. FOR only
  // seeded the traversal and stays where it is.
  for (VPRecipeBase *R : ToSink) {
    if (R != FOR) {
      R->moveAfter(Previous);
      Previous = R;
    }
  }
  return true;
}


/// Try to hoist \p Previous and the recipes defining its operands
/// (transitively) before all users of \p FOR, using \p VPDT for dominance
/// queries. \returns true on success (including when nothing had to be moved)
/// and false if hoisting is not possible; nothing is moved in the latter case.
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
                                        VPRecipeBase *Previous,
                                        VPDominatorTree &VPDT) {
  if (Previous->mayHaveSideEffects() || Previous->mayReadFromMemory())
    return false;

  // Find the closest hoist point by looking at all users of FOR and selecting
  // the recipe dominating all other users.
  VPRecipeBase *HoistPoint = nullptr;
  for (VPUser *U : FOR->users()) {
    auto *UserR = cast<VPRecipeBase>(U);
    if (HoistPoint && !VPDT.properlyDominates(UserR, HoistPoint))
      continue;
    HoistPoint = UserR;
  }
  assert(all_of(FOR->users(),
                [&VPDT, HoistPoint](VPUser *U) {
                  auto *R = cast<VPRecipeBase>(U);
                  return HoistPoint == R ||
                         VPDT.properlyDominates(HoistPoint, R);
                }) &&
         "HoistPoint must dominate all users of FOR");

  // Recipes that need hoisting, and every recipe inspected so far.
  SmallVector<VPRecipeBase *> ToHoist;
  SmallPtrSet<VPRecipeBase *, 8> Inspected;

  // Returns the recipe defining CandidateV if it has to be hoisted, and
  // nullptr if it has no defining recipe, was inspected before, or already
  // dominates the users of FOR.
  auto NeedsHoisting = [HoistPoint, &VPDT,
                        &Inspected](VPValue *CandidateV) -> VPRecipeBase * {
    VPRecipeBase *Candidate = CandidateV->getDefiningRecipe();
    if (!Candidate)
      return nullptr;

    VPRegionBlock *LoopRegion =
        Candidate->getParent()->getEnclosingLoopRegion();
    assert((!Candidate->getParent()->getParent() ||
            Candidate->getParent()->getParent() == LoopRegion) &&
           "CFG in VPlan should still be flat, without replicate regions");

    // Hoist candidate was already visited, no need to hoist.
    if (!Inspected.insert(Candidate).second)
      return nullptr;

    // Candidate is outside loop region or a header phi, dominates FOR users
    // w/o hoisting.
    if (!LoopRegion)
      return nullptr;
    if (isa<VPHeaderPHIRecipe>(Candidate))
      return nullptr;

    // If we reached a recipe that dominates HoistPoint, we don't need to
    // hoist the recipe.
    return VPDT.properlyDominates(Candidate, HoistPoint) ? nullptr
                                                         : Candidate;
  };

  if (!NeedsHoisting(Previous->getVPSingleValue()))
    return true;

  // Recursively try to hoist Previous and its operands before all users of
  // FOR. The list grows while it is being walked, so iterate by index.
  ToHoist.push_back(Previous);
  for (unsigned Idx = 0; Idx != ToHoist.size(); ++Idx) {
    VPRecipeBase *Cur = ToHoist[Idx];
    assert(Cur->getNumDefinedValues() == 1 &&
           "only recipes with a single defined value expected");

    // Avoid hoisting candidates with side-effects, as we do not yet analyze
    // associated dependencies.
    if (Cur->mayHaveSideEffects())
      return false;

    for (VPValue *Operand : Cur->operands()) {
      // If we reach FOR, it means the original Previous depends on some other
      // recurrence that in turn depends on FOR. If that is the case, we would
      // also need to hoist recipes involving the other FOR, which may break
      // dependencies.
      if (Operand == FOR)
        return false;

      if (auto *OperandDef = NeedsHoisting(Operand))
        ToHoist.push_back(OperandDef);
    }
  }

  // Order recipes to hoist by dominance so earlier instructions are processed
  // first.
  sort(ToHoist, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
    return VPDT.properlyDominates(A, B);
  });

  // Place every collected recipe directly in front of the hoist point.
  for (VPRecipeBase *R : ToHoist)
    R->moveBefore(*HoistPoint->getParent(), HoistPoint->getIterator());

  return true;
}


/// For each fixed-order recurrence phi in the header of \p Plan's vector loop
/// region, re-arrange recipes so the recipe defining the previous value comes
/// before the users of the phi, then introduce a FirstOrderRecurrenceSplice
/// (created through \p LoopBuilder) that takes over all uses of the phi.
/// \returns false as soon as the recipes for one recurrence can neither be
/// sunk nor hoisted as required, true otherwise.
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
                                                  VPBuilder &LoopBuilder) {
  VPDominatorTree VPDT;
  VPDT.recalculate(Plan);

  // Collect the recurrence phis first; new recipes are inserted while they
  // are processed.
  SmallVector<VPFirstOrderRecurrencePHIRecipe *> FORPhis;
  VPBasicBlock *Header =
      Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock();
  for (VPRecipeBase &PhiR : Header->phis()) {
    auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&PhiR);
    if (FOR)
      FORPhis.push_back(FOR);
  }

  for (VPFirstOrderRecurrencePHIRecipe *FOR : FORPhis) {
    // Look through chains of recurrence phis to find the recipe that
    // ultimately defines the previous value.
    SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> VisitedPhis;
    VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
    // Fixed-order recurrences do not contain cycles, so this loop is
    // guaranteed to terminate.
    while (auto *ChainedPhi =
               dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
      assert(ChainedPhi->getParent() == FOR->getParent());
      assert(VisitedPhis.insert(ChainedPhi).second);
      Previous = ChainedPhi->getBackedgeValue()->getDefiningRecipe();
    }

    // Prefer sinking the users of FOR; fall back to hoisting Previous.
    bool Rearranged = sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT) ||
                      hoistPreviousBeforeFORUsers(FOR, Previous, VPDT);
    if (!Rearranged)
      return false;

    // Introduce a recipe to combine the incoming and previous values of a
    // fixed-order recurrence. It goes right after Previous, or after the phis
    // of Previous' block if Previous is a header phi itself.
    VPBasicBlock *SpliceBlock = Previous->getParent();
    if (!isa<VPHeaderPHIRecipe>(Previous))
      LoopBuilder.setInsertPoint(SpliceBlock,
                                 std::next(Previous->getIterator()));
    else
      LoopBuilder.setInsertPoint(SpliceBlock, SpliceBlock->getFirstNonPhi());

    auto *Splice = cast<VPInstruction>(
        LoopBuilder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
                                 {FOR, FOR->getBackedgeValue()}));

    FOR->replaceAllUsesWith(Splice);
    // Replacing all uses also rewrote the splice's own first operand, so set
    // it back to FOR.
    Splice->setOperand(0, FOR);
  }
  return true;
}


/// For every Add or Mul reduction phi in the header of \p Plan's vector loop
/// region, drop the poison-generating flags of all recipes with IR flags among
/// the users returned by collectUsersRecursively() for that phi.
void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  for (VPRecipeBase &HeaderR : Header->phis()) {
    auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(&HeaderR);
    if (!RdxPhi)
      continue;

    // Only integer add and mul reductions are of interest here.
    RecurKind Kind = RdxPhi->getRecurrenceDescriptor().getRecurrenceKind();
    bool IsAddOrMul = Kind == RecurKind::Add || Kind == RecurKind::Mul;
    if (!IsAddOrMul)
      continue;

    for (VPUser *User : collectUsersRecursively(RdxPhi)) {
      auto *WithFlags = dyn_cast<VPRecipeWithIRFlags>(User);
      if (WithFlags)
        WithFlags->dropPoisonGeneratingFlags();
    }
  }
}


/// Move loop-invariant recipes out of the vector loop region in \p Plan, to
/// the end of the vector preheader. A recipe is moved if it is not a phi, has
/// no side effects, does not read memory, is not an alloca and only uses
/// operands defined outside of loop regions.
static void licm(VPlan &Plan) {
  VPBasicBlock *Preheader = Plan.getVectorPreheader();

  // Return true if we do not know how to (mechanically) hoist a given recipe
  // out of a loop region. Does not address legality concerns such as aliasing
  // or speculation safety.
  auto CannotHoistRecipe = [](VPRecipeBase &R) {
    // Allocas cannot be hoisted.
    if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R))
      return RepR->getOpcode() == Instruction::Alloca;
    return false;
  };

  auto IsLoopInvariantOperand = [](VPValue *Op) {
    return Op->isDefinedOutsideLoopRegions();
  };

  // Hoist any loop invariant recipes from the vector loop region to the
  // preheader. Perform a shallow traversal of the vector loop region, to
  // exclude recipes in replicate regions.
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  for (VPBasicBlock *Block : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_shallow(LoopRegion->getEntry()))) {
    for (VPRecipeBase &R : make_early_inc_range(*Block)) {
      if (CannotHoistRecipe(R))
        continue;
      // TODO: Relax checks in the future, e.g. we could also hoist reads, if
      // their memory location is not modified in the vector loop.
      if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
        continue;
      if (!all_of(R.operands(), IsLoopInvariantOperand))
        continue;
      R.moveBefore(*Preheader, Preheader->end());
    }
  }
}


/// Narrow the recipes of \p Plan's vector loop region whose underlying
/// instruction has an entry in \p MinBWs to the bit width recorded there.
/// Operands of a narrowed recipe are truncated as needed and, except for
/// integer compares, the narrowed result is zero-extended back to the original
/// type so that other users keep seeing the original type.
///
/// In builds with assertions enabled, the number of handled entries is tracked
/// and checked against the size of \p MinBWs at the end.
void VPlanTransforms::truncateToMinimalBitwidths(
    VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs) {
#ifndef NDEBUG
  // Count the processed recipes and cross check the count later with MinBWs
  // size, to make sure all entries in MinBWs have been handled.
  unsigned NumProcessedRecipes = 0;
#endif
  // Keep track of created truncates, so they can be re-used. Note that we
  // cannot use RAUW after creating a new truncate, as this could make other
  // uses have different types for their operands, making them invalidly
  // typed.
  DenseMap<VPValue *, VPWidenCastRecipe *> ProcessedTruncs;
  Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
  VPTypeAnalysis TypeInfo(CanonicalIVType);
  VPBasicBlock *PH = Plan.getVectorPreheader();
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      // Only these recipe kinds are candidates for narrowing.
      if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
               VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
        continue;

      // Look up the minimal bit width recorded for the underlying instruction;
      // a lookup result of 0 means there is nothing to do for this recipe.
      VPValue *ResultVPV = R.getVPSingleValue();
      auto *UI = cast_or_null<Instruction>(ResultVPV->getUnderlyingValue());
      unsigned NewResSizeInBits = MinBWs.lookup(UI);
      if (!NewResSizeInBits)
        continue;

#ifndef NDEBUG
      NumProcessedRecipes++;
#endif
      // If the value wasn't vectorized, we must maintain the original scalar
      // type. Skip those here, after incrementing NumProcessedRecipes. Also
      // skip casts which do not need to be handled explicitly here, as
      // redundant casts will be removed during recipe simplification.
      if (isa<VPReplicateRecipe, VPWidenCastRecipe>(&R)) {
#ifndef NDEBUG
        // If any of the operands is a live-in and not used by VPWidenRecipe or
        // VPWidenSelectRecipe, but in MinBWs, make sure it is counted as
        // processed as well. When MinBWs is currently constructed, there is no
        // information about whether recipes are widened or replicated and in
        // case they are replicated the operands are not truncated. Counting
        // them here ensures we do not miss any recipes in MinBWs.
        // TODO: Remove once the analysis is done on VPlan.
        for (VPValue *Op : R.operands()) {
          if (!Op->isLiveIn())
            continue;
          auto *UV = dyn_cast_or_null<Instruction>(Op->getUnderlyingValue());
          if (UV && MinBWs.contains(UV) && !ProcessedTruncs.contains(Op) &&
              none_of(Op->users(),
                      IsaPred<VPWidenRecipe, VPWidenSelectRecipe>)) {
            // Add an entry to ProcessedTruncs to avoid counting the same
            // operand multiple times.
            ProcessedTruncs[Op] = nullptr;
            NumProcessedRecipes += 1;
          }
        }
#endif
        continue;
      }

      Type *OldResTy = TypeInfo.inferScalarType(ResultVPV);
      unsigned OldResSizeInBits = OldResTy->getScalarSizeInBits();
      assert(OldResTy->isIntegerTy() && "only integer types supported");
      (void)OldResSizeInBits;

      LLVMContext &Ctx = CanonicalIVType->getContext();
      auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits);

      // Any wrapping introduced by shrinking this operation shouldn't be
      // considered undefined behavior. So, we can't unconditionally copy
      // arithmetic wrapping flags to VPW.
      if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
        VPW->dropPoisonGeneratingFlags();

      using namespace llvm::VPlanPatternMatch;
      if (OldResSizeInBits != NewResSizeInBits &&
          !match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue()))) {
        // Extend result to original width.
        auto *Ext =
            new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
        Ext->insertAfter(&R);
        ResultVPV->replaceAllUsesWith(Ext);
        // Replacing all uses also rewrote the operand of Ext itself, so point
        // it back at the narrowed result.
        Ext->setOperand(0, ResultVPV);
        assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
      } else {
        assert(
            match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) &&
            "Only ICmps should not need extending the result.");
      }

      assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
      // The operands of a load are not truncated.
      if (isa<VPWidenLoadRecipe>(&R))
        continue;

      // Shrink operands by introducing truncates as needed.
      // Operand 0 of a select is skipped.
      unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
      for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
        auto *Op = R.getOperand(Idx);
        unsigned OpSizeInBits =
            TypeInfo.inferScalarType(Op)->getScalarSizeInBits();
        if (OpSizeInBits == NewResSizeInBits)
          continue;
        assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
        // Re-use the truncate created earlier for Op, if there is one;
        // otherwise create a new one and remember it.
        auto [ProcessedIter, IterIsEmpty] =
            ProcessedTruncs.insert({Op, nullptr});
        VPWidenCastRecipe *NewOp =
            IterIsEmpty
                ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy)
                : ProcessedIter->second;
        R.setOperand(Idx, NewOp);
        if (!IterIsEmpty)
          continue;
        ProcessedIter->second = NewOp;
        // A new truncate is placed right before R, or at the end of the
        // vector preheader if it truncates a live-in.
        if (!Op->isLiveIn()) {
          NewOp->insertBefore(&R);
        } else {
          PH->appendRecipe(NewOp);
#ifndef NDEBUG
          auto *OpInst = dyn_cast<Instruction>(Op->getLiveInIRValue());
          bool IsContained = MinBWs.contains(OpInst);
          NumProcessedRecipes += IsContained;
#endif
        }
      }

    }
  }

  assert(MinBWs.size() == NumProcessedRecipes &&
         "some entries in MinBWs haven't been processed");
}


/// Apply the sequence of VPlan-to-VPlan optimizations below to \p Plan, in
/// this fixed order.
void VPlanTransforms::optimize(VPlan &Plan) {
  // Simplify recipes and remove what is dead afterwards. The canonical IV
  // type is queried again on every invocation.
  auto SimplifyAndRemoveDead = [&Plan]() {
    simplifyRecipes(Plan, Plan.getCanonicalIV()->getScalarType());
    removeDeadRecipes(Plan);
  };

  removeRedundantCanonicalIVs(Plan);
  removeRedundantInductionCasts(Plan);
  SimplifyAndRemoveDead();

  legalizeAndOptimizeInductions(Plan);
  removeRedundantExpandSCEVRecipes(Plan);
  SimplifyAndRemoveDead();

  createAndOptimizeReplicateRegions(Plan);
  mergeBlocksIntoPredecessors(Plan);
  licm(Plan);
}


// Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace

// the loop terminator with a branch-on-cond recipe with the negated

// active-lane-mask as operand. Note that this turns the loop into an

// uncountable one. Only the existing terminator is replaced, all other existing

// recipes/users remain unchanged, except for poison-generating flags being

// dropped from the canonical IV increment. Return the created

// VPActiveLaneMaskPHIRecipe.

//

// The function uses the following definitions:

//

//  %TripCount = DataAndControlFlowWithoutRuntimeCheck ?

//    calculate-trip-count-minus-VF (original TC) : original TC

//  %IncrementValue = DataAndControlFlowWithoutRuntimeCheck ?

//     CanonicalIVPhi : CanonicalIVIncrement

//  %StartV is the canonical induction start value.

//

// The function adds the following recipes:

//

// vector.ph:

//   %TripCount = calculate-trip-count-minus-VF (original TC)

//       [if DataAndControlFlowWithoutRuntimeCheck]

//   %EntryInc = canonical-iv-increment-for-part %StartV

//   %EntryALM = active-lane-mask %EntryInc, %TripCount

//

// vector.body:

//   ...

//   %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ]

//   ...

//   %InLoopInc = canonical-iv-increment-for-part %IncrementValue

//   %ALM = active-lane-mask %InLoopInc, %TripCount

//   %Negated = Not %ALM

//   branch-on-cond %Negated

//

static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(

    VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {

  VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();

  VPBasicBlock *EB = TopRegion->getExitingBasicBlock();

  auto *CanonicalIVPHI = Plan.getCanonicalIV();

  VPValue *StartV = CanonicalIVPHI->getStartValue();


  auto *CanonicalIVIncrement =

      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());

  // TODO: Check if dropping the flags is needed if

  // !DataAndControlFlowWithoutRuntimeCheck.

  CanonicalIVIncrement->dropPoisonGeneratingFlags();

  DebugLoc DL = CanonicalIVIncrement->getDebugLoc();

  // We can't use StartV directly in the ActiveLaneMask VPInstruction, since

  // we have to take unrolling into account. Each part needs to start at

  //   Part * VF

  auto *VecPreheader = Plan.getVectorPreheader();

  VPBuilder Builder(VecPreheader);


  // Create the ActiveLaneMask instruction using the correct start values.

  VPValue *TC = Plan.getTripCount();


  VPValue *TripCount, *IncrementValue;

  if (!DataAndControlFlowWithoutRuntimeCheck) {

    // When the loop is guarded by a runtime overflow check for the loop

    // induction variable increment by VF, we can increment the value before

    // the get.active.lane mask and use the unmodified tripcount.

    IncrementValue = CanonicalIVIncrement;

    TripCount = TC;

  } else {

    // When avoiding a runtime check, the active.lane.mask inside the loop

    // uses a modified trip count and the induction variable increment is

    // done after the active.lane.mask intrinsic is called.

    IncrementValue = CanonicalIVPHI;

    TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,

                                     {TC}, DL);

  }

  auto *EntryIncrement = Builder.createOverflowingOp(

      VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false, false}, DL,

      "index.part.next");


  // Create the active lane mask instruction in the VPlan preheader.

  auto *EntryALM =

      Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC},

                           DL, "active.lane.mask.entry");


  // Now create the ActiveLaneMaskPhi recipe in the main loop using the

  // preheader ActiveLaneMask instruction.

  auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());

  LaneMaskPhi->insertAfter(CanonicalIVPHI);


  // Create the active lane mask for the next iteration of the loop before the

  // original terminator.

  VPRecipeBase *OriginalTerminator = EB->getTerminator();

  Builder.setInsertPoint(OriginalTerminator);

  auto *InLoopIncrement =

      Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,

                                  {IncrementValue}, {false, false}, DL);

  auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,

                                   {InLoopIncrement, TripCount}, DL,

                                   "active.lane.mask.next");

  LaneMaskPhi->addOperand(ALM);


  // Replace the original terminator with BranchOnCond. We have to invert the

  // mask here because a true condition means jumping to the exit block.

  auto *NotMask = Builder.createNot(ALM, DL);

  Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);

  OriginalTerminator->eraseFromParent();

  return LaneMaskPhi;

}


/// Gather every VPValue in \p Plan that acts as a header mask, i.e. matches
/// the (ICMP_ULE, WideCanonicalIV, backedge-taken-count) pattern as decided by
/// vputils::isHeaderMask.
/// TODO: Introduce explicit recipe for header-mask instead of searching
/// for the header-mask pattern manually.
static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
  auto IsWidenCanonicalIV = [](VPUser *U) {
    return isa<VPWidenCanonicalIVRecipe>(U);
  };

  // Step 1: find the widened forms of the canonical IV. First the (at most
  // one) VPWidenCanonicalIVRecipe among the canonical IV's users ...
  SmallVector<VPValue *> WideIVs;
  auto CanIVUsers = Plan.getCanonicalIV()->users();
  auto *WidenCanIVIt = find_if(CanIVUsers, IsWidenCanonicalIV);
  assert(count_if(CanIVUsers, IsWidenCanonicalIV) <= 1 &&
         "Must have at most one VPWideCanonicalIVRecipe");
  if (WidenCanIVIt != CanIVUsers.end())
    WideIVs.push_back(cast<VPWidenCanonicalIVRecipe>(*WidenCanIVIt));

  // ... then any VPWidenIntOrFpInductionRecipe in the loop header that is
  // itself canonical.
  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  for (VPRecipeBase &PhiR : Header->phis())
    if (auto *WidenIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&PhiR))
      if (WidenIV->isCanonical())
        WideIVs.push_back(WidenIV);

  // Step 2: among the users of those wide IVs, keep the VPInstructions that
  // vputils::isHeaderMask recognizes. The user list is copied before being
  // walked.
  SmallVector<VPValue *> Masks;
  for (auto *WideIV : WideIVs) {
    for (VPUser *User : SmallVector<VPUser *>(WideIV->users())) {
      auto *Cmp = dyn_cast<VPInstruction>(User);
      if (Cmp && vputils::isHeaderMask(Cmp, Plan)) {
        assert(Cmp->getOperand(0) == WideIV &&
               "WidenCanonicalIV must be the first operand of the compare");
        Masks.push_back(Cmp);
      }
    }
  }
  return Masks;
}


/// Replace the header masks of \p Plan with an active-lane-mask.
///
/// \p UseActiveLaneMaskForControlFlow  If true, the lane mask is introduced
/// as a VPActiveLaneMaskPHIRecipe via addVPLaneMaskPhiAndUpdateExitBranch,
/// which also rewrites the loop's exit branch. Otherwise a single
/// ActiveLaneMask VPInstruction is created right after the widened canonical
/// IV.
/// \p DataAndControlFlowWithoutRuntimeCheck  Forwarded to
/// addVPLaneMaskPhiAndUpdateExitBranch; may only be set together with
/// \p UseActiveLaneMaskForControlFlow.
void VPlanTransforms::addActiveLaneMask(
    VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
    bool DataAndControlFlowWithoutRuntimeCheck) {
  assert((!DataAndControlFlowWithoutRuntimeCheck ||
          UseActiveLaneMaskForControlFlow) &&
         "DataAndControlFlowWithoutRuntimeCheck implies "
         "UseActiveLaneMaskForControlFlow");

  auto *FoundWidenCanonicalIVUser =
      find_if(Plan.getCanonicalIV()->users(),
              [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
  // find_if returns an iterator into the user list, so "not found" has to be
  // detected by comparing against end(); testing the iterator for truthiness
  // would never fire.
  assert(FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end() &&
         "Must have widened canonical IV when tail folding!");
  auto *WideCanonicalIV =
      cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
  VPSingleDefRecipe *LaneMask;
  if (UseActiveLaneMaskForControlFlow) {
    LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
        Plan, DataAndControlFlowWithoutRuntimeCheck);
  } else {
    VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
    LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
                              {WideCanonicalIV, Plan.getTripCount()}, nullptr,
                              "active.lane.mask");
  }

  // Walk users of WideCanonicalIV and replace all compares of the form
  // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
  // active-lane-mask.
  for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
    HeaderMask->replaceAllUsesWith(LaneMask);
}


/// Build the EVL-based counterpart of \p CurRecipe, if one exists. The new
/// recipe is only created, not inserted. Returns nullptr when \p CurRecipe has
/// no EVL-based form.
/// \p HeaderMask  Header mask.
/// \p CurRecipe   Recipe to be transformed.
/// \p TypeInfo    VPlan-based type analysis.
/// \p AllOneMask  The vector mask parameter of vector-predication intrinsics.
/// \p EVL         The explicit vector length parameter of vector-predication
/// intrinsics.
static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
                                     VPRecipeBase &CurRecipe,
                                     VPTypeAnalysis &TypeInfo,
                                     VPValue &AllOneMask, VPValue &EVL) {
  using namespace llvm::VPlanPatternMatch;

  // Map a recipe's original mask to the mask of its EVL variant: the header
  // mask itself becomes "no mask" (nullptr), any other mask is kept as is.
  auto StripHeaderMask = [&](VPValue *Mask) -> VPValue * {
    assert(Mask && "Unmasked recipe when folding tail");
    if (Mask == HeaderMask)
      return nullptr;
    return Mask;
  };

  return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe)
      .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *Load) -> VPRecipeBase * {
        return new VPWidenLoadEVLRecipe(*Load, EVL,
                                        StripHeaderMask(Load->getMask()));
      })
      .Case<VPWidenStoreRecipe>(
          [&](VPWidenStoreRecipe *Store) -> VPRecipeBase * {
            return new VPWidenStoreEVLRecipe(
                *Store, EVL, StripHeaderMask(Store->getMask()));
          })
      .Case<VPWidenRecipe>([&](VPWidenRecipe *Widen) -> VPRecipeBase * {
        // Only binary and unary opcodes are converted.
        unsigned Opc = Widen->getOpcode();
        if (Instruction::isBinaryOp(Opc) || Instruction::isUnaryOp(Opc))
          return new VPWidenEVLRecipe(*Widen, EVL);
        return nullptr;
      })
      .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) -> VPRecipeBase * {
        return new VPReductionEVLRecipe(*Red, EVL,
                                        StripHeaderMask(Red->getCondOp()));
      })
      .Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe>(
          [&](auto *CR) -> VPRecipeBase * {
            // Look up the VP intrinsic: from the intrinsic ID for intrinsic
            // calls, from the opcode for casts.
            Intrinsic::ID VPID;
            if (auto *CallR = dyn_cast<VPWidenIntrinsicRecipe>(CR))
              VPID =
                  VPIntrinsic::getForIntrinsic(CallR->getVectorIntrinsicID());
            else
              VPID = VPIntrinsic::getForOpcode(
                  cast<VPWidenCastRecipe>(CR)->getOpcode());

            // Not all intrinsics have a corresponding VP intrinsic.
            if (VPID == Intrinsic::not_intrinsic)
              return nullptr;
            assert(VPIntrinsic::getMaskParamPos(VPID) &&
                   VPIntrinsic::getVectorLengthParamPos(VPID) &&
                   "Expected VP intrinsic to have mask and EVL");

            // Original operands, followed by the all-true mask and the EVL.
            SmallVector<VPValue *> NewOps(CR->operands());
            NewOps.push_back(&AllOneMask);
            NewOps.push_back(&EVL);
            return new VPWidenIntrinsicRecipe(
                VPID, NewOps, TypeInfo.inferScalarType(CR), CR->getDebugLoc());
          })
      .Case<VPWidenSelectRecipe>(
          [&](VPWidenSelectRecipe *Sel) -> VPRecipeBase * {
            // vp.select takes the select's operands plus the EVL.
            SmallVector<VPValue *> NewOps(Sel->operands());
            NewOps.push_back(&EVL);
            return new VPWidenIntrinsicRecipe(Intrinsic::vp_select, NewOps,
                                              TypeInfo.inferScalarType(Sel),
                                              Sel->getDebugLoc());
          })
      .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
        // Transform select with a header mask condition
        //   select(header_mask, LHS, RHS)
        // into vector predication merge.
        //   vp.merge(all-true, LHS, RHS, EVL)
        // All-true is used as the condition because this transformation is
        // limited to selects whose condition is a header mask.
        VPValue *LHS, *RHS;
        if (match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
                                m_VPValue(RHS))))
          return new VPWidenIntrinsicRecipe(
              Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
              TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
        return nullptr;
      })
      .Default([](VPRecipeBase *) -> VPRecipeBase * { return nullptr; });
}


/// Swap the recipes of \p Plan that (transitively) use a header mask for
/// their EVL-based variants, using \p EVL as the explicit vector length.
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
  Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
  VPTypeAnalysis TypeInfo(CanonicalIVType);
  LLVMContext &Ctx = CanonicalIVType->getContext();
  VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));

  // Reverse-vector-pointer recipes that use VF get EVL as operand 1 instead.
  for (VPUser *VFUser : to_vector(Plan.getVF().users()))
    if (auto *RevPtr = dyn_cast<VPReverseVectorPointerRecipe>(VFUser))
      RevPtr->setOperand(1, &EVL);

  // Recipes that have been replaced. They are only erased once all
  // replacements are in place so that the VPTypeAnalysis cache is not
  // invalidated while it is still in use.
  SmallVector<VPRecipeBase *> Replaced;

  for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
    for (VPUser *MaskUser : collectUsersRecursively(HeaderMask)) {
      auto *OldR = cast<VPRecipeBase>(MaskUser);
      VPRecipeBase *NewR =
          createEVLRecipe(HeaderMask, *OldR, TypeInfo, *AllOneMask, EVL);
      if (!NewR)
        continue;

      [[maybe_unused]] unsigned NumDefVal = NewR->getNumDefinedValues();
      assert(NumDefVal == OldR->getNumDefinedValues() &&
             "New recipe must define the same number of values as the "
             "original.");
      assert(
          NumDefVal <= 1 &&
          "Only supports recipes with a single definition or without users.");

      NewR->insertBefore(OldR);
      // Redirect users of the old value to the new recipe's value.
      if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(NewR))
        OldR->getVPSingleValue()->replaceAllUsesWith(
            NewR->getVPSingleValue());
      Replaced.push_back(OldR);
    }
  }

  // Erase the replaced recipes in reverse order of collection, then clean up
  // any of their operands that became dead.
  for (VPRecipeBase *OldR : reverse(Replaced)) {
    SmallVector<VPValue *> MaybeDeadOps(OldR->operands());
    OldR->eraseFromParent();
    for (VPValue *Op : MaybeDeadOps)
      recursivelyDeleteDeadRecipes(Op);
  }
}


/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and replace all
/// uses of the VPCanonicalIVPHIRecipe, except the canonical IV increment, with
/// the new VPEVLBasedIVPHIRecipe. After this transformation the
/// VPCanonicalIVPHIRecipe is used only for counting loop iterations.
/// Returns false, leaving \p Plan untouched, if the loop header contains a
/// widened int/fp or pointer induction; returns true otherwise.
///
/// The function uses the following definitions:
///  %StartV is the canonical induction start value.
///
/// The function adds the following recipes:
///
/// vector.ph:
/// ...
///
/// vector.body:
/// ...
/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
///                                               [ %NextEVLIV, %vector.body ]
/// %AVL = sub original TC, %EVLPhi
/// %VPEVL = EXPLICIT-VECTOR-LENGTH %AVL
/// ...
/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
/// ...
///
/// If MaxSafeElements is provided, the function adds the following recipes:
/// vector.ph:
/// ...
///
/// vector.body:
/// ...
/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
///                                               [ %NextEVLIV, %vector.body ]
/// %AVL = sub original TC, %EVLPhi
/// %cmp = cmp ult %AVL, MaxSafeElements
/// %SAFE_AVL = select %cmp, %AVL, MaxSafeElements
/// %VPEVL = EXPLICIT-VECTOR-LENGTH %SAFE_AVL
/// ...
/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
/// ...
///
bool VPlanTransforms::tryAddExplicitVectorLength(
    VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();

  // The transform updates all users of inductions to work based on EVL, instead
  // of the VF directly. At the moment, widened inductions cannot be updated, so
  // bail out if the plan contains any.
  if (any_of(Header->phis(), IsaPred<VPWidenIntOrFpInductionRecipe,
                                     VPWidenPointerInductionRecipe>))
    return false;

  auto *CanonicalIVPHI = Plan.getCanonicalIV();
  auto *IVTy = CanonicalIVPHI->getScalarType();

  // The EVL-based IV phi starts at the canonical IV's start value and sits
  // right after the canonical IV phi.
  auto *EVLPhi =
      new VPEVLBasedIVPHIRecipe(CanonicalIVPHI->getStartValue(), DebugLoc());
  EVLPhi->insertAfter(CanonicalIVPHI);

  // AVL (application vector length) = original TC - EVL-based IV, emitted at
  // the first non-phi position of the header.
  VPBuilder Builder(Header, Header->getFirstNonPhi());
  VPValue *AVL = Builder.createNaryOp(
      Instruction::Sub, {Plan.getTripCount(), EVLPhi}, DebugLoc(), "avl");
  if (MaxSafeElements) {
    // Clamp the AVL to MaxSafeElements: select(AVL u< Max, AVL, Max).
    VPValue *MaxSafe =
        Plan.getOrAddLiveIn(ConstantInt::get(IVTy, *MaxSafeElements));
    VPValue *IsBelowMax = Builder.createICmp(ICmpInst::ICMP_ULT, AVL, MaxSafe);
    AVL = Builder.createSelect(IsBelowMax, AVL, MaxSafe, DebugLoc(),
                               "safe_avl");
  }
  auto *VPEVL = Builder.createNaryOp(VPInstruction::ExplicitVectorLength, AVL,
                                     DebugLoc());

  auto *CanonicalIVIncrement =
      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());

  // If the IV type is not 32 bits wide, cast the EVL to the IV type (trunc
  // for narrower, zext for wider) before adding it to the EVL-based IV.
  VPSingleDefRecipe *EVLForAdd = VPEVL;
  unsigned IVSize = IVTy->getScalarSizeInBits();
  if (IVSize != 32) {
    Instruction::CastOps CastOpc =
        IVSize < 32 ? Instruction::Trunc : Instruction::ZExt;
    EVLForAdd = new VPScalarCastRecipe(CastOpc, EVLForAdd, IVTy,
                                       CanonicalIVIncrement->getDebugLoc());
    EVLForAdd->insertBefore(CanonicalIVIncrement);
  }

  // Next EVL-based IV value; reuses the wrap flags and debug location of the
  // canonical IV increment and is placed just before it.
  auto *NextEVLIV =
      new VPInstruction(Instruction::Add, {EVLForAdd, EVLPhi},
                        {CanonicalIVIncrement->hasNoUnsignedWrap(),
                         CanonicalIVIncrement->hasNoSignedWrap()},
                        CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
  NextEVLIV->insertBefore(CanonicalIVIncrement);
  EVLPhi->addOperand(NextEVLIV);

  transformRecipestoEVLRecipes(Plan, *VPEVL);

  // Redirect every use of the canonical IV phi to the EVL-based phi, then
  // restore the canonical IV increment's first operand to the canonical phi.
  CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
  CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
  // TODO: support unroll factor > 1.
  Plan.setUF(1);
  return true;
}


/// Drop poison-generating flags from recipes that feed the address of a
/// consecutive widened memory access or of an interleave group whose
/// underlying instruction(s) live in a block that needs predication.
///
/// \p BlockNeedsPredication  Callback telling whether an IR basic block needs
/// predication.
void VPlanTransforms::dropPoisonGeneratingRecipes(
    VPlan &Plan, function_ref<bool(BasicBlock *)> BlockNeedsPredication) {
  // Collect recipes in the backward slice of `Root` that may generate a poison
  // value that is used after vectorization. `Visited` is shared between all
  // roots, so each recipe is processed at most once.
  SmallPtrSet<VPRecipeBase *, 16> Visited;
  auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
    SmallVector<VPRecipeBase *, 16> Worklist;
    Worklist.push_back(Root);

    // Traverse the backward slice of Root through its use-def chain.
    while (!Worklist.empty()) {
      VPRecipeBase *CurRec = Worklist.pop_back_val();

      if (!Visited.insert(CurRec).second)
        continue;

      // Prune search if we find another recipe generating a widen memory
      // instruction. Widen memory instructions involved in address computation
      // will lead to gather/scatter instructions, which don't need to be
      // handled.
      if (isa<VPWidenMemoryRecipe, VPInterleaveRecipe, VPScalarIVStepsRecipe,
              VPHeaderPHIRecipe>(CurRec))
        continue;

      // This recipe contributes to the address computation of a widen
      // load/store. If the underlying instruction has poison-generating flags,
      // drop them directly.
      if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
        VPValue *A, *B;
        using namespace llvm::VPlanPatternMatch;
        // Dropping disjoint from an OR may yield incorrect results, as some
        // analysis may have converted it to an Add implicitly (e.g. SCEV used
        // for dependence analysis). Instead, replace it with an equivalent Add.
        // This is possible as all users of the disjoint OR only access lanes
        // where the operands are disjoint or poison otherwise.
        if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
            RecWithFlags->isDisjoint()) {
          VPBuilder Builder(RecWithFlags);
          VPInstruction *New = Builder.createOverflowingOp(
              Instruction::Add, {A, B}, {false, false},
              RecWithFlags->getDebugLoc());
          New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
          RecWithFlags->replaceAllUsesWith(New);
          RecWithFlags->eraseFromParent();
          // Continue the traversal from the replacement's operands.
          CurRec = New;
        } else
          RecWithFlags->dropPoisonGeneratingFlags();
      } else {
        Instruction *Instr = dyn_cast_or_null<Instruction>(
            CurRec->getVPSingleValue()->getUnderlyingValue());
        (void)Instr;
        assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
               "found instruction with poison generating flags not covered by "
               "VPRecipeWithIRFlags");
      }

      // Add new definitions to the worklist.
      for (VPValue *Operand : CurRec->operands())
        if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
          Worklist.push_back(OpDef);
    }
  });

  // Traverse all the recipes in the VPlan and collect the poison-generating
  // recipes in the backward slice starting at the address of a
  // VPWidenMemoryRecipe or VPInterleaveRecipe.
  auto Iter = vp_depth_first_deep(Plan.getEntry());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
    for (VPRecipeBase &Recipe : *VPBB) {
      if (auto *WidenRec = dyn_cast<VPWidenMemoryRecipe>(&Recipe)) {
        Instruction &UnderlyingInstr = WidenRec->getIngredient();
        VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
        if (AddrDef && WidenRec->isConsecutive() &&
            BlockNeedsPredication(UnderlyingInstr.getParent()))
          CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
      } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) {
        VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
        if (AddrDef) {
          // Check if any member of the interleave group needs predication.
          // getMember() is indexed by position within the group and returns
          // null for gaps, so every position up to the group's factor has to
          // be visited; bounding the loop by the number of members would skip
          // members located after a gap.
          const InterleaveGroup<Instruction> *InterGroup =
              InterleaveRec->getInterleaveGroup();
          bool NeedPredication = false;
          for (unsigned I = 0, Factor = InterGroup->getFactor(); I < Factor;
               ++I) {
            Instruction *Member = InterGroup->getMember(I);
            if (Member)
              NeedPredication |= BlockNeedsPredication(Member->getParent());
          }

          if (NeedPredication)
            CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
        }
      }
    }
  }
}


/// For every group in \p InterleaveGroups, replace the recipes widening its
/// member memory instructions with a single VPInterleaveRecipe placed at the
/// group's insert position.
///
/// \p RecipeBuilder  Used to look up the recipe created for each member
/// instruction.
/// \p ScalarEpilogueAllowed  A group that requires a scalar epilogue is given
/// a mask for gaps when this is false.
void VPlanTransforms::createInterleaveGroups(
    VPlan &Plan,
    const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
        &InterleaveGroups,
    VPRecipeBuilder &RecipeBuilder, bool ScalarEpilogueAllowed) {
  if (InterleaveGroups.empty())
    return;

  // The dominator tree is needed to decide whether member zero's address can
  // be reused at the insert position.
  VPDominatorTree VPDT;
  VPDT.recalculate(Plan);

  for (const auto *IG : InterleaveGroups) {
    // Gather the stored values of all store members, in member order.
    SmallVector<VPValue *, 4> StoredValues;
    for (unsigned Idx = 0; Idx < IG->getFactor(); ++Idx) {
      auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(Idx));
      if (!SI)
        continue;
      auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
      StoredValues.push_back(StoreR->getStoredValue());
    }

    bool NeedsMaskForGaps =
        IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed;

    Instruction *IRInsertPos = IG->getInsertPos();
    auto *InsertPos =
        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));

    // Get or create the start address for the interleave group. By default
    // this is the address used by member zero.
    auto *Start =
        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
    VPValue *Addr = Start->getAddr();
    VPRecipeBase *AddrDef = Addr->getDefiningRecipe();
    if (AddrDef && !VPDT.properlyDominates(AddrDef, InsertPos)) {
      // TODO: Hoist Addr's defining recipe (and any operands as needed) to
      // InsertPos or sink loads above zero members to join it.
      auto *Gep = dyn_cast<GetElementPtrInst>(
          getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts());
      bool InBounds = Gep && Gep->isInBounds();

      // Member zero's address does not dominate the insert position, so it
      // cannot be reused. Start from the insert position's address instead
      // and step back to member zero with a PtrAdd of
      //   -(alloc size of the accessed type * index of the insert position).
      assert(IG->getIndex(IRInsertPos) != 0 &&
             "index of insert position shouldn't be zero");
      auto &DL = IRInsertPos->getDataLayout();
      APInt Offset(32,
                   DL.getTypeAllocSize(getLoadStoreType(IRInsertPos)) *
                       IG->getIndex(IRInsertPos),
                   /*IsSigned=*/true);
      VPValue *OffsetVPV = Plan.getOrAddLiveIn(
          ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset));
      VPBuilder B(InsertPos);
      if (InBounds)
        Addr = B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV);
      else
        Addr = B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
    }

    auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues,
                                        InsertPos->getMask(), NeedsMaskForGaps);
    VPIG->insertBefore(InsertPos);

    // Retire the per-member recipes. Members that produce a value (non-void
    // type) have their uses redirected to the interleave recipe's results,
    // which are consumed in member order.
    unsigned ResultIdx = 0;
    for (unsigned Idx = 0; Idx < IG->getFactor(); ++Idx) {
      Instruction *Member = IG->getMember(Idx);
      if (!Member)
        continue;
      VPRecipeBase *MemberR = RecipeBuilder.getRecipe(Member);
      if (!Member->getType()->isVoidTy()) {
        VPValue *OriginalV = MemberR->getVPSingleValue();
        OriginalV->replaceAllUsesWith(VPIG->getVPValue(ResultIdx));
        ++ResultIdx;
      }
      MemberR->eraseFromParent();
    }
  }
}


/// Replace every VPCanonicalIVPHIRecipe and VPEVLBasedIVPHIRecipe in \p Plan
/// with a VPScalarPHIRecipe that has the same start value, backedge value and
/// debug location.
void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    // Phis are erased while iterating, hence the early-increment range.
    for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
      if (isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R)) {
        auto *OldPhi = cast<VPHeaderPHIRecipe>(&R);

        // The replacement is named after the kind of phi it replaces.
        StringRef Name = "evl.based.iv";
        if (isa<VPCanonicalIVPHIRecipe>(OldPhi))
          Name = "index";

        auto *NewPhi = new VPScalarPHIRecipe(OldPhi->getStartValue(),
                                             OldPhi->getBackedgeValue(),
                                             OldPhi->getDebugLoc(), Name);
        NewPhi->insertBefore(OldPhi);
        OldPhi->replaceAllUsesWith(NewPhi);
        OldPhi->eraseFromParent();
      }
    }
  }
}


/// Update \p Plan to account for the uncountable early exit taken from
/// \p UncountableExitingBlock of \p OrigLoop:
///  - a new "middle.split" block is inserted on the edge from the loop region
///    to the middle block; it branches to the early exit block if the early
///    exit was taken;
///  - the latch's BranchOnCount terminator is replaced by a BranchOnCond that
///    exits if either the original latch condition holds or the early exit
///    was taken.
/// \p RecipeBuilder provides the block-in mask used to derive the early-exit
/// condition. \p SE is not used in this function body.
void VPlanTransforms::handleUncountableEarlyExit(
    VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
    BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
  // All new latch recipes are emitted in front of the current terminator.
  VPBuilder Builder(LatchVPBB->getTerminator());
  auto *MiddleVPBB = Plan.getMiddleBlock();

  // Classify the two successors of the early-exiting branch: one stays in the
  // loop, the other one leaves it.
  auto *EarlyExitingBranch =
      cast<BranchInst>(UncountableExitingBlock->getTerminator());
  BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
  BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
  const bool TrueSuccInLoop = OrigLoop->contains(TrueSucc);
  BasicBlock *InLoopSucc = TrueSuccInLoop ? TrueSucc : FalseSucc;
  BasicBlock *OutOfLoopSucc = TrueSuccInLoop ? FalseSucc : TrueSucc;

  // The early exit block may or may not be the same as the "countable" exit
  // block. If the loop has a unique exit block, reuse the middle block's first
  // successor; otherwise create a new VPIRBB for the early exit block.
  // TODO: Introduce both exit blocks during VPlan skeleton construction.
  VPIRBasicBlock *VPEarlyExitBlock =
      OrigLoop->getUniqueExitBlock()
          ? cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])
          : Plan.createVPIRBasicBlock(OutOfLoopSucc);

  // The early exit is taken if any lane does not continue to the in-loop
  // successor, i.e. any-of(not(block-in mask of the in-loop successor)).
  VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(InLoopSucc);
  auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
  VPValue *IsEarlyExitTaken =
      Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});

  // Split the middle block: the new block conditionally branches to the early
  // exit block if the early exit has been taken.
  VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
  VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
  VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock);
  NewMiddle->swapSuccessors();

  VPBuilder MiddleBuilder(NewMiddle);
  MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});

  // Replace the condition controlling the non-early exit from the vector loop
  // with one exiting if either the original condition of the vector latch is
  // true or the early exit has been taken.
  auto *OldLatchBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
  assert(OldLatchBranch->getOpcode() == VPInstruction::BranchOnCount &&
         "Unexpected terminator");
  auto *IsLatchExitTaken =
      Builder.createICmp(CmpInst::ICMP_EQ, OldLatchBranch->getOperand(0),
                         OldLatchBranch->getOperand(1));
  auto *AnyExitTaken = Builder.createNaryOp(
      Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
  Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
  OldLatchBranch->eraseFromParent();
}

for
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
Definition: AArch64ExpandPseudoInsts.cpp:115

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: ARMSLSHardening.cpp:73

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition: DeadArgumentElimination.cpp:353

Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79

Name
std::string Name
Definition: ELFObjHandler.cpp:77

X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:170

IVDescriptors.h

Users
iv Induction Variable Users
Definition: IVUsers.cpp:48

Intrinsics.h

licm
licm
Definition: LICM.cpp:378

mergeBlocksIntoPredecessors
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
Definition: LoopSimplifyCFG.cpp:660

I
#define I(x, y, z)
Definition: MD5.cpp:58

II
uint64_t IntrinsicInst * II
Definition: NVVMIntrRange.cpp:51

Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

if
if(PassOpts->AAPipeline)
Definition: PassBuilderBindings.cpp:64

PatternMatch.h

PostOrderIterator.h
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

SetVector.h
This file implements a set that has insertion order iteration characteristics.

TypeSwitch.h
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...

VPRecipeBuilder.h

VPlanAnalysis.h

VPlanCFG.h

VPlanDominatorTree.h
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.

VPlanPatternMatch.h

sinkScalarOperands
static bool sinkScalarOperands(VPlan &Plan)
Definition: VPlanTransforms.cpp:109

removeRedundantInductionCasts
static void removeRedundantInductionCasts(VPlan &Plan)
Remove redundant casts of inductions.
Definition: VPlanTransforms.cpp:424

createScalarIVSteps
static VPScalarIVStepsRecipe * createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, Instruction *TruncI, VPValue *StartV, VPValue *Step, DebugLoc DL, VPBuilder &Builder)
Definition: VPlanTransforms.cpp:527

sinkRecurrenceUsersAfterPrevious
static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Sink users of FOR after the recipe defining the previous value Previous of the recurrence.
Definition: VPlanTransforms.cpp:1057

mergeReplicateRegionsIntoSuccessors
static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan)
Definition: VPlanTransforms.cpp:219

addVPLaneMaskPhiAndUpdateExitBranch
static VPActiveLaneMaskPHIRecipe * addVPLaneMaskPhiAndUpdateExitBranch(VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck)
Definition: VPlanTransforms.cpp:1490

transformRecipestoEVLRecipes
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL)
Replace recipes with their EVL variants.
Definition: VPlanTransforms.cpp:1726

isDeadRecipe
static bool isDeadRecipe(VPRecipeBase &R)
Returns true if R is dead and can be removed.
Definition: VPlanTransforms.cpp:493

legalizeAndOptimizeInductions
static void legalizeAndOptimizeInductions(VPlan &Plan)
Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd (IndStart, ScalarIVSteps (0,...
Definition: VPlanTransforms.cpp:589

addReplicateRegions
static void addReplicateRegions(VPlan &Plan)
Definition: VPlanTransforms.cpp:344

simplifyRecipes
static void simplifyRecipes(VPlan &Plan, Type *CanonicalIVTy)
Try to simplify the recipes in Plan.
Definition: VPlanTransforms.cpp:967

simplifyRecipe
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo)
Try to simplify recipe R.
Definition: VPlanTransforms.cpp:834

removeRedundantExpandSCEVRecipes
static void removeRedundantExpandSCEVRecipes(VPlan &Plan)
Remove redundant ExpandSCEVRecipes in Plan's entry block by replacing them with already existing reci...
Definition: VPlanTransforms.cpp:797

collectAllHeaderMasks
static SmallVector< VPValue * > collectAllHeaderMasks(VPlan &Plan)
Collect all VPValues representing a header mask through the (ICMP_ULE, WideCanonicalIV,...
Definition: VPlanTransforms.cpp:1565

hoistPreviousBeforeFORUsers
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Try to hoist Previous and its operands before all users of FOR.
Definition: VPlanTransforms.cpp:1112

getOptimizableIVOf
static VPWidenInductionRecipe * getOptimizableIVOf(VPValue *VPV)
Check if VPV is an untruncated wide induction, either before or after the increment.
Definition: VPlanTransforms.cpp:673

collectUsersRecursively
static SmallVector< VPUser * > collectUsersRecursively(VPValue *V)
Definition: VPlanTransforms.cpp:565

recursivelyDeleteDeadRecipes
static void recursivelyDeleteDeadRecipes(VPValue *V)
Definition: VPlanTransforms.cpp:814

createReplicateRegion
static VPRegionBlock * createReplicateRegion(VPReplicateRecipe *PredRecipe, VPlan &Plan)
Definition: VPlanTransforms.cpp:303

getPredicatedThenBlock
static VPBasicBlock * getPredicatedThenBlock(VPRegionBlock *R)
If R is a triangle region, return the 'then' block of the triangle.
Definition: VPlanTransforms.cpp:197

createEVLRecipe
static VPRecipeBase * createEVLRecipe(VPValue *HeaderMask, VPRecipeBase &CurRecipe, VPTypeAnalysis &TypeInfo, VPValue &AllOneMask, VPValue &EVL)
Try to convert CurRecipe to a corresponding EVL-based recipe.
Definition: VPlanTransforms.cpp:1647

getPredicatedMask
VPValue * getPredicatedMask(VPRegionBlock *R)
If R is a region with a VPBranchOnMaskRecipe in the entry block, return the mask.
Definition: VPlanTransforms.cpp:187

removeRedundantCanonicalIVs
static void removeRedundantCanonicalIVs(VPlan &Plan)
Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV recipe, if it exists.
Definition: VPlanTransforms.cpp:457

VPlanTransforms.h
This file provides utility VPlan to VPlan transformations.

VPlanUtils.h

VPlan.h
This file contains the declarations of the Vectorization Plan base classes:

VectorUtils.h

RHS
Value * RHS
Definition: X86PartialReduction.cpp:74

LHS
Value * LHS
Definition: X86PartialReduction.cpp:73

IV
static const uint32_t IV[8]
Definition: blake3_impl.h:78

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:78

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:61

llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1479

llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:694

llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:699

llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:32

llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33

llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194

llvm::DenseMapBase::contains
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147

llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211

llvm::DenseMap
Definition: DenseMap.h:727

llvm::DominatorTreeBase::recalculate
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Definition: GenericDomTree.h:859

llvm::ElementCount
Definition: TypeSize.h:300

llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311

llvm::FPMathOperator
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:205

llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933

llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:334

llvm::InductionDescriptor::InductionKind
InductionKind
This enum represents the kinds of inductions that we support.
Definition: IVDescriptors.h:337

llvm::InductionDescriptor::IK_PtrInduction
@ IK_PtrInduction
Pointer induction var. Step = C.
Definition: IVDescriptors.h:340

llvm::InductionDescriptor::IK_IntInduction
@ IK_IntInduction
Integer induction variable. Step = C.
Definition: IVDescriptors.h:339

llvm::Instruction
Definition: Instruction.h:68

llvm::Instruction::isBinaryOp
bool isBinaryOp() const
Definition: Instruction.h:279

llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:972

llvm::Instruction::isUnaryOp
bool isUnaryOp() const
Definition: Instruction.h:278

llvm::Instruction::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76

llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:986

llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311

llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488

llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558

llvm::InterleaveGroup::getNumMembers
uint32_t getNumMembers() const
Definition: VectorUtils.h:506

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67

llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:176

llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: GenericLoopInfo.h:124

llvm::LoopBase::getUniqueExitBlock
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: GenericLoopInfoImpl.h:158

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39

llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36

llvm::MapVector::contains
bool contains(const KeyT &Key) const
Definition: MapVector.h:163

llvm::MapVector::lookup
ValueT lookup(const KeyT &Key) const
Definition: MapVector.h:110

llvm::MapVector::size
size_type size() const
Definition: MapVector.h:60

llvm::PHINode
Definition: Instructions.h:2600

llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition: ScalarEvolution.h:2383

llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2422

llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77

llvm::RecurrenceDescriptor::getRecurrenceKind
RecurKind getRecurrenceKind() const
Definition: IVDescriptors.h:210

llvm::Region
Definition: RegionInfo.h:887

llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:299

llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:71

llvm::SCEV::isZero
bool isZero() const
Return true if the expression is a constant zero.
Definition: ScalarEvolution.cpp:448

llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:386

llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:447

llvm::ScalarEvolution::getElementCount
const SCEV * getElementCount(Type *Ty, ElementCount EC)
Definition: ScalarEvolution.cpp:506

llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:11050

llvm::ScalarEvolution::getContext
LLVMContext & getContext() const
Definition: ScalarEvolution.h:489

llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1657

llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:57

llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162

llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:94

llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition: SmallPtrSet.h:93

llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384

llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:472

llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519

llvm::SmallVectorBase::empty
bool empty() const
Definition: SmallVector.h:81

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:78

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:673

llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:413

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:292

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:280

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81

llvm::TypeSwitch
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
Definition: TypeSwitch.h:87

llvm::TypeSwitch::Case
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
Definition: TypeSwitch.h:96

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264

llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128

llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237

llvm::User
Definition: User.h:44

llvm::User::operands
op_range operands()
Definition: User.h:288

llvm::VPActiveLaneMaskPHIRecipe
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:3289

llvm::VPBasicBlock
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3531

llvm::VPBasicBlock::appendRecipe
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3606

llvm::VPBasicBlock::end
iterator end()
Definition: VPlan.h:3568

llvm::VPBasicBlock::phis
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3619

llvm::VPBasicBlock::getFirstNonPhi
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:208

llvm::VPBasicBlock::getEnclosingLoopRegion
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:566

llvm::VPBasicBlock::splitAt
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:536

llvm::VPBasicBlock::getTerminator
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:602

llvm::VPBasicBlock::back
const VPRecipeBase & back() const
Definition: VPlan.h:3580

llvm::VPBlendRecipe
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2489

llvm::VPBlockBase
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:391

llvm::VPBlockBase::getParent
VPRegionBlock * getParent()
Definition: VPlan.h:483

llvm::VPBlockBase::getExitingBasicBlock
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178

llvm::VPBlockBase::swapSuccessors
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:622

llvm::VPBlockBase::getSinglePredecessor
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:525

llvm::VPBlockBase::getEntryBasicBlock
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158

llvm::VPBlockBase::getSingleHierarchicalPredecessor
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:571

llvm::VPBlockBase::getSingleSuccessor
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:519

llvm::VPBlockBase::getSuccessors
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:508

llvm::VPBlockUtils::insertOnEdge
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition: VPlanUtils.h:204

llvm::VPBlockUtils::insertTwoBlocksAfter
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlanUtils.h:123

llvm::VPBlockUtils::connectBlocks
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlanUtils.h:142

llvm::VPBlockUtils::disconnectBlocks
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlanUtils.h:161

llvm::VPBranchOnMaskRecipe
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2849

llvm::VPBuilder::InsertPointGuard
RAII object that stores the current insertion point and restores it when the object is destroyed.
Definition: LoopVectorizationPlanner.h:270

llvm::VPBuilder
VPlan-based builder utility analogous to IRBuilder.
Definition: LoopVectorizationPlanner.h:45

llvm::VPBuilder::createICmp
VPValue * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL={}, const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
Definition: LoopVectorizationPlanner.h:215

llvm::VPBuilder::createDerivedIV
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
Definition: LoopVectorizationPlanner.h:237

llvm::VPBuilder::createPtrAdd
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:223

llvm::VPBuilder::createScalarCast
VPScalarCastRecipe * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
Definition: LoopVectorizationPlanner.h:245

llvm::VPBuilder::getToInsertAfter
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
Definition: LoopVectorizationPlanner.h:87

llvm::VPBuilder::createScalarIVSteps
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step)
Definition: LoopVectorizationPlanner.h:257

llvm::VPBuilder::createOverflowingOp
VPInstruction * createOverflowingOp(unsigned Opcode, std::initializer_list< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:169

llvm::VPBuilder::createNaryOp
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Definition: LoopVectorizationPlanner.h:145

llvm::VPBuilder::createNot
VPValue * createNot(VPValue *Operand, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:177

llvm::VPBuilder::createSelect
VPValue * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL={}, const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
Definition: LoopVectorizationPlanner.h:201

llvm::VPBuilder::setInsertPoint
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Definition: LoopVectorizationPlanner.h:123

llvm::VPCanonicalIVPHIRecipe
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:3228

llvm::VPCanonicalIVPHIRecipe::getScalarType
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:3259

llvm::VPDef::getNumDefinedValues
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:421

llvm::VPDef::definedValues
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:416

llvm::VPDef::getVPSingleValue
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:394

llvm::VPDominatorTree
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
Definition: VPlanDominatorTree.h:37

llvm::VPDominatorTree::properlyDominates
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
Returns true if A properly dominates B.
Definition: VPlanAnalysis.cpp:313

llvm::VPEVLBasedIVPHIRecipe
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:3324

llvm::VPIRBasicBlock
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3673

llvm::VPInstruction
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1188

llvm::VPInstruction::BranchOnCond
@ BranchOnCond
Definition: VPlan.h:1211

llvm::VPInstruction::BranchOnCount
@ BranchOnCount
Definition: VPlan.h:1210

llvm::VPInstruction::ActiveLaneMask
@ ActiveLaneMask
Definition: VPlan.h:1200

llvm::VPInstruction::FirstOrderRecurrenceSplice
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1194

llvm::VPInstruction::ExplicitVectorLength
@ ExplicitVectorLength
Definition: VPlan.h:1201

llvm::VPInstruction::CanonicalIVIncrementForPart
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1209

llvm::VPInstruction::CalculateTripCountMinusVF
@ CalculateTripCountMinusVF
Definition: VPlan.h:1207

llvm::VPInstruction::AnyOf
@ AnyOf
Definition: VPlan.h:1224

llvm::VPInterleaveRecipe
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2556

llvm::VPIntrinsic::getMaskParamPos
static std::optional< unsigned > getMaskParamPos(Intrinsic::ID IntrinsicID)
Definition: IntrinsicInst.cpp:400

llvm::VPIntrinsic::getVectorLengthParamPos
static std::optional< unsigned > getVectorLengthParamPos(Intrinsic::ID IntrinsicID)
Definition: IntrinsicInst.cpp:413

llvm::VPIntrinsic::getForOpcode
static Intrinsic::ID getForOpcode(unsigned OC)
The llvm.vp.* intrinsics for this instruction Opcode.
Definition: IntrinsicInst.cpp:564

llvm::VPIntrinsic::getForIntrinsic
static Intrinsic::ID getForIntrinsic(Intrinsic::ID Id)
The llvm.vp.* intrinsics for this intrinsic Id.
Definition: IntrinsicInst.cpp:592

llvm::VPPredInstPHIRecipe
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2904

llvm::VPRecipeBase
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:710

llvm::VPRecipeBase::mayReadFromMemory
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
Definition: VPlanRecipes.cpp:99

llvm::VPRecipeBase::mayReadOrWriteMemory
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:799

llvm::VPRecipeBase::mayHaveSideEffects
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
Definition: VPlanRecipes.cpp:145

llvm::VPRecipeBase::getParent
VPBasicBlock * getParent()
Definition: VPlan.h:735

llvm::VPRecipeBase::getDebugLoc
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:804

llvm::VPRecipeBase::moveBefore
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
Definition: VPlanRecipes.cpp:239

llvm::VPRecipeBase::insertBefore
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
Definition: VPlanRecipes.cpp:202

llvm::VPRecipeBase::eraseFromParent
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: VPlanRecipes.cpp:229

llvm::VPRecipeBuilder
Helper class to create VPRecipes from IR instructions.
Definition: VPRecipeBuilder.h:47

llvm::VPRecipeBuilder::getBlockInMask
VPValue * getBlockInMask(BasicBlock *BB) const
Returns the entry mask for the block BB.
Definition: LoopVectorize.cpp:8208

llvm::VPRecipeBuilder::getRecipe
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
Definition: VPRecipeBuilder.h:209

llvm::VPReductionEVLRecipe
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2731

llvm::VPReductionRecipe
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2651

llvm::VPRegionBlock
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3708

llvm::VPRegionBlock::getEntry
const VPBlockBase * getEntry() const
Definition: VPlan.h:3744

llvm::VPRegionBlock::getExiting
const VPBlockBase * getExiting() const
Definition: VPlan.h:3756

llvm::VPReplicateRecipe
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2772

llvm::VPReplicateRecipe::isUniform
bool isUniform() const
Definition: VPlan.h:2816

llvm::VPReplicateRecipe::getMask
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2840

llvm::VPScalarCastRecipe
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1581

llvm::VPScalarIVStepsRecipe
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3474

llvm::VPScalarPHIRecipe
Recipe to generate a scalar PHI.
Definition: VPlan.h:2263

llvm::VPSingleDefRecipe
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:837

llvm::VPSingleDefRecipe::getUnderlyingInstr
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:907

llvm::VPTypeAnalysis
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:40

llvm::VPTypeAnalysis::inferScalarType
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
Definition: VPlanAnalysis.cpp:215

llvm::VPUser
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:206

llvm::VPUser::operands
operand_range operands()
Definition: VPlanValue.h:263

llvm::VPUser::setOperand
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:248

llvm::VPUser::op_end
operand_iterator op_end()
Definition: VPlanValue.h:261

llvm::VPUser::op_begin
operand_iterator op_begin()
Definition: VPlanValue.h:259

llvm::VPUser::addOperand
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:237

llvm::VPValue
Definition: VPlanValue.h:46

llvm::VPValue::getDefiningRecipe
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123

llvm::VPValue::getUnderlyingValue
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:89

llvm::VPValue::replaceAllUsesWith
void replaceAllUsesWith(VPValue *New)
Definition: VPlan.cpp:1420

llvm::VPValue::getNumUsers
unsigned getNumUsers() const
Definition: VPlanValue.h:117

llvm::VPValue::getLiveInIRValue
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:178

llvm::VPValue::isLiveIn
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:173

llvm::VPValue::replaceUsesWithIf
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition: VPlan.cpp:1424

llvm::VPValue::users
user_range users()
Definition: VPlanValue.h:138

llvm::VPWidenCanonicalIVRecipe
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3369

llvm::VPWidenCastRecipe
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1529

llvm::VPWidenEVLRecipe
A recipe for widening operations with vector-predication intrinsics with explicit vector length (EVL)...
Definition: VPlan.h:1482

llvm::VPWidenGEPRecipe
A recipe for handling GEP instructions.
Definition: VPlan.h:1855

llvm::VPWidenInductionRecipe
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:2089

llvm::VPWidenIntOrFpInductionRecipe
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:2142

llvm::VPWidenIntrinsicRecipe
A recipe for widening vector intrinsics.
Definition: VPlan.h:1629

llvm::VPWidenMemoryRecipe::getMask
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:3006

llvm::VPWidenRecipe
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1431

llvm::VPlan
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3807

llvm::VPlan::hasScalableVF
bool hasScalableVF()
Definition: VPlan.h:3999

llvm::VPlan::getEntry
VPBasicBlock * getEntry()
Definition: VPlan.h:3920

llvm::VPlan::createVPRegionBlock
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition: VPlan.h:4100

llvm::VPlan::getVF
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:3985

llvm::VPlan::getTripCount
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3961

llvm::VPlan::hasVF
bool hasVF(ElementCount VF)
Definition: VPlan.h:3998

llvm::VPlan::hasUF
bool hasUF(unsigned UF) const
Definition: VPlan.h:4011

llvm::VPlan::setVF
void setVF(ElementCount VF)
Definition: VPlan.h:3992

llvm::VPlan::getExitBlocks
auto getExitBlocks()
Return an iterator range over the VPIRBasicBlock wrapping the exit blocks of the VPlan,...
Definition: VPlanCFG.h:310

llvm::VPlan::getVectorLoopRegion
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1052

llvm::VPlan::getMiddleBlock
const VPBasicBlock * getMiddleBlock() const
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:3939

llvm::VPlan::createVPBasicBlock
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition: VPlan.h:4090

llvm::VPlan::createVPIRBasicBlock
VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:1252

llvm::VPlan::getOrAddLiveIn
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:4031

llvm::VPlan::hasScalarVFOnly
bool hasScalarVFOnly() const
Definition: VPlan.h:4009

llvm::VPlan::getCanonicalIV
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4065

llvm::VPlan::getVectorPreheader
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition: VPlan.h:3925

llvm::VPlan::setUF
void setUF(unsigned UF)
Definition: VPlan.h:4018

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255

llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377

llvm::Value::hasName
bool hasName() const
Definition: Value.h:261

llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309

llvm::details::FixedOrScalableQuantity::multiplyCoefficientBy
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258

llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:37

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition: ilist_node.h:32

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:132

llvm::iterator_range::end
IteratorT end() const
Definition: iterator_range.h:65

unsigned

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:44

llvm::PatternMatch
Definition: PatternMatch.h:47

llvm::PatternMatch::m_Trunc
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:2075

llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:982

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49

llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific value.
Definition: PatternMatch.h:885

llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1799

llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:2138

llvm::PatternMatch::m_LogicalAnd
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
Definition: PatternMatch.h:3081

llvm::PatternMatch::m_Not
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
Definition: PatternMatch.h:2467

llvm::PatternMatch::m_SExt
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:2101

llvm::PatternMatch::m_c_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
Definition: PatternMatch.h:2791

llvm::VPlanPatternMatch
Definition: VPlanPatternMatch.h:26

llvm::vputils::isUniformAfterVectorization
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:41

llvm::vputils::getOrCreateVPValueForSCEVExpr
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlanUtils.cpp:26

llvm::vputils::getSCEVExprForVPValue
const SCEV * getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE)
Return the SCEV expression for V.
Definition: VPlanUtils.cpp:65

llvm::vputils::onlyFirstLaneUsed
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16

llvm::vputils::isHeaderMask
bool isHeaderMask(const VPValue *V, VPlan &Plan)
Return true if V is a header mask in Plan.
Definition: VPlanUtils.cpp:43

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Offset
@ Offset
Definition: DWP.cpp:480

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739

llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:209

llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:4984

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:77

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657

llvm::vp_depth_first_shallow
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition: VPlanCFG.h:215

llvm::vp_depth_first_deep
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
Definition: VPlanCFG.h:227

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746

llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664

llvm::VPlanPtr
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:144

llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753

llvm::to_vector
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition: Casting.h:548

llvm::RecurKind
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33

llvm::RecurKind::Mul
@ Mul
Product of integers.

llvm::RecurKind::Add
@ Add
Sum of integers.

llvm::Op
DWARFExpression::Operation Op
Definition: DWARFExpression.cpp:22

llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945

llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:1084

llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766

llvm::getLoadStoreType
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5039

llvm::TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...

llvm::InstructionUniformity::Default
@ Default
The result values are uniform if and only if all operands are uniform.

llvm::Incoming
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
Definition: SILowerI1Copies.h:25

llvm::VPFirstOrderRecurrencePHIRecipe
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:2347

llvm::VPWidenLoadEVLRecipe
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:3064

llvm::VPWidenLoadRecipe
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:3025

llvm::VPWidenSelectRecipe
A recipe for widening select instructions.
Definition: VPlan.h:1818

llvm::VPWidenStoreEVLRecipe
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:3144

llvm::VPWidenStoreRecipe
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:3103

llvm::VPlanTransforms::handleUncountableEarlyExit
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder)
Update Plan to account for the uncountable early exit block in UncountableExitingBlock.
Definition: VPlanTransforms.cpp:2065

llvm::VPlanTransforms::optimizeInductionExitUsers
static void optimizeInductionExitUsers(VPlan &Plan, DenseMap< VPValue *, VPValue * > &EndValues)
If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...
Definition: VPlanTransforms.cpp:731

llvm::VPlanTransforms::createAndOptimizeReplicateRegions
static void createAndOptimizeReplicateRegions(VPlan &Plan)
Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...
Definition: VPlanTransforms.cpp:406

llvm::VPlanTransforms::convertToConcreteRecipes
static void convertToConcreteRecipes(VPlan &Plan)
Lower abstract recipes to concrete ones, that can be codegen'd.
Definition: VPlanTransforms.cpp:2046

llvm::VPlanTransforms::dropPoisonGeneratingRecipes
static void dropPoisonGeneratingRecipes(VPlan &Plan, function_ref< bool(BasicBlock *)> BlockNeedsPredication)
Drop poison flags from recipes that may generate a poison value that is used after vectorization,...
Definition: VPlanTransforms.cpp:1873

llvm::VPlanTransforms::createInterleaveGroups
static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, VPRecipeBuilder &RecipeBuilder, bool ScalarEpilogueAllowed)
Definition: VPlanTransforms.cpp:1970

llvm::VPlanTransforms::removeDeadRecipes
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
Definition: VPlanTransforms.cpp:512

llvm::VPlanTransforms::clearReductionWrapFlags
static void clearReductionWrapFlags(VPlan &Plan)
Clear NSW/NUW flags from reduction instructions if necessary.
Definition: VPlanTransforms.cpp:1256

llvm::VPlanTransforms::tryAddExplicitVectorLength
static bool tryAddExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)
Add a VPEVLBasedIVPHIRecipe and related recipes to Plan and replaces all uses except the canonical IV...
Definition: VPlanTransforms.cpp:1812

llvm::VPlanTransforms::VPInstructionsToVPRecipes
static void VPInstructionsToVPRecipes(VPlanPtr &Plan, function_ref< const InductionDescriptor *(PHINode *)> GetIntOrFpInductionDescriptor, ScalarEvolution &SE, const TargetLibraryInfo &TLI)
Replaces the VPInstructions in Plan with corresponding widen recipes.
Definition: VPlanTransforms.cpp:33

llvm::VPlanTransforms::addActiveLaneMask
static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow, bool DataAndControlFlowWithoutRuntimeCheck)
Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...
Definition: VPlanTransforms.cpp:1606

llvm::VPlanTransforms::optimize
static void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...
Definition: VPlanTransforms.cpp:1441

llvm::VPlanTransforms::truncateToMinimalBitwidths
static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)
Insert truncates and extends for any truncated recipe.
Definition: VPlanTransforms.cpp:1308

llvm::VPlanTransforms::adjustFixedOrderRecurrences
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder)
Try to have all users of fixed-order recurrences appear after the recipe defining their previous valu...
Definition: VPlanTransforms.cpp:1208

llvm::VPlanTransforms::optimizeForVFAndUF
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
Optimize Plan based on BestVF and BestUF.
Definition: VPlanTransforms.cpp:978