docs/doxygen/InductiveRangeCheckElimination_8cpp_source.html

//===- InductiveRangeCheckElimination.cpp - -------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// The InductiveRangeCheckElimination pass splits a loop's iteration space into

// three disjoint ranges.  It does so in such a way that the loop running over

// the middle range provably does not need range checks. As an example, it will

// convert

//

//   len = < known positive >

//   for (i = 0; i < n; i++) {

//     if (0 <= i && i < len) {

//       do_something();

//     } else {

//       throw_out_of_bounds();

//     }

//   }

//

// to

//

//   len = < known positive >

//   limit = smin(n, len)

//   // no first segment

//   for (i = 0; i < limit; i++) {

//     if (0 <= i && i < len) { // this check is fully redundant

//       do_something();

//     } else {

//       throw_out_of_bounds();

//     }

//   }

//   for (i = limit; i < n; i++) {

//     if (0 <= i && i < len) {

//       do_something();

//     } else {

//       throw_out_of_bounds();

//     }

//   }

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"

#include "llvm/ADT/APInt.h"

#include "llvm/ADT/ArrayRef.h"

#include "llvm/ADT/PriorityWorklist.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/StringRef.h"

#include "llvm/ADT/Twine.h"

#include "llvm/Analysis/BlockFrequencyInfo.h"

#include "llvm/Analysis/BranchProbabilityInfo.h"

#include "llvm/Analysis/LoopAnalysisManager.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/CFG.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Metadata.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Use.h"

#include "llvm/IR/User.h"

#include "llvm/IR/Value.h"

#include "llvm/Support/BranchProbability.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/Cloning.h"

#include "llvm/Transforms/Utils/LoopConstrainer.h"

#include "llvm/Transforms/Utils/LoopSimplify.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#include "llvm/Transforms/Utils/ValueMapper.h"

#include <algorithm>

#include <cassert>

#include <iterator>

#include <optional>

#include <utility>


using namespace llvm;

using namespace llvm::PatternMatch;


// Hidden unsigned option `irce-loop-size-cutoff`, default 64. It is not
// referenced in the part of the file visible here; presumably it caps the size
// of loops IRCE is willing to transform -- TODO confirm at the use site.
static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden,

                                        cl::init(64));


// Hidden boolean option `irce-print-changed-loops`, default false. Not
// referenced in the visible part of the file; presumably a diagnostic switch
// for printing transformed loops -- TODO confirm at the use site.
static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden,

                                       cl::init(false));


// Hidden boolean option `irce-print-range-checks`, default false. Not
// referenced in the visible part of the file; presumably a diagnostic switch
// for printing discovered range checks -- TODO confirm at the use site.
static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,

                                      cl::init(false));


// Hidden boolean option `irce-skip-profitability-checks`, default false. When
// set, extractRangeChecksFromBranch does not apply its branch-probability
// filter.
static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",

                                             cl::Hidden, cl::init(false));


// Hidden unsigned option `irce-min-runtime-iterations`, default 10. Not
// referenced in the visible part of the file; presumably a lower bound on the
// estimated iteration count used by the profitability check -- TODO confirm.
static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",

                                              cl::Hidden, cl::init(10));


// Hidden boolean option `irce-allow-unsigned-latch`, default true. Not
// referenced in the visible part of the file; presumably permits loops whose
// latch comparison is unsigned -- TODO confirm at the use site.
static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",

                                                 cl::Hidden, cl::init(true));


// Hidden boolean option `irce-allow-narrow-latch`, default true. When false,
// calculateSubRanges gives up if the range's type differs from the main loop's
// exit count type.
static cl::opt<bool> AllowNarrowLatchCondition(

    "irce-allow-narrow-latch", cl::Hidden, cl::init(true),

    cl::desc("If set to true, IRCE may eliminate wide range checks in loops "

             "with narrow latch condition."));


// Hidden unsigned option `irce-max-type-size-for-overflow-check`, default 32.
// reassociateSubLHS refuses to widen a limit computation whose integer type is
// wider than this many bits.
static cl::opt<unsigned> MaxTypeSizeForOverflowCheck(

    "irce-max-type-size-for-overflow-check", cl::Hidden, cl::init(32),

    cl::desc(

        "Maximum size of range check type for which can be produced runtime "

        "overflow check of its limit's computation"));


// Hidden boolean option `irce-print-scaled-boundary-range-checks`, default
// false. When set, computeSafeIterationSpace prints to errs() every range
// check whose End has a wider type than the range check itself.
static cl::opt<bool>

    PrintScaledBoundaryRangeChecks("irce-print-scaled-boundary-range-checks",

                                   cl::Hidden, cl::init(false));


#define DEBUG_TYPE "irce"


namespace {


/// An inductive range check is conditional branch in a loop with

///

///  1. a very cold successor (i.e. the branch jumps to that successor very

///     rarely)

///

///  and

///

///  2. a condition that is provably true for some contiguous range of values

///     taken by the containing loop's induction variable.

///

class InductiveRangeCheck {


  const SCEV *Begin = nullptr;

  const SCEV *Step = nullptr;

  const SCEV *End = nullptr;

  Use *CheckUse = nullptr;


  static bool parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE,

                                  const SCEVAddRecExpr *&Index,

                                  const SCEV *&End);


  static void

  extractRangeChecksFromCond(Loop *L, ScalarEvolution &SE, Use &ConditionUse,

                             SmallVectorImpl<InductiveRangeCheck> &Checks,

                             SmallPtrSetImpl<Value *> &Visited);


  static bool parseIvAgaisntLimit(Loop *L, Value *LHS, Value *RHS,

                                  ICmpInst::Predicate Pred, ScalarEvolution &SE,

                                  const SCEVAddRecExpr *&Index,

                                  const SCEV *&End);


  static bool reassociateSubLHS(Loop *L, Value *VariantLHS, Value *InvariantRHS,

                                ICmpInst::Predicate Pred, ScalarEvolution &SE,

                                const SCEVAddRecExpr *&Index, const SCEV *&End);


public:

  const SCEV *getBegin() const { return Begin; }

  const SCEV *getStep() const { return Step; }

  const SCEV *getEnd() const { return End; }


  void print(raw_ostream &OS) const {

    OS << "InductiveRangeCheck:\n";

    OS << "  Begin: ";

    Begin->print(OS);

    OS << "  Step: ";

    Step->print(OS);

    OS << "  End: ";

    End->print(OS);

    OS << "\n  CheckUse: ";

    getCheckUse()->getUser()->print(OS);

    OS << " Operand: " << getCheckUse()->getOperandNo() << "\n";

  }


  LLVM_DUMP_METHOD

  void dump() {

    print(dbgs());

  }


  Use *getCheckUse() const { return CheckUse; }


  /// Represents an signed integer range [Range.getBegin(), Range.getEnd()).  If

  /// R.getEnd() le R.getBegin(), then R denotes the empty range.


  class Range {

    const SCEV *Begin;

    const SCEV *End;


  public:

    Range(const SCEV *Begin, const SCEV *End) : Begin(Begin), End(End) {

      assert(Begin->getType() == End->getType() && "ill-typed range!");

    }


    Type *getType() const { return Begin->getType(); }

    const SCEV *getBegin() const { return Begin; }

    const SCEV *getEnd() const { return End; }

    bool isEmpty(ScalarEvolution &SE, bool IsSigned) const {

      if (Begin == End)

        return true;

      if (IsSigned)

        return SE.isKnownPredicate(ICmpInst::ICMP_SGE, Begin, End);

      else

        return SE.isKnownPredicate(ICmpInst::ICMP_UGE, Begin, End);

    }

  };


  /// This is the value the condition of the branch needs to evaluate to for the

  /// branch to take the hot successor (see (1) above).

  bool getPassingDirection() { return true; }


  /// Computes a range for the induction variable (IndVar) in which the range

  /// check is redundant and can be constant-folded away.  The induction

  /// variable is not required to be the canonical {0,+,1} induction variable.

  std::optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,

                                                 const SCEVAddRecExpr *IndVar,

                                                 bool IsLatchSigned) const;


  /// Parse out a set of inductive range checks from \p BI and append them to \p

  /// Checks.

  ///

  /// NB! There may be conditions feeding into \p BI that aren't inductive range

  /// checks, and hence don't end up in \p Checks.

  static void extractRangeChecksFromBranch(

      BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,

      SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);

};


/// Holds the analyses needed to run the transform on a single loop. The
/// analyses are borrowed (stored as references / a raw pointer), not owned.
class InductiveRangeCheckElimination {
  /// Optional callback that yields the BlockFrequencyInfo when invoked.
  using GetBFIFunc =
      std::optional<llvm::function_ref<llvm::BlockFrequencyInfo &()>>;

  ScalarEvolution &SE;
  BranchProbabilityInfo *BPI;
  DominatorTree &DT;
  LoopInfo &LI;
  GetBFIFunc GetBFI;

  /// Decides, based on an estimate of the number of iterations, whether
  /// transforming \p L is worthwhile.
  bool isProfitableToTransform(const Loop &L, LoopStructure &LS);

public:
  /// Stores the supplied analyses; \p GetBFI defaults to "not available".
  InductiveRangeCheckElimination(ScalarEvolution &SE,
                                 BranchProbabilityInfo *BPI, DominatorTree &DT,
                                 LoopInfo &LI, GetBFIFunc GetBFI = std::nullopt)
      : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}

  /// Entry point for one loop; \p LPMAddNewLoop receives a loop and a flag.
  bool run(Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop);
};


} // end anonymous namespace


/// Try to interpret the integer comparison `ICI` as a range check.
///
/// The comparison is first put into the shape `Variant Pred Invariant`: if
/// operand 0 is invariant in `L` the operands (and the predicate) are swapped;
/// otherwise operand 1 must be invariant or parsing fails. The canonical form
/// is then handed to the individual pattern parsers in turn.
///
/// On success returns true, with `Index` set to the SCEV being range checked
/// and `End` set to the limit it is checked against. Returns false if `ICI`
/// is not an integer comparison or matches none of the supported patterns.
bool InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
                                              ScalarEvolution &SE,
                                              const SCEVAddRecExpr *&Index,
                                              const SCEV *&End) {
  Value *Variant = ICI->getOperand(0);
  Value *Invariant = ICI->getOperand(1);
  ICmpInst::Predicate Pred = ICI->getPredicate();

  // Only integer comparisons are candidates.
  if (!Variant->getType()->isIntegerTy())
    return false;

  // Canonicalize to the `Index Pred Invariant` comparison.
  if (SE.isLoopInvariant(SE.getSCEV(Variant), L)) {
    std::swap(Variant, Invariant);
    Pred = CmpInst::getSwappedPredicate(Pred);
  } else if (!SE.isLoopInvariant(SE.getSCEV(Invariant), L)) {
    // Neither side is loop invariant, so there is no limit to check against.
    return false;
  }

  // TODO: support ReassociateAddLHS
  return parseIvAgaisntLimit(L, Variant, Invariant, Pred, SE, Index, End) ||
         reassociateSubLHS(L, Variant, Invariant, Pred, SE, Index, End);
}


// Recognize a range check written directly as "IV Pred Limit".
//
// LHS must evaluate to an add-recurrence; RHS is the limit it is compared
// against. On success Index receives the add-recurrence, End receives the
// exclusive upper bound derived from the comparison, and true is returned.
// Unsupported predicates or limits yield false and leave the outputs alone.
bool InductiveRangeCheck::parseIvAgaisntLimit(Loop *L, Value *LHS, Value *RHS,
                                              ICmpInst::Predicate Pred,
                                              ScalarEvolution &SE,
                                              const SCEVAddRecExpr *&Index,
                                              const SCEV *&End) {
  const auto *IV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(LHS));
  if (!IV)
    return false;

  // Publishes a successful parse through the out-parameters.
  auto Accept = [&](const SCEV *UpperBound) {
    Index = IV;
    End = UpperBound;
    return true;
  };

  // Signed maximum value of the induction variable's integer type.
  auto SignedMaxOfIV = [&]() -> const SCEV * {
    unsigned Bits = cast<IntegerType>(IV->getType())->getBitWidth();
    return SE.getConstant(APInt::getSignedMaxValue(Bits));
  };

  // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
  // We can potentially do much better here.
  // If we want to adjust the upper bound for the unsigned range check the way
  // we do for the signed one, we will need to pick the unsigned max.

  // "I >= 0" and its equivalent spelling "I > -1".
  if (Pred == ICmpInst::ICMP_SGE)
    return match(RHS, m_ConstantInt<0>()) && Accept(SignedMaxOfIV());
  if (Pred == ICmpInst::ICMP_SGT)
    return match(RHS, m_ConstantInt<-1>()) && Accept(SignedMaxOfIV());

  // "I < L": the limit itself is the exclusive bound.
  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT)
    return Accept(SE.getSCEV(RHS));

  // "I <= L" becomes "I < L + 1", provided L + 1 provably does not overflow.
  if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE) {
    const SCEV *One = SE.getOne(RHS->getType());
    const SCEV *Bound = SE.getSCEV(RHS);
    const bool IsSigned = Pred == ICmpInst::ICMP_SLE;
    if (!SE.willNotOverflow(Instruction::BinaryOps::Add, IsSigned, Bound, One))
      return false;
    return Accept(SE.getAddExpr(Bound, One));
  }

  // Any other predicate is not a supported range check.
  return false;
}


// Recognize a range check whose variant side is a subtraction, i.e.
// "IV - Offset vs Limit" or "Offset - IV vs Limit", and rewrite it as a check
// of IV alone against a recomputed limit.
//
// On success Index receives the add-recurrence, End receives the new exclusive
// limit (possibly expressed in an integer type twice as wide, see below) and
// true is returned. Otherwise false is returned and the outputs are untouched.
bool InductiveRangeCheck::reassociateSubLHS(
    Loop *L, Value *VariantLHS, Value *InvariantRHS, ICmpInst::Predicate Pred,
    ScalarEvolution &SE, const SCEVAddRecExpr *&Index, const SCEV *&End) {
  Value *Minuend = nullptr;
  Value *Subtrahend = nullptr;
  if (!match(VariantLHS, m_Sub(m_Value(Minuend), m_Value(Subtrahend))))
    return false;

  const SCEV *IV = SE.getSCEV(Minuend);
  const SCEV *Offset = SE.getSCEV(Subtrahend);
  const SCEV *Limit = SE.getSCEV(InvariantRHS);

  // Work out which operand of the subtraction is the loop-invariant offset.
  bool OffsetSubtracted = false;
  if (SE.isLoopInvariant(IV, L)) {
    // "Offset - IV vs Limit"
    std::swap(IV, Offset);
  } else if (SE.isLoopInvariant(Offset, L)) {
    // "IV - Offset vs Limit"
    OffsetSubtracted = true;
  } else {
    return false;
  }

  const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AddRec)
    return false;

  // Turning "IV - Offset < Limit" into "IV < Limit + Offset" moves a term
  // across the inequality as in ordinary linear arithmetic, which is only
  // valid when nothing overflows. So we first argue that the original
  // subtraction cannot overflow for any IV value inside the safe range that
  // will be built for this check.
  //
  // [Case 1] IV - Offset < Limit
  // No overflow means:
  //     SINT_MIN <= IV - Offset <= SINT_MAX
  // which in terms of scaled SINT is:
  //     SINT_MIN + Offset <= IV <= SINT_MAX + Offset
  // The safe range constructed is:
  //     0 <= IV < Limit + Offset
  // Hence 'IV - Offset' does not underflow:
  //     SINT_MIN + Offset < 0 <= IV
  // and does not overflow:
  //     IV < Limit + Offset <= SINT_MAX + Offset
  //
  // [Case 2] Offset - IV > Limit
  // No overflow means:
  //     SINT_MIN <= Offset - IV <= SINT_MAX
  // which in terms of scaled SINT is:
  //     -SINT_MIN >= IV - Offset >= -SINT_MAX
  //     Offset - SINT_MIN >= IV >= Offset - SINT_MAX
  // The safe range constructed is:
  //     0 <= IV < Offset - Limit
  // Hence 'Offset - IV' does not underflow:
  //     Offset - SINT_MAX < 0 <= IV
  // and does not overflow:
  //     IV < Offset - Limit <= Offset - SINT_MIN
  //
  // What remains unknown is whether the new upper boundary (Offset +/- Limit)
  // itself overflows. When that cannot be ruled out at compile time, the
  // boundary is computed in a wider type with the intention of adding a
  // runtime overflow check.

  // Builds "Op0 BinOp Op1". If the operation provably does not overflow it is
  // built in the operands' own type; otherwise both operands are sign-extended
  // to twice the width first. Returns nullptr when widening would be needed
  // but the type is wider than MaxTypeSizeForOverflowCheck.
  auto BuildWidenedOnOverflow = [&](Instruction::BinaryOps BinOp,
                                    const SCEV *Op0,
                                    const SCEV *Op1) -> const SCEV * {
    using BinarySCEVBuilder = const SCEV *(ScalarEvolution::*)(
        const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
    BinarySCEVBuilder Build = nullptr;
    if (BinOp == Instruction::Add)
      Build = &ScalarEvolution::getAddExpr;
    else if (BinOp == Instruction::Sub)
      Build = &ScalarEvolution::getMinusSCEV;
    else
      llvm_unreachable("Unsupported binary op");

    if (SE.willNotOverflow(BinOp, ICmpInst::isSigned(Pred), Op0, Op1,
                           cast<Instruction>(VariantLHS)))
      return (SE.*Build)(Op0, Op1, SCEV::FlagAnyWrap, 0);

    // No proof that the expression stays in range, so fall back to a wider
    // type in which overflow can be checked at runtime.
    auto *NarrowTy = cast<IntegerType>(Op0->getType());
    if (NarrowTy->getBitWidth() > MaxTypeSizeForOverflowCheck)
      return nullptr;

    auto *WideTy =
        IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
    const SCEV *WideOp0 = SE.getSignExtendExpr(Op0, WideTy);
    const SCEV *WideOp1 = SE.getSignExtendExpr(Op1, WideTy);
    return (SE.*Build)(WideOp0, WideOp1, SCEV::FlagAnyWrap, 0);
  };

  if (OffsetSubtracted) {
    // "IV - Offset < Limit" -> "IV" < Offset + Limit
    Limit = BuildWidenedOnOverflow(Instruction::BinaryOps::Add, Offset, Limit);
  } else {
    // "Offset - IV > Limit" -> "IV" < Offset - Limit
    Limit = BuildWidenedOnOverflow(Instruction::BinaryOps::Sub, Offset, Limit);
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  // Only signed "less than" / "less or equal" checks are supported.
  if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SLE)
    return false;
  if (!Limit)
    return false;

  if (Pred == ICmpInst::ICMP_SLE) {
    // "Expr <= Limit" -> "Expr < Limit + 1"
    Limit = BuildWidenedOnOverflow(Instruction::BinaryOps::Add, Limit,
                                   SE.getOne(Limit->getType()));
    if (!Limit)
      return false;
  }

  Index = AddRec;
  End = Limit;
  return true;
}


// Walk the condition reached through ConditionUse and append every inductive
// range check found to Checks. Logical-and conditions are descended into
// operand by operand; a leaf is accepted only if it is an icmp that parses as
// a range check on an affine add-recurrence of loop L. Visited guards against
// processing the same condition value more than once.
void InductiveRangeCheck::extractRangeChecksFromCond(
    Loop *L, ScalarEvolution &SE, Use &ConditionUse,
    SmallVectorImpl<InductiveRangeCheck> &Checks,
    SmallPtrSetImpl<Value *> &Visited) {
  Value *Cond = ConditionUse.get();
  const bool FirstVisit = Visited.insert(Cond).second;
  if (!FirstVisit)
    return;

  // TODO: Do the same for OR, XOR, NOT etc?
  if (match(Cond, m_LogicalAnd(m_Value(), m_Value()))) {
    auto *AndUser = cast<User>(Cond);
    for (unsigned OpIdx = 0; OpIdx != 2; ++OpIdx)
      extractRangeChecksFromCond(L, SE, AndUser->getOperandUse(OpIdx), Checks,
                                 Visited);
    return;
  }

  auto *Cmp = dyn_cast<ICmpInst>(Cond);
  if (!Cmp)
    return;

  const SCEVAddRecExpr *IndexAddRec = nullptr;
  const SCEV *End = nullptr;
  if (!parseRangeCheckICmp(L, Cmp, SE, IndexAddRec, End))
    return;

  assert(IndexAddRec && "IndexAddRec was not computed");
  assert(End && "End was not computed");

  // The checked value must be an affine recurrence of exactly this loop.
  const bool IsAffineInThisLoop =
      IndexAddRec->getLoop() == L && IndexAddRec->isAffine();
  if (!IsAffineInThisLoop)
    return;

  InductiveRangeCheck IRC;
  IRC.Begin = IndexAddRec->getStart();
  IRC.Step = IndexAddRec->getStepRecurrence(SE);
  IRC.End = End;
  IRC.CheckUse = &ConditionUse;
  Checks.push_back(IRC);
}


// Collect the inductive range checks guarding the conditional branch BI of
// loop L into Checks.
//
// Unconditional branches and the latch's branch are ignored. Unless
// profitability checks are disabled (or no BPI is supplied), the edge that
// stays in the loop must be taken with probability of at least 15/16. If that
// edge is successor 1, the branch is inverted so that the true edge leads into
// the loop; in that case Changed is set to true.
void InductiveRangeCheck::extractRangeChecksFromBranch(
    BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
    SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
  if (BI->isUnconditional())
    return;
  if (BI->getParent() == L->getLoopLatch())
    return;

  const unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
  assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
         "No edges coming to loop?");

  if (!SkipProfitabilityChecks && BPI) {
    const BranchProbability LikelyTaken(15, 16);
    if (BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
      return;
  }

  // IRCE expects the branch's true edge to lead into the loop. Invert the
  // branch when it is the false edge that does.
  if (IndexLoopSucc == 1) {
    IRBuilder<> Builder(BI);
    InvertBranch(BI, Builder);
    if (BPI)
      BPI->swapSuccEdgesProbabilities(BI->getParent());
    Changed = true;
  }

  SmallPtrSet<Value *, 8> Visited;
  InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->getOperandUse(0),
                                                  Checks, Visited);
}


/// Bring \p S to type \p Ty: sign-extending via getNoopOrSignExtend when
/// \p Signed is true, zero-extending via getNoopOrZeroExtend otherwise. Both
/// leave \p S as is when it already has type \p Ty.
static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
                                bool Signed) {
  if (Signed)
    return SE.getNoopOrSignExtend(S, Ty);
  return SE.getNoopOrZeroExtend(S, Ty);
}


// Work out the limits within which the main loop may run -- in effect the
// intersection of `Range` with the iteration space of the original loop.
//
// A limit is left unset in the result when the corresponding pre- or post-loop
// is provably unnecessary. Returns std::nullopt when the range's type is
// narrower than the loop's exit count type, or differs from it while narrow
// latch conditions are disallowed.
static std::optional<LoopConstrainer::SubRanges>
calculateSubRanges(ScalarEvolution &SE, const Loop &L,
                   InductiveRangeCheck::Range &Range,
                   const LoopStructure &MainLoopStructure) {
  auto *RangeTy = cast<IntegerType>(Range.getType());

  // The range check may be wider than the latch condition, never narrower;
  // and a width mismatch is only tolerated when the option permits it.
  if ((!AllowNarrowLatchCondition &&
       RangeTy != MainLoopStructure.ExitCountTy) ||
      RangeTy->getBitWidth() < MainLoopStructure.ExitCountTy->getBitWidth())
    return std::nullopt;

  const bool IsSigned = MainLoopStructure.IsSignedPredicate;

  // I think we can be more aggressive here and make this nuw / nsw if the
  // addition that feeds into the icmp for the latch's terminating branch is nuw
  // / nsw.  In any case, a wrapping 2's complement addition is safe.
  const SCEV *Start = NoopOrExtend(SE.getSCEV(MainLoopStructure.IndVarStart),
                                   RangeTy, SE, IsSigned);
  const SCEV *End = NoopOrExtend(SE.getSCEV(MainLoopStructure.LoopExitAt),
                                 RangeTy, SE, IsSigned);
  const SCEV *One = SE.getOne(RangeTy);

  // `Smallest` and `Greatest` are chosen so that [Smallest, Greatest), or
  // equivalently [Smallest, GreatestSeen], is the set of values the induction
  // variable takes.
  const SCEV *Smallest = nullptr;
  const SCEV *Greatest = nullptr;
  const SCEV *GreatestSeen = nullptr;
  if (MainLoopStructure.IndVarIncreasing) {
    Smallest = Start;
    Greatest = End;
    // No overflow, because the range [Smallest, GreatestSeen] is not empty.
    GreatestSeen = SE.getMinusSCEV(End, One);
  } else {
    // Both additions below may sign-overflow, and that is acceptable:
    //
    // The induction variable does not sign-overflow on any iteration except
    // the last one; it starts at `Start` and ends at `End`, decrementing by
    // one every time.
    //
    //  * if `Smallest` sign-overflows then `End` is `INT_SMAX`. As the
    //    induction variable is decreasing, the smallest value the loop body
    //    actually runs with is `INT_SMIN` == `Smallest`.
    //
    //  * if `Greatest` sign-overflows it can only be `INT_SMIN`. The clamp
    //    below then always yields `Smallest`, so
    //    [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`)
    //    is an empty range, and returning an empty range is always safe.
    Smallest = SE.getAddExpr(End, One);
    Greatest = SE.getAddExpr(Start, One);
    GreatestSeen = Start;
  }

  // Confines S to [Smallest, Greatest] using the latch's signedness.
  auto ClampToIterationSpace = [&](const SCEV *S) -> const SCEV * {
    if (IsSigned)
      return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S));
    return SE.getUMaxExpr(Smallest, SE.getUMinExpr(Greatest, S));
  };

  const ICmpInst::Predicate LessOrEqual =
      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
  const ICmpInst::Predicate LessThan =
      IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;

  LoopConstrainer::SubRanges Result;

  // A pre-loop is needed unless the range provably starts at or below the
  // smallest value the induction variable takes.
  if (!SE.isKnownPredicate(LessOrEqual, Range.getBegin(), Smallest))
    Result.LowLimit = ClampToIterationSpace(Range.getBegin());

  // A post-loop is needed unless the greatest value seen is provably below
  // the end of the range.
  if (!SE.isKnownPredicate(LessThan, GreatestSeen, Range.getEnd()))
    Result.HighLimit = ClampToIterationSpace(Range.getEnd());

  return Result;
}


/// Computes and returns a range of values for the induction variable (IndVar)
/// in which the range check can be safely elided.  If it cannot compute such a
/// range, returns std::nullopt.
///
/// \param SE            ScalarEvolution used to build all expressions.
/// \param IndVar        Add-recurrence of the loop's induction variable.
/// \param IsLatchSigned Whether the latch condition is a signed comparison;
///                      selects sign- vs zero-extension and the clamping rule.
///
/// std::nullopt is returned when any of the involved types is not an integer
/// type, when the induction variable is wider than the range check, when
/// IndVar is not affine, or when the steps of IndVar and of this range check
/// are not the same constant.
std::optional<InductiveRangeCheck::Range>
InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
                                               const SCEVAddRecExpr *IndVar,
                                               bool IsLatchSigned) const {
  // We can deal when types of latch check and range checks don't match in case
  // if latch check is more narrow.
  auto *IVType = dyn_cast<IntegerType>(IndVar->getType());
  auto *RCType = dyn_cast<IntegerType>(getBegin()->getType());
  auto *EndType = dyn_cast<IntegerType>(getEnd()->getType());
  // Do not work with pointer types. EndType is dereferenced below when
  // comparing bit widths, so it has to be validated together with the others.
  if (!IVType || !RCType || !EndType)
    return std::nullopt;
  if (IVType->getBitWidth() > RCType->getBitWidth())
    return std::nullopt;

  // IndVar is of the form "A + B * I" (where "I" is the canonical induction
  // variable, that may or may not exist as a real llvm::Value in the loop) and
  // this inductive range check is a range check on the "C + D * I" ("C" is
  // getBegin() and "D" is getStep()).  We rewrite the value being range
  // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA".
  //
  // The actual inequalities we solve are of the form
  //
  //   0 <= M + 1 * IndVar < L given L >= 0  (i.e. N == 1)
  //
  // Here L stands for upper limit of the safe iteration space.
  // The inequality is satisfied by (0 - M) <= IndVar < (L - M). To avoid
  // overflows when calculating (0 - M) and (L - M) we, depending on type of
  // IV's iteration space, limit the calculations by borders of the iteration
  // space. For example, if IndVar is unsigned, (0 - M) overflows for any M > 0.
  // If we figured out that "anything greater than (-M) is safe", we strengthen
  // this to "everything greater than 0 is safe", assuming that values between
  // -M and 0 just do not exist in unsigned iteration space, and we don't want
  // to deal with overflown values.

  if (!IndVar->isAffine())
    return std::nullopt;

  const SCEV *A = NoopOrExtend(IndVar->getStart(), RCType, SE, IsLatchSigned);
  const SCEVConstant *B = dyn_cast<SCEVConstant>(
      NoopOrExtend(IndVar->getStepRecurrence(SE), RCType, SE, IsLatchSigned));
  if (!B)
    return std::nullopt;
  assert(!B->isZero() && "Recurrence with zero step?");

  const SCEV *C = getBegin();
  const SCEVConstant *D = dyn_cast<SCEVConstant>(getStep());
  // Only N == 1 is handled: both steps must be the very same constant. B is
  // known to be non-null here, so this also rejects a non-constant D.
  if (D != B)
    return std::nullopt;

  assert(!D->getValue()->isZero() && "Recurrence with zero step?");
  unsigned BitWidth = RCType->getBitWidth();
  const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
  const SCEV *SIntMin = SE.getConstant(APInt::getSignedMinValue(BitWidth));

  // Subtract Y from X so that it does not go through border of the IV
  // iteration space. Mathematically, it is equivalent to:
  //
  //    ClampedSubtract(X, Y) = min(max(X - Y, INT_MIN), INT_MAX).        [1]
  //
  // In [1], 'X - Y' is a mathematical subtraction (result is not bounded to
  // any width of bit grid). But after we take min/max, the result is
  // guaranteed to be within [INT_MIN, INT_MAX].
  //
  // In [1], INT_MAX and INT_MIN are the signed or unsigned max/min values,
  // depending on type of latch condition that defines IV iteration space.
  auto ClampedSubtract = [&](const SCEV *X, const SCEV *Y) {
    // FIXME: The current implementation assumes that X is in [0, SINT_MAX].
    // This is required to ensure that SINT_MAX - X does not overflow signed and
    // that X - Y does not overflow unsigned if Y is negative. Can we lift this
    // restriction and make it work for negative X as well?
    if (IsLatchSigned) {
      // X is a number from signed range, Y is interpreted as signed.
      // Even if Y is SINT_MAX, (X - Y) does not reach SINT_MIN. So the only
      // thing we should care about is that we didn't cross SINT_MAX.
      // So, if Y is positive, we subtract Y safely.
      //   Rule 1: Y > 0 ---> Y.
      // If 0 <= -Y <= (SINT_MAX - X), we subtract Y safely.
      //   Rule 2: Y >=s (X - SINT_MAX) ---> Y.
      // If 0 <= (SINT_MAX - X) < -Y, we can only subtract (X - SINT_MAX).
      //   Rule 3: Y <s (X - SINT_MAX) ---> (X - SINT_MAX).
      // It gives us smax(Y, X - SINT_MAX) to subtract in all cases.
      const SCEV *XMinusSIntMax = SE.getMinusSCEV(X, SIntMax);
      return SE.getMinusSCEV(X, SE.getSMaxExpr(Y, XMinusSIntMax),
                             SCEV::FlagNSW);
    }
    // X is a number from unsigned range, Y is interpreted as signed.
    // Even if Y is SINT_MIN, (X - Y) does not reach UINT_MAX. So the only
    // thing we should care about is that we didn't cross zero.
    // So, if Y is negative, we subtract Y safely.
    //   Rule 1: Y <s 0 ---> Y.
    // If 0 <= Y <= X, we subtract Y safely.
    //   Rule 2: Y <=s X ---> Y.
    // If 0 <= X < Y, we should stop at 0 and can only subtract X.
    //   Rule 3: Y >s X ---> X.
    // It gives us smin(X, Y) to subtract in all cases.
    return SE.getMinusSCEV(X, SE.getSMinExpr(X, Y), SCEV::FlagNUW);
  };
  const SCEV *M = SE.getMinusSCEV(C, A);
  const SCEV *Zero = SE.getZero(M->getType());

  // This function returns SCEV equal to 1 if X is non-negative 0 otherwise.
  auto SCEVCheckNonNegative = [&](const SCEV *X) {
    const Loop *L = IndVar->getLoop();
    const SCEV *Zero = SE.getZero(X->getType());
    const SCEV *One = SE.getOne(X->getType());
    // Can we trivially prove that X is a non-negative or negative value?
    if (isKnownNonNegativeInLoop(X, L, SE))
      return One;
    if (isKnownNegativeInLoop(X, L, SE))
      return Zero;
    // If not, we will have to figure it out during the execution.
    // Function smax(smin(X, 0), -1) + 1 equals to 1 if X >= 0 and 0 if X < 0.
    const SCEV *NegOne = SE.getNegativeSCEV(One);
    return SE.getAddExpr(SE.getSMaxExpr(SE.getSMinExpr(X, Zero), NegOne), One);
  };

  // This function returns SCEV equal to 1 if X will not overflow in terms of
  // range check type, 0 otherwise.
  auto SCEVCheckWillNotOverflow = [&](const SCEV *X) {
    // X doesn't overflow if SINT_MAX >= X.
    // Then if (SINT_MAX - X) >= 0, X doesn't overflow
    const SCEV *SIntMaxExt = SE.getSignExtendExpr(SIntMax, X->getType());
    const SCEV *OverflowCheck =
        SCEVCheckNonNegative(SE.getMinusSCEV(SIntMaxExt, X));

    // X doesn't underflow if X >= SINT_MIN.
    // Then if (X - SINT_MIN) >= 0, X doesn't underflow
    const SCEV *SIntMinExt = SE.getSignExtendExpr(SIntMin, X->getType());
    const SCEV *UnderflowCheck =
        SCEVCheckNonNegative(SE.getMinusSCEV(X, SIntMinExt));

    return SE.getMulExpr(OverflowCheck, UnderflowCheck);
  };

  // FIXME: Current implementation of ClampedSubtract implicitly assumes that
  // X is non-negative (in sense of a signed value). We need to re-implement
  // this function in a way that it will correctly handle negative X as well.
  // We use it twice: for X = 0 everything is fine, but for X = getEnd() we can
  // end up with a negative X and produce wrong results. So currently we ensure
  // that if getEnd() is negative then both ends of the safe range are zero.
  // Note that this may pessimize elimination of unsigned range checks against
  // negative values.
  const SCEV *REnd = getEnd();
  const SCEV *EndWillNotOverflow = SE.getOne(RCType);

  auto PrintRangeCheck = [&](raw_ostream &OS) {
    auto L = IndVar->getLoop();
    OS << "irce: in function ";
    OS << L->getHeader()->getParent()->getName();
    OS << ", in ";
    L->print(OS);
    OS << "there is range check with scaled boundary:\n";
    print(OS);
  };

  if (EndType->getBitWidth() > RCType->getBitWidth()) {
    assert(EndType->getBitWidth() == RCType->getBitWidth() * 2);
    if (PrintScaledBoundaryRangeChecks)
      PrintRangeCheck(errs());
    // End is computed in the extended type but will be truncated to the
    // narrower type of the range check. Therefore we need a check that the
    // result will not overflow in terms of the narrow type.
    EndWillNotOverflow =
        SE.getTruncateExpr(SCEVCheckWillNotOverflow(REnd), RCType);
    REnd = SE.getTruncateExpr(REnd, RCType);
  }

  // RuntimeChecks is 1 when REnd is non-negative and did not overflow, and 0
  // otherwise; multiplying by it collapses both bounds to zero when the range
  // cannot be trusted.
  const SCEV *RuntimeChecks =
      SE.getMulExpr(SCEVCheckNonNegative(REnd), EndWillNotOverflow);
  const SCEV *Begin = SE.getMulExpr(ClampedSubtract(Zero, M), RuntimeChecks);
  const SCEV *End = SE.getMulExpr(ClampedSubtract(REnd, M), RuntimeChecks);

  return InductiveRangeCheck::Range(Begin, End);
}


/// Intersect the accumulated safe range \p R1 (if any) with \p R2, treating
/// both ranges as signed.
///
/// Returns std::nullopt when \p R2 is empty, when the two ranges have
/// different types, or when the intersection is empty. When \p R1 holds no
/// value yet, \p R2 itself is returned.
static std::optional<InductiveRangeCheck::Range>
IntersectSignedRange(ScalarEvolution &SE,
                     const std::optional<InductiveRangeCheck::Range> &R1,
                     const InductiveRangeCheck::Range &R2) {
  constexpr bool IsSigned = true;

  // An empty incoming range can never contribute to a non-empty result.
  if (R2.isEmpty(SE, IsSigned))
    return std::nullopt;

  // Nothing has been accumulated yet: the incoming range is the answer.
  if (!R1.has_value())
    return R2;

  const InductiveRangeCheck::Range &Accumulated = *R1;
  // This function never hands out an empty range, and R1 is expected to be
  // the product of an earlier intersection, so it must be non-empty.
  assert(!Accumulated.isEmpty(SE, IsSigned) &&
         "We should never have empty R1!");

  // TODO: the narrower range could be widened to make this case work; for now
  // mismatching types are simply rejected.
  if (Accumulated.getType() != R2.getType())
    return std::nullopt;

  // Signed intersection: the larger of the begins, the smaller of the ends.
  const SCEV *LowerBound = SE.getSMaxExpr(Accumulated.getBegin(), R2.getBegin());
  const SCEV *UpperBound = SE.getSMinExpr(Accumulated.getEnd(), R2.getEnd());

  InductiveRangeCheck::Range Intersection(LowerBound, UpperBound);
  // An empty intersection is reported as "no range".
  if (Intersection.isEmpty(SE, IsSigned))
    return std::nullopt;
  return Intersection;
}


/// Intersect the accumulated safe range \p R1 (if any) with \p R2, treating
/// both ranges as unsigned.
///
/// Returns std::nullopt when \p R2 is empty, when the two ranges have
/// different types, or when the intersection is empty. When \p R1 holds no
/// value yet, \p R2 itself is returned.
static std::optional<InductiveRangeCheck::Range>
IntersectUnsignedRange(ScalarEvolution &SE,
                       const std::optional<InductiveRangeCheck::Range> &R1,
                       const InductiveRangeCheck::Range &R2) {
  constexpr bool IsSigned = false;

  // An empty incoming range can never contribute to a non-empty result.
  if (R2.isEmpty(SE, IsSigned))
    return std::nullopt;

  // Nothing has been accumulated yet: the incoming range is the answer.
  if (!R1.has_value())
    return R2;

  const InductiveRangeCheck::Range &Accumulated = *R1;
  // This function never hands out an empty range, and R1 is expected to be
  // the product of an earlier intersection, so it must be non-empty.
  assert(!Accumulated.isEmpty(SE, IsSigned) &&
         "We should never have empty R1!");

  // TODO: the narrower range could be widened to make this case work; for now
  // mismatching types are simply rejected.
  if (Accumulated.getType() != R2.getType())
    return std::nullopt;

  // Unsigned intersection: the larger of the begins, the smaller of the ends.
  const SCEV *LowerBound = SE.getUMaxExpr(Accumulated.getBegin(), R2.getBegin());
  const SCEV *UpperBound = SE.getUMinExpr(Accumulated.getEnd(), R2.getEnd());

  InductiveRangeCheck::Range Intersection(LowerBound, UpperBound);
  // An empty intersection is reported as "no range".
  if (Intersection.isEmpty(SE, IsSigned))
    return std::nullopt;
  return Intersection;
}


/// Function-pass entry point: simplifies the top-level loops of \p F, puts
/// them into LCSSA form, and then runs InductiveRangeCheckElimination on every
/// loop pulled from a worklist.
///
/// Returns PreservedAnalyses::all() when nothing was changed, otherwise the
/// set returned by getLoopPassPreservedAnalyses().
PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
  // A function without loops gives us nothing to do; leave before requesting
  // the remaining, more expensive analyses.
  if (LI.empty())
    return PreservedAnalyses::all();
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);

  // BFI is fetched lazily. CFG modifications invalidate it, which is handled
  // below through DropStaleBFI.
  auto getBFI = [&F, &AM]() -> BlockFrequencyInfo & {
    return AM.getResult<BlockFrequencyAnalysis>(F);
  };
  InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });

  // Abandons the cached BlockFrequencyAnalysis result so the next getBFI()
  // call recomputes it. Does nothing when profitability checks are skipped.
  auto DropStaleBFI = [&F, &AM]() {
    if (SkipProfitabilityChecks)
      return;
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<BlockFrequencyAnalysis>();
    AM.invalidate(F, PA);
  };

  bool Changed = false;
  bool CFGChanged = false;
  for (Loop *TopLevelLoop : LI) {
    CFGChanged |= simplifyLoop(TopLevelLoop, &DT, &LI, &SE, nullptr, nullptr,
                               /*PreserveLCSSA=*/false);
    Changed |= formLCSSARecursively(*TopLevelLoop, DT, &LI, &SE);
  }
  if (CFGChanged) {
    Changed = true;
    DropStaleBFI();
  }

  SmallPriorityWorklist<Loop *, 4> Worklist;
  appendLoopsToWorklist(LI, Worklist);
  // Callback handed to IRCE: loops reported as non-subloops are queued
  // (via appendLoopsToWorklist) for processing.
  auto LPMAddNewLoop = [&Worklist](Loop *NewLoop, bool IsSubloop) {
    if (IsSubloop)
      return;
    appendLoopsToWorklist(*NewLoop, Worklist);
  };

  while (!Worklist.empty()) {
    Loop *Current = Worklist.pop_back_val();
    if (!IRCE.run(Current, LPMAddNewLoop))
      continue;
    Changed = true;
    DropStaleBFI();
  }

  return Changed ? getLoopPassPreservedAnalyses() : PreservedAnalyses::all();
}


bool

InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,

                                                        LoopStructure &LS) {

  if (SkipProfitabilityChecks)

    return true;

  if (GetBFI) {

    BlockFrequencyInfo &BFI = (*GetBFI)();

    uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();

    uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();

    if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {

      LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "

                        << "the estimated number of iterations basing on "

                           "frequency info is " << (hFreq / phFreq) << "\n";);

      return false;

    }

    return true;

  }


  if (!BPI)

    return true;

  BranchProbability ExitProbability =

      BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx);

  if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {

    LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "

                      << "the exit probability is too big " << ExitProbability

                      << "\n";);

    return false;

  }

  return true;

}


/// Try to eliminate the inductive range checks found in loop \p L.
///
/// The steps are: collect range checks from the conditional branches of the
/// loop, parse the loop structure, check profitability, intersect the safe
/// iteration spaces of the individual checks, compute the sub-ranges, run the
/// LoopConstrainer, and finally fold the checks that became redundant.
///
/// \param L             The loop to process.
/// \param LPMAddNewLoop Callback forwarded to the LoopConstrainer.
/// \returns the Changed flag. It is handed by reference to
///          extractRangeChecksFromBranch and is also set once the
///          LoopConstrainer succeeds. The two bail-outs that happen before any
///          range check extraction return false.
bool InductiveRangeCheckElimination::run(
    Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop) {
  if (L->getBlocks().size() >= LoopSizeCutoff) {
    LLVM_DEBUG(dbgs() << "irce: giving up constraining loop, too large\n");
    return false;
  }

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    LLVM_DEBUG(dbgs() << "irce: loop has no preheader, leaving\n");
    return false;
  }

  LLVMContext &Context = Preheader->getContext();
  SmallVector<InductiveRangeCheck, 16> RangeChecks;
  bool Changed = false;

  // Gather range checks from every block terminated by a branch. Changed is
  // passed by reference, so from here on every exit must report it.
  for (auto *BBI : L->getBlocks())
    if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
      InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
                                                        RangeChecks, Changed);

  if (RangeChecks.empty())
    return Changed;

  auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
    OS << "irce: looking at loop "; L->print(OS);
    OS << "irce: loop has " << RangeChecks.size()
       << " inductive range checks: \n";
    for (InductiveRangeCheck &IRC : RangeChecks)
      IRC.print(OS);
  };

  LLVM_DEBUG(PrintRecognizedRangeChecks(dbgs()));

  if (PrintRangeChecks)
    PrintRecognizedRangeChecks(errs());

  const char *FailureReason = nullptr;
  std::optional<LoopStructure> MaybeLoopStructure =
      LoopStructure::parseLoopStructure(SE, *L, AllowUnsignedLatchCondition,
                                        FailureReason);
  if (!MaybeLoopStructure) {
    LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
                      << FailureReason << "\n";);
    return Changed;
  }
  LoopStructure LS = *MaybeLoopStructure;
  if (!isProfitableToTransform(*L, LS))
    return Changed;
  // The induction variable is expressed as IndVarBase - IndVarStep.
  const SCEVAddRecExpr *IndVar =
      cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase),
                                           SE.getSCEV(LS.IndVarStep)));

  std::optional<InductiveRangeCheck::Range> SafeIterRange;

  SmallVector<InductiveRangeCheck, 4> RangeChecksToEliminate;
  // Basing on the type of latch predicate, we interpret the IV iteration range
  // as signed or unsigned range. We use different min/max functions (signed or
  // unsigned) when intersecting this range with safe iteration ranges implied
  // by range checks.
  auto IntersectRange =
      LS.IsSignedPredicate ? IntersectSignedRange : IntersectUnsignedRange;

  // Keep only the checks whose safe iteration space still intersects the
  // range accumulated so far; the others are left untouched.
  for (InductiveRangeCheck &IRC : RangeChecks) {
    auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
                                                LS.IsSignedPredicate);
    if (Result) {
      auto MaybeSafeIterRange = IntersectRange(SE, SafeIterRange, *Result);
      if (MaybeSafeIterRange) {
        assert(!MaybeSafeIterRange->isEmpty(SE, LS.IsSignedPredicate) &&
               "We should never return empty ranges!");
        RangeChecksToEliminate.push_back(IRC);
        SafeIterRange = *MaybeSafeIterRange;
      }
    }
  }

  if (!SafeIterRange)
    return Changed;

  std::optional<LoopConstrainer::SubRanges> MaybeSR =
      calculateSubRanges(SE, *L, *SafeIterRange, LS);
  if (!MaybeSR) {
    LLVM_DEBUG(dbgs() << "irce: could not compute subranges\n");
    // Report Changed rather than a hard-coded false: range check extraction
    // received Changed by reference and may already have set it, and every
    // other bail-out after extraction returns Changed as well.
    return Changed;
  }

  LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT,
                     SafeIterRange->getBegin()->getType(), *MaybeSR);

  if (LC.run()) {
    Changed = true;

    auto PrintConstrainedLoopInfo = [L]() {
      dbgs() << "irce: in function ";
      dbgs() << L->getHeader()->getParent()->getName() << ": ";
      dbgs() << "constrained ";
      L->print(dbgs());
    };

    LLVM_DEBUG(PrintConstrainedLoopInfo());

    if (PrintChangedLoops)
      PrintConstrainedLoopInfo();

    // Optimize away the now-redundant range checks by replacing each check's
    // use with the constant matching its passing direction.
    for (InductiveRangeCheck &IRC : RangeChecksToEliminate) {
      ConstantInt *FoldedRangeCheck = IRC.getPassingDirection()
                                          ? ConstantInt::getTrue(Context)
                                          : ConstantInt::getFalse(Context);
      IRC.getCheckUse()->set(FoldedRangeCheck);
    }
  }

  return Changed;
}

APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...

print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:205

ArrayRef.h

BasicBlockUtils.h

BasicBlock.h

BlockFrequencyInfo.h

BranchProbabilityInfo.h

BranchProbability.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

Casting.h

Cloning.h

CommandLine.h

Compiler.h

LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:537

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101

DerivedTypes.h

NL
#define NL
Definition: DetailedRecordsBackend.cpp:31

Dominators.h

End
bool End
Definition: ELF_riscv.cpp:480

X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

Function.h

IRBuilder.h

CFG.h
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...

NoopOrExtend
static const SCEV * NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE, bool Signed)
If the type of S matches with Ty, return S.
Definition: InductiveRangeCheckElimination.cpp:557

PrintRangeChecks
static cl::opt< bool > PrintRangeChecks("irce-print-range-checks", cl::Hidden, cl::init(false))

AllowUnsignedLatchCondition
static cl::opt< bool > AllowUnsignedLatchCondition("irce-allow-unsigned-latch", cl::Hidden, cl::init(true))

MinRuntimeIterations
static cl::opt< unsigned > MinRuntimeIterations("irce-min-runtime-iterations", cl::Hidden, cl::init(10))

LoopSizeCutoff
static cl::opt< unsigned > LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, cl::init(64))

IntersectSignedRange
static std::optional< InductiveRangeCheck::Range > IntersectSignedRange(ScalarEvolution &SE, const std::optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)
Definition: InductiveRangeCheckElimination.cpp:828

AllowNarrowLatchCondition
static cl::opt< bool > AllowNarrowLatchCondition("irce-allow-narrow-latch", cl::Hidden, cl::init(true), cl::desc("If set to true, IRCE may eliminate wide range checks in loops " "with narrow latch condition."))

MaxTypeSizeForOverflowCheck
static cl::opt< unsigned > MaxTypeSizeForOverflowCheck("irce-max-type-size-for-overflow-check", cl::Hidden, cl::init(32), cl::desc("Maximum size of range check type for which can be produced runtime " "overflow check of its limit's computation"))

PrintChangedLoops
static cl::opt< bool > PrintChangedLoops("irce-print-changed-loops", cl::Hidden, cl::init(false))

IntersectUnsignedRange
static std::optional< InductiveRangeCheck::Range > IntersectUnsignedRange(ScalarEvolution &SE, const std::optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)
Definition: InductiveRangeCheckElimination.cpp:857

SkipProfitabilityChecks
static cl::opt< bool > SkipProfitabilityChecks("irce-skip-profitability-checks", cl::Hidden, cl::init(false))

calculateSubRanges
static std::optional< LoopConstrainer::SubRanges > calculateSubRanges(ScalarEvolution &SE, const Loop &L, InductiveRangeCheck::Range &Range, const LoopStructure &MainLoopStructure)
Definition: InductiveRangeCheckElimination.cpp:566

PrintScaledBoundaryRangeChecks
static cl::opt< bool > PrintScaledBoundaryRangeChecks("irce-print-scaled-boundary-range-checks", cl::Hidden, cl::init(false))

InductiveRangeCheckElimination.h

InstrTypes.h

getFalse
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Definition: InstructionSimplify.cpp:127

Instructions.h

LoopAnalysisManager.h
This header provides classes for managing per-loop analyses.

LoopConstrainer.h

LoopInfo.h

LoopSimplify.h

LoopUtils.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

R2
#define R2(n)

Metadata.h
This file contains the declarations for metadata subclasses.

Module.h
Module.h This file contains the declarations for the Module class.

Signed
@ Signed
Definition: NVPTXISelLowering.cpp:5545

Range
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

Operation
PowerPC Reduce CR logical Operation
Definition: PPCReduceCRLogicals.cpp:735

PatternMatch.h

PriorityWorklist.h
This file provides a priority worklist.

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

OS
raw_pwrite_stream & OS
Definition: SampleProfWriter.cpp:53

ScalarEvolutionExpander.h

ScalarEvolutionExpressions.h

ScalarEvolution.h

SmallPtrSet.h
This file defines the SmallPtrSet class.

SmallVector.h
This file defines the SmallVector class.

StringRef.h

getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40

Twine.h

Type.h

Use.h
This defines the Use class.

User.h

ValueMapper.h

Value.h

RHS
Value * RHS
Definition: X86PartialReduction.cpp:76

LHS
Value * LHS
Definition: X86PartialReduction.cpp:75

IV
static const uint32_t IV[8]
Definition: blake3_impl.h:78

T

llvm::APInt::getSignedMaxValue
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:188

llvm::APInt::getSignedMinValue
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:198

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253

llvm::AnalysisManager::invalidate
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
Definition: PassManagerImpl.h:166

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:61

llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168

llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:115

llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:38

llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:2906

llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:2991

llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:2976

llvm::BranchProbabilityAnalysis
Analysis pass which computes BranchProbabilityInfo.
Definition: BranchProbabilityInfo.h:426

llvm::BranchProbabilityInfo
Analysis providing branch probability information.
Definition: BranchProbabilityInfo.h:113

llvm::BranchProbabilityInfo::getEdgeProbability
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
Definition: BranchProbabilityInfo.cpp:1095

llvm::BranchProbabilityInfo::swapSuccEdgesProbabilities
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
Definition: BranchProbabilityInfo.cpp:1178

llvm::BranchProbability
Definition: BranchProbability.h:30

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757

llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:909

llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:81

llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850

llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162

llvm::Function
Definition: Function.h:64

llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1140

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2663

llvm::IRCEPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: InductiveRangeCheckElimination.cpp:885

llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:969

llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278

llvm::IntegerType::getBitWidth
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67

llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:571

llvm::LoopConstrainer
This class is used to constrain loops to run within a given iteration space.
Definition: LoopConstrainer.h:95

llvm::LoopInfo
Definition: LoopInfo.h:412

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117

llvm::PreservedAnalyses::abandon
void abandon()
Mark an analysis as abandoned.
Definition: Analysis.h:164

llvm::PriorityWorklist::pop_back_val
T pop_back_val()
Definition: PriorityWorklist.h:153

llvm::PriorityWorklist::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:67

llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:347

llvm::SCEVAddRecExpr::getType
Type * getType() const
Definition: ScalarEvolutionExpressions.h:357

llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:358

llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:365

llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:375

llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:359

llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60

llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75

llvm::SCEV::print
void print(raw_ostream &OS) const
Print out the internal representation of this scalar to the specified stream.
Definition: ScalarEvolution.cpp:267

llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:380

llvm::SCEV::NoWrapFlags
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
Definition: ScalarEvolution.h:130

llvm::SCEV::FlagAnyWrap
@ FlagAnyWrap
Definition: ScalarEvolution.h:131

llvm::SCEV::FlagNSW
@ FlagNSW
Definition: ScalarEvolution.h:134

llvm::SCEV::FlagNUW
@ FlagNUW
Definition: ScalarEvolution.h:133

llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2244

llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:452

llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4591

llvm::ScalarEvolution::getSMaxExpr
const SCEV * getSMaxExpr(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:4365

llvm::ScalarEvolution::getSMinExpr
const SCEV * getSMinExpr(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:4383

llvm::ScalarEvolution::getUMaxExpr
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
Definition: ScalarEvolution.cpp:4374

llvm::ScalarEvolution::getZero
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
Definition: ScalarEvolution.h:658

llvm::ScalarEvolution::willNotOverflow
bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)
Does operation BinOp between LHS and RHS provably not have a signed/unsigned overflow (Signed)?...
Definition: ScalarEvolution.cpp:2300

llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:479

llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4569

llvm::ScalarEvolution::getNoopOrSignExtend
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4764

llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:661

llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13921

llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:10931

llvm::ScalarEvolution::getUMinExpr
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Definition: ScalarEvolution.cpp:4393

llvm::ScalarEvolution::getTruncateExpr
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1135

llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4677

llvm::ScalarEvolution::getNoopOrZeroExtend
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4752

llvm::ScalarEvolution::getSignExtendExpr
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1886

llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3107

llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2512

llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:257

llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479

llvm::SmallVectorBase::empty
bool empty() const
Definition: SmallVector.h:94

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:91

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:426

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43

llvm::Use::get
Value * get() const
Definition: Use.h:66

llvm::User::getOperandUse
const Use & getOperandUse(unsigned i) const
Definition: User.h:182

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255

llvm::cl::opt
Definition: CommandLine.h:1423

llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition: ilist_node.h:32

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52

uint64_t

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:265

llvm::ARM::ProfileKind::M
@ M

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::M68k::MemAddrModeKind::V
@ V

llvm::M68k::MemAddrModeKind::L
@ L

llvm::PatternMatch
Definition: PatternMatch.h:47

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49

llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92

llvm::PatternMatch::m_LogicalAnd
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
Definition: PatternMatch.h:2915

llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1104

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:137

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443

llvm::dwarf::Index
Index
Definition: Dwarf.h:875

llvm::logicalview::LVAttributeKind::Zero
@ Zero

llvm::ms_demangle::QualifierMangleMode::Result
@ Result

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:701

llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877

llvm::Offset
@ Offset
Definition: DWP.cpp:480

llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:425

llvm::InvertBranch
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
Definition: BasicBlockUtils.cpp:2201

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:908

llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1669

llvm::isKnownNegativeInLoop
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
Definition: LoopUtils.cpp:1243

llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191

llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:138

llvm::isKnownNonNegativeInLoop
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Definition: LoopUtils.cpp:1250

llvm::isProfitableToTransform
static bool isProfitableToTransform(const Loop &L, const BranchInst *BI)
Definition: LoopBoundSplit.cpp:223

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860

raw_ostream.h

llvm::LoopConstrainer::SubRanges
Definition: LoopConstrainer.h:104

llvm::LoopStructure
Definition: LoopConstrainer.h:34

llvm::LoopStructure::IndVarStart
Value * IndVarStart
Definition: LoopConstrainer.h:56

llvm::LoopStructure::IndVarIncreasing
bool IndVarIncreasing
Definition: LoopConstrainer.h:59

llvm::LoopStructure::ExitCountTy
IntegerType * ExitCountTy
Definition: LoopConstrainer.h:61

llvm::LoopStructure::IsSignedPredicate
bool IsSignedPredicate
Definition: LoopConstrainer.h:60

llvm::LoopStructure::LoopExitAt
Value * LoopExitAt
Definition: LoopConstrainer.h:58

llvm::LoopStructure::parseLoopStructure
static std::optional< LoopStructure > parseLoopStructure(ScalarEvolution &, Loop &, bool, const char *&)
Definition: LoopConstrainer.cpp:125

llvm::cl::desc
Definition: CommandLine.h:409