doxygen/AggressiveInstCombine_8cpp_source.html

//===- AggressiveInstCombine.cpp ------------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file implements the aggressive expression pattern combiner classes.

// Currently, it handles expression patterns for:

//  * Truncate instruction

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"

#include "AggressiveInstCombineInternal.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/Analysis/AliasAnalysis.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/BasicAliasAnalysis.h"

#include "llvm/Analysis/ConstantFolding.h"

#include "llvm/Analysis/DomTreeUpdater.h"

#include "llvm/Analysis/GlobalsModRef.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/MDBuilder.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/ProfDataUtils.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/BuildLibCalls.h"

#include "llvm/Transforms/Utils/Local.h"


using namespace llvm;

using namespace PatternMatch;


#define DEBUG_TYPE "aggressive-instcombine"


// Option declared here and defined elsewhere. When it is set,
// tryToRecognizeTableBasedCttz() does not attach !prof metadata to the select
// it creates for the cttz(0) case.
namespace llvm {

extern cl::opt<bool> ProfcheckDisableMetadataFixes;

}


// Incremented by foldAnyOrAllBitsSet() for every masked compare it creates.
STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");

// Incremented by foldGuardedFunnelShift() when both shift sources are the same
// value (i.e. the pattern is a rotate).
STATISTIC(NumGuardedRotates,

          "Number of guarded rotates transformed into funnel shifts");

// Incremented by foldGuardedFunnelShift() when the two shift sources differ.
STATISTIC(NumGuardedFunnelShifts,

          "Number of guarded funnel shifts transformed into funnel shifts");

// Incremented by tryToRecognizePopCount() for every ctpop intrinsic it emits.
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");


// Upper bound on the number of instructions foldLoadsRecursive() walks between
// two loads while looking for an intervening write; the fold is abandoned once
// the bound is exceeded.
static cl::opt<unsigned> MaxInstrsToScan(

    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,

    cl::desc("Max number of instructions to scan for aggressive instcombine."));


// Not referenced in the part of the file shown here; per its description it
// caps the constant-string length for inlining builtin string compare calls.
static cl::opt<unsigned> StrNCmpInlineThreshold(

    "strncmp-inline-threshold", cl::init(3), cl::Hidden,

    cl::desc("The maximum length of a constant string for a builtin string cmp "

             "call eligible for inlining. The default value is 3."));


// Not referenced in the part of the file shown here; per its description it
// caps the constant-string length for inlining memchr calls.
static cl::opt<unsigned>

    MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,

                          cl::desc("The maximum length of a constant string to "

                                   "inline a memchr call."));


/// Recognize a two-input phi that merges a hand-written funnel shift/rotate
/// with its unshifted source, where the branch feeding the phi skips the shift
/// when the shift amount is 0, and replace the phi's uses with a call to
/// llvm.fshl / llvm.fshr inserted at the top of the phi's block.
///
/// \param I  candidate instruction; anything but a 2-operand phi is rejected.
/// \param DT dominator tree used to check that the shift sources are available
///           at the guard block's terminator.
/// \returns true if the uses of \p I were replaced by a funnel-shift intrinsic.
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
  auto *Phi = dyn_cast<PHINode>(&I);
  if (!Phi || Phi->getNumOperands() != 2)
    return false;

  // Like the one-use restrictions in the patterns below, this filter is not
  // needed for correctness. It is a precaution against perf regressions on
  // targets without a funnel/rotate instruction, where the intrinsic would be
  // expanded back into shift/logic ops.
  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
    return false;

  // Shift sources and shift amount bound by the most recent pattern match.
  Value *ShVal0, *ShVal1, *ShAmt;

  // Returns the funnel-shift intrinsic that \p Funnel implements, provided the
  // operand that intrinsic yields for a zero shift amount is \p Passthru.
  // Returns Intrinsic::not_intrinsic otherwise.
  auto MatchGuardedFunnel = [&](Value *Funnel,
                                Value *Passthru) -> Intrinsic::ID {
    const unsigned Width = Funnel->getType()->getScalarSizeInBits();

    // fshl(ShVal0, ShVal1, ShAmt)
    //  == (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt))
    // A zero shift amount yields ShVal0.
    if (match(Funnel,
              m_OneUse(m_c_Or(
                  m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
                  m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
                                                m_Deferred(ShAmt)))))))
      return ShVal0 == Passthru ? Intrinsic::fshl : Intrinsic::not_intrinsic;

    // fshr(ShVal0, ShVal1, ShAmt)
    //  == (ShVal0 << (Width - ShAmt)) | (ShVal1 >> ShAmt)
    // A zero shift amount yields ShVal1.
    if (match(Funnel,
              m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
                                                           m_Value(ShAmt))),
                              m_LShr(m_Value(ShVal1), m_Deferred(ShAmt))))))
      return ShVal1 == Passthru ? Intrinsic::fshr : Intrinsic::not_intrinsic;

    return Intrinsic::not_intrinsic;
  };

  // One incoming value must be the funnel/rotate expression and the other must
  // be the source value that the expression produces for a zero shift:
  // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
  // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
  // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
  Value *In0 = Phi->getOperand(0), *In1 = Phi->getOperand(1);
  unsigned FunnelIdx = 0;
  Intrinsic::ID IID = MatchGuardedFunnel(In0, In1);
  if (IID == Intrinsic::not_intrinsic) {
    // Retry with the roles of the two incoming values exchanged.
    FunnelIdx = 1;
    IID = MatchGuardedFunnel(In1, In0);
    if (IID == Intrinsic::not_intrinsic)
      return false;
  }
  assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
         "Pattern must match funnel shift left or right");
  const unsigned GuardIdx = 1 - FunnelIdx;

  // The block supplying the plain source value is the "guard" block; it has to
  // end in a cmp+branch that bypasses the funnel/rotate block when the shift
  // amount is 0.
  BasicBlock *GuardBB = Phi->getIncomingBlock(GuardIdx);
  BasicBlock *FunnelBB = Phi->getIncomingBlock(FunnelIdx);
  Instruction *GuardTerm = GuardBB->getTerminator();

  // Both shift sources must dominate the guard block's terminator.
  if (!(DT.dominates(ShVal0, GuardTerm) && DT.dominates(ShVal1, GuardTerm)))
    return false;

  BasicBlock *PhiBB = Phi->getParent();
  const bool HasZeroGuard =
      match(GuardTerm, m_Br(m_SpecificICmp(CmpInst::ICMP_EQ, m_Specific(ShAmt),
                                           m_ZeroInt()),
                            m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB)));
  if (!HasZeroGuard)
    return false;

  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());

  const bool IsRotate = ShVal0 == ShVal1;
  if (IsRotate)
    ++NumGuardedRotates;
  else
    ++NumGuardedFunnelShifts;

  // For a genuine funnel shift (not a rotate) the guard branch kept the operand
  // that does not contribute at ShAmt == 0 away from the result on that path.
  // The intrinsic always reads both operands, so freeze that operand unless it
  // is known not to be poison.
  if (!IsRotate) {
    Value *&Unguarded = (IID == Intrinsic::fshl) ? ShVal1 : ShVal0;
    if (!llvm::isGuaranteedNotToBePoison(Unguarded))
      Unguarded = Builder.CreateFreeze(Unguarded);
  }

  // We matched a variation of this IR pattern:
  // GuardBB:
  //   %cmp = icmp eq i32 %ShAmt, 0
  //   br i1 %cmp, label %PhiBB, label %FunnelBB
  // FunnelBB:
  //   %sub = sub i32 32, %ShAmt
  //   %shr = lshr i32 %ShVal1, %sub
  //   %shl = shl i32 %ShVal0, %ShAmt
  //   %fsh = or i32 %shr, %shl
  //   br label %PhiBB
  // PhiBB:
  //   %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
  // -->
  // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
  Value *Funnel =
      Builder.CreateIntrinsic(IID, Phi->getType(), {ShVal0, ShVal1, ShAmt});
  Phi->replaceAllUsesWith(Funnel);
  return true;
}


/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and

/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain

/// of 'and' ops, then we also need to capture the fact that we saw an

/// "and X, 1", so that's an extra return value for that case.

namespace {

struct MaskOps {

  Value *Root = nullptr;

  APInt Mask;

  bool MatchAndChain;

  bool FoundAnd1 = false;


  MaskOps(unsigned BitWidth, bool MatchAnds)

      : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}

};

} // namespace


/// Recursive worker for foldAnyOrAllBitsSet(): descend through a chain of
/// 'and' or 'or' instructions (which one is selected by MOps.MatchAndChain)
/// and collect the constant right-shift amounts applied to one common source
/// value. Examples:
///   or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
/// records { X, 0x129 }
///   and (and (X >> 1), 1), (X >> 4)
/// records { X, 0x12 }
///
/// \returns true if every leaf reached from \p V is the common source value,
/// either bare (bit 0) or logically shifted right by an in-range constant.
static bool matchAndOrChain(Value *V, MaskOps &MOps) {
  Value *LHS, *RHS;
  if (!MOps.MatchAndChain) {
    // 'or' chain: both operands have to match recursively.
    if (match(V, m_Or(m_Value(LHS), m_Value(RHS))))
      return matchAndOrChain(LHS, MOps) && matchAndOrChain(RHS, MOps);
  } else if (match(V, m_And(m_Value(LHS), m_One()))) {
    // 'and' chain: unlike the 'or' case, an "and X, 1" must show up somewhere
    // in the chain so that all of the high bits are known to be cleared.
    MOps.FoundAnd1 = true;
    return matchAndOrChain(LHS, MOps);
  } else if (match(V, m_And(m_Value(LHS), m_Value(RHS)))) {
    return matchAndOrChain(LHS, MOps) && matchAndOrChain(RHS, MOps);
  }

  // Leaf: either "lshr Src, C" (tests bit C of Src) or a bare value (tests
  // bit 0 of that value).
  Value *ShiftSrc;
  const APInt *BitIndex = nullptr;
  const bool IsConstShift =
      match(V, m_LShr(m_Value(ShiftSrc), m_APInt(BitIndex)));
  Value *Candidate = IsConstShift ? ShiftSrc : V;

  // The first leaf visited defines the common source value.
  if (MOps.Root == nullptr)
    MOps.Root = Candidate;

  // An out-of-range shift constant means this code has not been simplified.
  if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth()))
    return false;

  // Record the tested bit, then require that this leaf reads the common root.
  const uint64_t Bit = BitIndex ? BitIndex->getZExtValue() : 0;
  MOps.Mask.setBit(Bit);
  return Candidate == MOps.Root;
}


/// Fold "any-bits-set" and "all-bits-set" idioms built from a chain of 'or' or
/// 'and' ops over shifted copies of one source value:
/// and (or  (lshr X, C), ...), 1 --> (X & CMask) != 0
/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
/// Note: "any-bits-clear" and "all-bits-clear" only differ by a trailing 'not'
/// of the result. That 'not' is expected to be folded into the compare created
/// here (by inverting the predicate).
///
/// \returns true if the uses of \p I were replaced by a zero-extended masked
/// compare.
static bool foldAnyOrAllBitsSet(Instruction &I) {
  // The 'or' chain ("any-bits-set") is the simpler shape: the "and X, 1" has
  // to be the last op of the sequence. For the 'and' chain it may sit anywhere
  // in the chain, so the chain walk starts at I itself.
  const bool MatchAllBitsSet =
      match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_Value()));
  if (!MatchAllBitsSet &&
      !match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
    return false;

  auto *AndI = cast<BinaryOperator>(&I);
  Value *ChainHead = AndI;
  if (!MatchAllBitsSet)
    ChainHead = AndI->getOperand(0);

  MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
  if (!matchAndOrChain(ChainHead, MOps))
    return false;
  // An 'and' chain is only usable if an "and X, 1" was part of it.
  if (MatchAllBitsSet && !MOps.FoundAnd1)
    return false;

  // Pattern found: one masked compare stands in for all of the shift and
  // logic ops.
  IRBuilder<> Builder(&I);
  Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
  Value *Masked = Builder.CreateAnd(MOps.Root, Mask);
  Value *Cmp;
  if (MatchAllBitsSet)
    Cmp = Builder.CreateICmpEQ(Masked, Mask);
  else
    Cmp = Builder.CreateIsNotNull(Masked);
  I.replaceAllUsesWith(Builder.CreateZExt(Cmp, I.getType()));
  ++NumAnyOrAllBitsSet;
  return true;
}


// Recognize the bit-parallel population count below and replace it with the
// ctpop intrinsic. This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Also used in TargetLowering::expandCTPOP().
//
// int popcount(unsigned int i) {
//   i = i - ((i >> 1) & 0x55555555);
//   i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
//   i = ((i + (i >> 4)) & 0x0F0F0F0F);
//   return (i * 0x01010101) >> 24;
// }
//
// \p I is the candidate final shift. Returns true if its uses were replaced by
// a call to llvm.ctpop on the original input.
static bool tryToRecognizePopCount(Instruction &I) {
  if (I.getOpcode() != Instruction::LShr)
    return false;

  Type *Ty = I.getType();
  if (!Ty->isIntOrIntVectorTy())
    return false;

  const unsigned Len = Ty->getScalarSizeInBits();
  // FIXME: fix Len == 8 and other irregular type lengths.
  if (Len > 128 || Len <= 8 || Len % 8 != 0)
    return false;

  // Byte patterns of the algorithm, replicated to the full element width.
  const APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
  const APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
  const APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
  const APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
  const APInt MaskShift = APInt(Len, Len - 8);

  // Step 4: "(i * 0x01010101...) >> (Len - 8)".
  Value *MulOp0;
  if (!match(I.getOperand(0), m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01))) ||
      !match(I.getOperand(1), m_SpecificInt(MaskShift)))
    return false;

  // Step 3: "((i + (i >> 4)) & 0x0F0F0F0F...)".
  Value *ShiftOp0;
  if (!match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
                                   m_Deferred(ShiftOp0)),
                           m_SpecificInt(Mask0F))))
    return false;

  // Step 2: "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
  Value *AndOp0;
  if (!match(ShiftOp0,
             m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
                     m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)),
                           m_SpecificInt(Mask33)))))
    return false;

  // Step 1: "i - ((i >> 1) & 0x55555555...)".
  Value *Root, *SubOp1;
  const APInt *AndMask;
  if (!match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) ||
      !match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
                           m_APInt(AndMask))))
    return false;

  if (*AndMask != Mask55) {
    // The mask is not the exact 0x55... pattern. It is still acceptable when
    // it only drops bits of that pattern and the shifted value is known to be
    // zero in every dropped position.
    if (!AndMask->isSubsetOf(Mask55))
      return false;

    const APInt NeededMask = Mask55 & ~*AndMask;
    if (!MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
                           NeededMask, SimplifyQuery(I.getDataLayout())))
      return false;
  }

  LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
  IRBuilder<> Builder(&I);
  Value *PopCount =
      Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root});
  I.replaceAllUsesWith(PopCount);
  ++NumPopCountRecognized;
  return true;
}


/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), provided C1 and
/// C2 saturate the value of the fp conversion. The transform is not reversible
/// because fptosi.sat is more defined than the input: every value produces a
/// valid result for fptosi.sat, whereas values outside the range of the
/// integer conversion produce poison in the original. The reversed pattern may
/// use fmax and fmin instead. Since the transform cannot be undone directly
/// and is not always profitable, it is only performed when TTI reports a lower
/// cost for the saturating form.
///
/// \returns true if the uses of \p I were replaced by a sign-extended
/// llvm.fptosi.sat call.
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
  // Accept the clamp in either nesting order. MinC is the constant operand of
  // the smin (the upper bound), MaxC the constant operand of the smax (the
  // lower bound).
  Value *In;
  const APInt *MinC, *MaxC;
  const bool IsClampedFPToSI =
      match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
                                       m_APInt(MinC))),
                       m_APInt(MaxC))) ||
      match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
                                       m_APInt(MaxC))),
                       m_APInt(MinC)));
  if (!IsClampedFPToSI)
    return false;

  // The bounds must be exactly [-2^k, 2^k - 1], i.e. the range of a signed
  // (k + 1)-bit integer.
  const APInt UpperPlusOne = *MinC + 1;
  if (!UpperPlusOne.isPowerOf2() || -*MaxC != UpperPlusOne)
    return false;

  Type *IntTy = I.getType();
  Type *FpTy = In->getType();
  Type *SatTy = IntegerType::get(IntTy->getContext(),
                                 UpperPlusOne.exactLogBase2() + 1);
  if (auto *VecTy = dyn_cast<VectorType>(IntTy))
    SatTy = VectorType::get(SatTy, VecTy->getElementCount());

  const TargetTransformInfo::TargetCostKind CostKind =
      TargetTransformInfo::TCK_RecipThroughput;

  // Cost of the replacement: fptosi.sat to the narrow type plus a sign
  // extension back to the original integer type.
  InstructionCost SatCost = TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
      CostKind);
  SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
                                  TTI::CastContextHint::None, CostKind);

  // Cost of the existing sequence: fptosi + smin + smax.
  InstructionCost MinMaxCost =
      TTI.getCastInstrCost(Instruction::FPToSI, IntTy, FpTy,
                           TTI::CastContextHint::None, CostKind);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}), CostKind);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}), CostKind);

  // Only transform when the saturating form is strictly cheaper.
  if (SatCost >= MinMaxCost)
    return false;

  IRBuilder<> Builder(&I);
  Value *Sat =
      Builder.CreateIntrinsic(Intrinsic::fptosi_sat, {SatTy, FpTy}, In);
  Value *Widened = Builder.CreateSExt(Sat, IntTy);
  I.replaceAllUsesWith(Widened);
  return true;
}


/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
/// pessimistic codegen that has to account for setting errno and can enable
/// vectorization.
///
/// \param Call the candidate libcall; it is erased when the fold succeeds.
/// \param Func not referenced in this function's body.
/// \returns true if \p Call was replaced by llvm.sqrt and erased.
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
                     TargetLibraryInfo &TLI, AssumptionCache &AC,
                     DominatorTree &DT) {
  // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
  // (because of NNAN, or because the argument cannot be less than -0.0) and
  // (3) we would not end up lowering to a libcall anyway (which could change
  // the value of errno), then:
  //  - errno won't be set, and
  //  - it is safe to convert this to an intrinsic call.
  Type *Ty = Call->getType();
  Value *Arg = Call->getArgOperand(0);

  if (!TTI.haveFastSqrt(Ty))
    return false;

  if (!Call->hasNoNaNs() &&
      !cannotBeOrderedLessThanZero(
          Arg, SimplifyQuery(Call->getDataLayout(), &TLI, &DT, &AC, Call)))
    return false;

  IRBuilder<> Builder(Call);
  Value *NewSqrt =
      Builder.CreateIntrinsic(Intrinsic::sqrt, Ty, Arg, Call, "sqrt");
  Call->replaceAllUsesWith(NewSqrt);

  // The old call has side effects and is therefore not trivially dead, so it
  // has to be erased explicitly.
  Call->eraseFromParent();
  return true;
}


// Check whether \p Table is a cttz lookup table for the index expression
// ((x * Mul) >> Shift) & AndMask. For every bit position B in
// [0, InputBits), the element loaded (as \p AccessTy) at byte offset
// index * GEPIdxFactor for the input x = 1 << B must fold to the constant
// integer B.
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift,
                        const APInt &AndMask, Type *AccessTy,
                        unsigned InputBits, const APInt &GEPIdxFactor,
                        const DataLayout &DL) {
  for (unsigned Bit = 0; Bit != InputBits; ++Bit) {
    // Table index the matched expression computes for an input with only
    // bit number 'Bit' set.
    const APInt OneBit = APInt(InputBits, 1).shl(Bit);
    const APInt TableIdx = (OneBit * Mul).lshr(Shift) & AndMask;

    Constant *Elt = ConstantFoldLoadFromConst(Table, AccessTy,
                                              TableIdx * GEPIdxFactor, DL);
    auto *EltInt = dyn_cast_or_null<ConstantInt>(Elt);
    if (!EltInt || EltInt->getValue() != Bit)
      return false;
  }

  return true;
}


// Try to recognize table-based ctz implementation.

// E.g., an example in C (for more cases please see the llvm/tests):

// int f(unsigned x) {

//    static const char table[32] =

//      {0, 1, 28, 2, 29, 14, 24, 3, 30,

//       22, 20, 15, 25, 17, 4, 8, 31, 27,

//       13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};

//    return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];

// }

// this can be lowered to `cttz` instruction.

// There is also a special case when the element is 0.

//

// The (x & -x) sets the lowest non-zero bit to 1. The multiply is a de-bruijn

// sequence that contains each pattern of bits in it. The shift extracts

// the top bits after the multiply, and that index into the table should

// represent the number of trailing zeros in the original number.

//

// Here are some examples of LLVM IR for a 64-bit target:

//

// CASE 1:

// %sub = sub i32 0, %x

// %and = and i32 %sub, %x

// %mul = mul i32 %and, 125613361

// %shr = lshr i32 %mul, 27

// %idxprom = zext i32 %shr to i64

// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,

//     i64 %idxprom

// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8

//

// CASE 2:

// %sub = sub i32 0, %x

// %and = and i32 %sub, %x

// %mul = mul i32 %and, 72416175

// %shr = lshr i32 %mul, 26

// %idxprom = zext i32 %shr to i64

// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,

//     i64 0, i64 %idxprom

// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8

//

// CASE 3:

// %sub = sub i32 0, %x

// %and = and i32 %sub, %x

// %mul = mul i32 %and, 81224991

// %shr = lshr i32 %mul, 27

// %idxprom = zext i32 %shr to i64

// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,

//     i64 0, i64 %idxprom

// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8

//

// CASE 4:

// %sub = sub i64 0, %x

// %and = and i64 %sub, %x

// %mul = mul i64 %and, 283881067100198605

// %shr = lshr i64 %mul, 58

// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,

//     i64 %shr

// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8

//

// All these can be lowered to @llvm.cttz.i32/64 intrinsics.


// \p I is the candidate table load. Returns true if its uses were replaced by
// llvm.cttz on the original input (plus a select for the zero input when
// table[0] differs from the input width). The fold is rejected when table[0]
// does not fold to a constant integer, or when it is needed for the zero case
// but cannot be represented in the input type.
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
  LoadInst *LI = dyn_cast<LoadInst>(&I);
  if (!LI)
    return false;

  Type *AccessType = LI->getType();
  if (!AccessType->isIntegerTy())
    return false;

  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
  if (!GEP || !GEP->hasNoUnsignedSignedWrap())
    return false;

  // NOTE(review): only hasInitializer() is required here; confirm whether an
  // interposable constant global needs to be excluded as well.
  GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
  if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
    return false;

  // The GEP must consist of exactly one scaled variable index and no constant
  // offset.
  unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt ModOffset(BW, 0);
  SmallMapVector<Value *, APInt, 4> VarOffsets;
  if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
      VarOffsets.size() != 1 || ModOffset != 0)
    return false;
  auto [GepIdx, GEPScale] = VarOffsets.front();

  Value *X1;
  const APInt *MulConst, *ShiftConst, *AndCst = nullptr;
  // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
  // This might be extended to the pointer index type, and if the gep index type
  // has been replaced with an i8 then a new And (and different ShiftConst) will
  // be present.
  auto MatchInner = m_LShr(
      m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)), m_APInt(MulConst)),
      m_APInt(ShiftConst));
  if (!match(GepIdx, m_CastOrSelf(MatchInner)) &&
      !match(GepIdx, m_CastOrSelf(m_And(MatchInner, m_APInt(AndCst)))))
    return false;

  unsigned InputBits = X1->getType()->getScalarSizeInBits();
  if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
    return false;

  if (!GEPScale.isIntN(InputBits) ||
      !isCTTZTable(GVTable->getInitializer(), *MulConst, *ShiftConst,
                   AndCst ? *AndCst : APInt::getAllOnes(InputBits), AccessType,
                   InputBits, GEPScale.zextOrTrunc(InputBits), DL))
    return false;

  // table[0] is what the original code yields for x == 0. isCTTZTable() only
  // validates the entries selected by single-bit inputs, so this element is
  // not guaranteed to fold to a ConstantInt (it may be undef, or the fold may
  // fail). Reject such tables instead of asserting in cast<>.
  auto *ZeroTableElem = dyn_cast_or_null<ConstantInt>(
      ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));
  if (!ZeroTableElem)
    return false;

  // Compare as APInt: getZExtValue() would assert on a value that needs more
  // than 64 bits (possible with an i128 access type).
  const APInt &ZeroVal = ZeroTableElem->getValue();
  bool DefinedForZero = ZeroVal == InputBits;

  // When table[0] has to be materialized in the input type (see the select
  // below) it must be representable there. This can only fail if the access
  // type is wider than the input type.
  if (!DefinedForZero && !ZeroVal.isIntN(InputBits))
    return false;

  // All checks passed; IR is only created from here on.
  IRBuilder<> B(LI);
  ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
  Type *XType = X1->getType();
  auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
  Value *ZExtOrTrunc = nullptr;

  if (DefinedForZero) {
    ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
  } else {
    // If the value in elem 0 isn't the same as InputBits, we still want to
    // produce the value from the table.
    auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
    // The access type may be narrower or wider than the input type, so a plain
    // zext is not always valid here; the range check above guarantees that a
    // truncation loses no bits.
    auto Select =
        B.CreateSelect(Cmp, B.CreateZExtOrTrunc(ZeroTableElem, XType), Cttz);

    // The true branch of select handles the cttz(0) case, which is rare.
    if (!ProfcheckDisableMetadataFixes) {
      if (Instruction *SelectI = dyn_cast<Instruction>(Select))
        SelectI->setMetadata(
            LLVMContext::MD_prof,
            MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights());
    }

    // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
    // it should be handled as: `cttz(x) & (typeSize - 1)`.

    ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
  }

  LI->replaceAllUsesWith(ZExtOrTrunc);

  return true;
}


namespace {
/// State threaded through foldLoadsRecursive() while it walks an or(load, load)
/// chain, and consumed by foldConsecutiveLoads() to build the wide load.
/// Kept in an anonymous namespace because it is only used within this file.
struct LoadOps {
  /// Load with the lowest offset among the loads merged so far.
  LoadInst *Root = nullptr;
  /// Load at which foldConsecutiveLoads() inserts the new wide load.
  LoadInst *RootInsert = nullptr;
  /// Set once at least two loads have been merged.
  bool FoundRoot = false;
  /// Combined size, in bits, of the loads merged so far.
  uint64_t LoadSize = 0;
  /// Left-shift amount to apply to the merged value (0 for none).
  uint64_t Shift = 0;
  /// Type the merged load is zero-extended to. Null until a merge succeeds, so
  /// it never holds an indeterminate value.
  Type *ZextType = nullptr;
  /// Concatenated alias-analysis metadata of the merged loads.
  AAMDNodes AATags;
};
} // namespace


// Recursively match an 'or' chain of zero-extended (and optionally
// left-shifted) loads rooted at \p V and record in \p LOps the single wider
// load that could replace it:

// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1

// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
// Returns true if all loads matched so far can be merged. On success \p LOps
// holds the lower-offset load (Root), the earlier of the loads compared at
// this level (RootInsert), the combined size in bits (LoadSize), the shift to
// apply (Shift), the type of the or operand (ZextType) and the concatenated AA
// metadata. This function only analyses; it creates no IR.


static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,

                               AliasAnalysis &AA) {

  // Shift amount bound by the matcher for the L2 operand.
  uint64_t ShAmt2;

  // The other operand of the 'or': either a deeper 'or' or the first load.
  Value *X;

  Instruction *L1, *L2;


  // V must be a one-use 'or' with one operand of the form
  // zext(L2) or (zext(L2) << ShAmt2). Recurse into the other operand X first,
  // so the innermost pair of loads is processed before the outer ones.

  if (match(V,

            m_OneUse(m_c_Or(m_Value(X), m_OneUse(m_ShlOrSelf(

                                            m_OneUse(m_ZExt(m_Instruction(L2))),

                                            ShAmt2)))))) {

    if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)

      // A deeper level already merged loads but the recursion into X failed:
      // give up rather than merge only part of the chain.

      return false;

  } else

    return false;


  // By default continue from the load and shift recorded by deeper levels. If
  // no root exists yet, X itself has to be zext(L1) or (zext(L1) << ShAmt1).

  LoadInst *LI1 = LOps.Root;

  uint64_t ShAmt1 = LOps.Shift;

  if (LOps.FoundRoot == false &&

      match(X, m_OneUse(

                   m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {

    // May yield null if L1 is not a load; rejected by the check below.
    LI1 = dyn_cast<LoadInst>(L1);

  }

  LoadInst *LI2 = dyn_cast<LoadInst>(L2);


  // Bail out if both operands are the same load, if either is not a load, if
  // either load is not simple, or if their address spaces differ.

  if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||

      LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())

    return false;


  // Both loads must be in the same basic block.

  if (LI1->getParent() != LI2->getParent())

    return false;


  // Query the target endianness.

  bool IsBigEndian = DL.isBigEndian();


  // Strip constant offsets from both pointers so that the loads can be
  // compared as (common base, constant offset) pairs.

  Value *Load1Ptr = LI1->getPointerOperand();

  APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);

  Load1Ptr =

      Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,

                                                  /* AllowNonInbounds */ true);


  Value *Load2Ptr = LI2->getPointerOperand();

  APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);

  Load2Ptr =

      Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,

                                                  /* AllowNonInbounds */ true);


  // Record the load sizes in bits and require a common base pointer.

  uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();

  uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();

  if (Load1Ptr != Load2Ptr)

    return false;


  // Make sure that there are no padding bits.

  if (!DL.typeSizeEqualsStoreSize(LI1->getType()) ||

      !DL.typeSizeEqualsStoreSize(LI2->getType()))

    return false;


  // Use alias analysis to make sure nothing between the two loads may write
  // the memory read by the later one. Start is the earlier instruction (the
  // previous insertion point once a root exists, otherwise LI1) and End the
  // later one; they are swapped if LI2 comes first.

  LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;

  MemoryLocation Loc;

  if (!Start->comesBefore(End)) {

    std::swap(Start, End);

    Loc = MemoryLocation::get(End);

    // After the swap End is the previous root; widen the queried location to
    // the size merged so far.
    // NOTE(review): LOps.LoadSize is accumulated from bit sizes (see
    // LoadSize1/LoadSize2 above) - confirm the unit getWithNewSize expects.
    if (LOps.FoundRoot)

      Loc = Loc.getWithNewSize(LOps.LoadSize);

  } else

    Loc = MemoryLocation::get(End);

  unsigned NumScanned = 0;

  // Walk the instructions in [Start, End).
  for (Instruction &Inst :

       make_range(Start->getIterator(), End->getIterator())) {

    if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))

      return false;


    // Give up once more than MaxInstrsToScan instructions have been visited.
    if (++NumScanned > MaxInstrsToScan)

      return false;

  }


  // Make sure the load with the lower offset is LI1; remember whether the
  // operands had to be exchanged.

  bool Reverse = false;

  if (Offset2.slt(Offset1)) {

    std::swap(LI1, LI2);

    std::swap(ShAmt1, ShAmt2);

    std::swap(Offset1, Offset2);

    std::swap(Load1Ptr, Load2Ptr);

    std::swap(LoadSize1, LoadSize2);

    Reverse = true;

  }


  // On big-endian targets the shift amounts are exchanged.

  if (IsBigEndian)

    std::swap(ShAmt1, ShAmt2);


  // First load is always LI1. This is where we put the new load.

  // When continuing from an existing root, the size merged so far replaces
  // the size of whichever operand is that root (LI1 unless the operands were
  // exchanged above).

  if (LOps.FoundRoot) {

    if (!Reverse)

      LoadSize1 = LOps.LoadSize;

    else

      LoadSize2 = LOps.LoadSize;

  }


  // The loads are consecutive and correctly placed in the result only if the
  // shift amounts differ by exactly the size in bits of one of the two loads
  // (LoadSize1 on little-endian, LoadSize2 on big-endian) and the offsets

  // differ by exactly the store size of an integer of LoadSize1 bits.

  uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;

  uint64_t PrevSize =

      DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));

  if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)

    return false;


  // Success: record the merged state in LOps. The AA tags accumulated so far
  // are used for the first operand unless this is the first merge, in which
  // case LI1's own tags are used.

  AAMDNodes AATags1 = LOps.AATags;

  AAMDNodes AATags2 = LI2->getAAMetadata();

  if (LOps.FoundRoot == false) {

    LOps.FoundRoot = true;

    AATags1 = LI1->getAAMetadata();

  }

  LOps.LoadSize = LoadSize1 + LoadSize2;

  // Start is the earlier of the two instructions compared above.
  LOps.RootInsert = Start;


  // Concatenate the AATags of the Merged Loads.

  LOps.AATags = AATags1.concat(AATags2);


  LOps.Root = LI1;

  LOps.Shift = ShAmt1;

  LOps.ZextType = X->getType();

  return true;

}


// Starting from \p I, evaluate the whole chain of loads that feeds it and, if
// the chain forms a pattern in which the only use of the loads is to assemble
// a wider value, replace the uses of \p I with one wide load (zero-extended
// and shifted as required). Returns true if the replacement was made.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
                                 TargetTransformInfo &TTI, AliasAnalysis &AA,
                                 const DominatorTree &DT) {
  // Only consider load chains of scalar values.
  if (isa<VectorType>(I.getType()))
    return false;

  LoadOps LOps;
  if (!(foldLoadsRecursive(&I, LOps, DL, AA) && LOps.FoundRoot))
    return false;

  IRBuilder<> Builder(&I);
  LoadInst *FirstLoad = LOps.Root;

  // Ask TTI whether the wide load is worthwhile: the wide integer type has to
  // be legal ...
  IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
  if (!TTI.isTypeLegal(WiderType))
    return false;

  // ... and an access of that size at the first load's alignment has to be
  // both allowed and fast.
  const unsigned AddrSpace = FirstLoad->getPointerAddressSpace();
  unsigned Fast = 0;
  const bool MisalignedOk = TTI.allowsMisalignedMemoryAccesses(
      I.getContext(), LOps.LoadSize, AddrSpace, FirstLoad->getAlign(), &Fast);
  if (!MisalignedOk || !Fast)
    return false;

  // Work out the pointer for the new load. If the first load's pointer does
  // not dominate the insertion point, rebuild it there from its base pointer
  // and accumulated constant offset.
  Value *WidePtr = FirstLoad->getPointerOperand();
  Builder.SetInsertPoint(LOps.RootInsert);
  if (!DT.dominates(WidePtr, LOps.RootInsert)) {
    APInt ConstOffset(DL.getIndexTypeSizeInBits(WidePtr->getType()), 0);
    Value *Base = WidePtr->stripAndAccumulateConstantOffsets(
        DL, ConstOffset, /* AllowNonInbounds */ true);
    WidePtr = Builder.CreatePtrAdd(Base, Builder.getInt(ConstOffset));
  }

  // Emit the wide load; it takes over the first load's name and the merged
  // AA metadata (if any).
  LoadInst *WideLoad = Builder.CreateAlignedLoad(
      WiderType, WidePtr, FirstLoad->getAlign(), FirstLoad->isVolatile(), "");
  WideLoad->takeName(FirstLoad);
  if (LOps.AATags)
    WideLoad->setAAMetadata(LOps.AATags);

  // Zero-extend if an extension type was recorded, then re-apply the first
  // load's shift if it is non-zero.
  Value *Replacement = WideLoad;
  if (LOps.ZextType)
    Replacement = Builder.CreateZExt(Replacement, LOps.ZextType);
  if (LOps.Shift)
    Replacement = Builder.CreateShl(Replacement, LOps.Shift);

  I.replaceAllUsesWith(Replacement);
  return true;
}


/// Describes one partial store: the ValWidth bits of Val that begin at bit
/// ValOffset are written to the address PtrBase + PtrOffset.
struct PartStore {
  /// Pointer the store address is expressed relative to.
  Value *PtrBase;
  /// Constant offset from PtrBase to the address being stored to.
  APInt PtrOffset;
  /// The wide value this store writes a slice of.
  Value *Val;
  /// Bit position inside Val at which the stored slice begins.
  uint64_t ValOffset;
  /// Number of bits of Val written by this store.
  uint64_t ValWidth;
  /// The store instruction this record was built from.
  StoreInst *Store;

  /// Two parts may be grouped together only when they slice the same value
  /// and are addressed relative to the same base pointer.
  bool isCompatibleWith(const PartStore &Other) const {
    if (Val != Other.Val)
      return false;
    return PtrBase == Other.PtrBase;
  }

  /// Orders parts by ascending (signed) pointer offset.
  bool operator<(const PartStore &Other) const {
    return Other.PtrOffset.sgt(PtrOffset);
  }
};


/// Try to describe \p I as a PartStore.
///
/// Succeeds only for a simple store of an integer whose type size equals its
/// store size, where the stored value has the form trunc(Val) or
/// trunc(lshr(Val, ValOffset)). The pointer operand is decomposed into a base
/// plus an accumulated constant offset. Returns std::nullopt otherwise.
static std::optional<PartStore> matchPartStore(Instruction &I,
                                               const DataLayout &DL) {
  auto *SI = dyn_cast<StoreInst>(&I);
  if (!SI)
    return std::nullopt;
  if (!SI->isSimple())
    return std::nullopt;

  Value *Stored = SI->getValueOperand();
  Type *Ty = Stored->getType();
  if (!Ty->isIntegerTy())
    return std::nullopt;
  if (!DL.typeSizeEqualsStoreSize(Ty))
    return std::nullopt;
  uint64_t Width = Ty->getPrimitiveSizeInBits();

  // The stored value must be a truncation of an (optionally right-shifted)
  // wider value; the shift amount is the slice's bit offset.
  Value *Source;
  uint64_t ShiftAmt;
  if (!match(Stored, m_Trunc(m_LShrOrSelf(m_Value(Source), ShiftAmt))))
    return std::nullopt;

  // Split the address into a base pointer and a constant offset.
  Value *Addr = SI->getPointerOperand();
  APInt Offset(DL.getIndexTypeSizeInBits(Addr->getType()), 0);
  Value *Base = Addr->stripAndAccumulateConstantOffsets(
      DL, Offset, /*AllowNonInbounds=*/true);

  return PartStore{Base, Offset, Source, ShiftAmt, Width, SI};
}


/// Replace the stores in \p Parts by a single store of \p Width bits.
///
/// The combined store is emitted immediately before the first part's store and
/// reuses that store's pointer operand and alignment. Its value is the first
/// part's source value, shifted right by the first part's bit offset (when
/// non-zero) and truncated to an integer of \p Width bits. Nothing is changed
/// and false is returned when there are fewer than two parts, or when TTI
/// reports the wide type as illegal or the access as disallowed or not fast.
/// \p DL is not used by this function.
static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
                                       unsigned Width, const DataLayout &DL,
                                       TargetTransformInfo &TTI) {
  if (Parts.size() < 2)
    return false;

  // Check whether combining the stores is profitable.
  // FIXME: We could generate smaller stores if we can't produce a large one.
  const PartStore &Head = Parts.front();
  StoreInst *HeadStore = Head.Store;
  LLVMContext &Ctx = HeadStore->getContext();
  Type *WideTy = Type::getIntNTy(Ctx, Width);
  if (!TTI.isTypeLegal(WideTy))
    return false;
  unsigned Fast = 0;
  if (!TTI.allowsMisalignedMemoryAccesses(Ctx, Width,
                                          HeadStore->getPointerAddressSpace(),
                                          HeadStore->getAlign(), &Fast))
    return false;
  if (!Fast)
    return false;

  // Emit the combined store in front of the first part's store.
  IRBuilder<> Builder(HeadStore);
  Value *Wide = Head.Val;
  if (Head.ValOffset != 0)
    Wide = Builder.CreateLShr(Wide, Head.ValOffset);
  Wide = Builder.CreateTrunc(Wide, WideTy);
  StoreInst *Merged = Builder.CreateAlignedStore(
      Wide, HeadStore->getPointerOperand(), HeadStore->getAlign());

  // Gather alias-analysis tags, DIAssignID sources and debug locations from
  // every replaced store and attach the merged result to the new store.
  SmallVector<Instruction *> OldStores;
  OldStores.reserve(Parts.size());
  SmallVector<DebugLoc> OldLocs;
  OldLocs.reserve(Parts.size());
  AAMDNodes MergedTags = HeadStore->getAAMetadata();
  for (const PartStore &Part : Parts) {
    if (&Part != &Head)
      MergedTags = MergedTags.concat(Part.Store->getAAMetadata());
    OldStores.push_back(Part.Store);
    OldLocs.push_back(Part.Store->getDebugLoc());
  }
  Merged->setAAMetadata(MergedTags);
  Merged->mergeDIAssignID(OldStores);
  Merged->setDebugLoc(DebugLoc::getMergedLocations(OldLocs));

  // The narrow stores are now redundant.
  for (const PartStore &Part : Parts)
    Part.Store->eraseFromParent();

  return true;
}


/// Sort \p Parts by pointer offset, split them into maximal runs in which each
/// part's byte offset and bit offset advance in lock-step without gaps or
/// overlaps, and hand every run to mergeConsecutivePartStores().
///
/// Returns true if any run was merged.
static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
                            const DataLayout &DL, TargetTransformInfo &TTI) {
  if (Parts.size() < 2)
    return false;

  // All parts slice the same value and share a base pointer. Order them by
  // address so that consecutive runs become adjacent in the vector.
  llvm::sort(Parts);

  bool Changed = false;
  const PartStore *RunStart = &Parts[0];
  // Number of value bits covered by the current run so far.
  int64_t RunBits = 0;
  for (const PartStore &Part : Parts) {
    APInt PtrDelta = Part.PtrOffset - RunStart->PtrOffset;
    int64_t ValDelta = Part.ValOffset - RunStart->ValOffset;

    // The part continues the run when its byte distance matches its bit
    // distance and it begins exactly where the run currently ends.
    const bool ExtendsRun = PtrDelta * 8 == ValDelta && RunBits == ValDelta;
    if (ExtendsRun) {
      RunBits = ValDelta + Part.ValWidth;
      continue;
    }

    // Otherwise close the current run and start a new one at this part.
    Changed |= mergeConsecutivePartStores(ArrayRef(RunStart, &Part), RunBits,
                                          DL, TTI);
    RunStart = &Part;
    RunBits = Part.ValWidth;
  }

  // Flush the trailing run.
  Changed |= mergeConsecutivePartStores(ArrayRef(RunStart, Parts.end()),
                                        RunBits, DL, TTI);
  return Changed;
}


/// Scan \p BB for groups of part stores (see matchPartStore) that slice the
/// same value relative to the same base pointer, and try to merge each group
/// into wider stores. A group is closed when an incompatible part store is
/// found, or when an intervening instruction may throw or may read or write
/// memory based on the group's base pointer.
///
/// Returns true if any stores were merged.
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
                                  TargetTransformInfo &TTI, AliasAnalysis &AA) {
  // FIXME: Add big endian support.
  if (DL.isBigEndian())
    return false;

  BatchAAResults BatchAA(AA);
  SmallVector<PartStore, 8> Pending;
  bool MadeChange = false;

  // Try to merge the group collected so far, then start over with an empty one.
  auto FlushPending = [&]() {
    MadeChange |= mergePartStores(Pending, DL, TTI);
    Pending.clear();
  };

  for (Instruction &I : make_early_inc_range(BB)) {
    std::optional<PartStore> Part = matchPartStore(I, DL);
    if (Part) {
      // A part that does not fit the current group ends it and opens a new
      // group of its own.
      if (!Pending.empty() && !Part->isCompatibleWith(Pending[0]))
        FlushPending();
      Pending.push_back(std::move(*Part));
      continue;
    }

    // Non-store instructions only matter while a group is open.
    if (Pending.empty())
      continue;

    const bool Interferes =
        I.mayThrow() ||
        (I.mayReadOrWriteMemory() &&
         isModOrRefSet(BatchAA.getModRefInfo(
             &I, MemoryLocation::getBeforeOrAfter(Pending[0].PtrBase))));
    if (Interferes)
      FlushPending();
  }

  // Handle whatever group is still open at the end of the block.
  FlushPending();
  return MadeChange;
}


/// Rebuild a single-use chain of 'or' instructions with every nsw/nuw 'shl'
/// operand replaced by the value being shifted. The rebuilt value is only
/// meant to be compared against zero, i.e. to answer "are any bits set".
///
/// Returns the newly created 'or', or nullptr if \p V is not a single-use 'or'
/// or nothing in the chain could be simplified.
static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) {
  auto *OrInst = dyn_cast<Instruction>(V);
  if (!OrInst)
    return nullptr;
  if (OrInst->getOpcode() != Instruction::Or || !OrInst->hasOneUse())
    return nullptr;

  // Strip an nsw/nuw shl from an operand, or otherwise try to simplify the
  // operand recursively as a nested or-chain. Returns the operand unchanged
  // when neither applies.
  auto SimplifyOperand = [&Builder](Value *Op) -> Value * {
    Value *Shifted;
    if (match(Op, m_CombineOr(m_NSWShl(m_Value(Shifted), m_Value()),
                              m_NUWShl(m_Value(Shifted), m_Value()))))
      return Shifted;
    if (Value *Rebuilt = optimizeShiftInOrChain(Op, Builder))
      return Rebuilt;
    return Op;
  };

  Value *OldLHS = OrInst->getOperand(0);
  Value *OldRHS = OrInst->getOperand(1);
  Value *NewLHS = SimplifyOperand(OldLHS);
  Value *NewRHS = SimplifyOperand(OldRHS);

  // Only emit a new 'or' when at least one operand actually changed.
  if (NewLHS == OldLHS && NewRHS == OldRHS)
    return nullptr;
  return Builder.CreateOr(NewLHS, NewRHS);
}


/// Simplify an equality comparison of an or-chain against zero.
///
/// First tries to turn the or-chain into a single wider load via
/// foldConsecutiveLoads(); failing that, drops nsw/nuw shifts from the chain
/// via optimizeShiftInOrChain() and replaces the uses of \p I with a new
/// comparison of the simplified chain. Returns true if anything changed.
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL,
                            TargetTransformInfo &TTI, AliasAnalysis &AA,
                            const DominatorTree &DT) {
  CmpPredicate Pred;
  Value *Chain;
  if (!match(&I, m_ICmp(Pred, m_Value(Chain), m_Zero())))
    return false;
  if (!ICmpInst::isEquality(Pred))
    return false;

  // If the chain of or's matches a load, combine to that before attempting to
  // remove shifts.
  auto *ChainInst = dyn_cast<Instruction>(Chain);
  if (ChainInst && ChainInst->getOpcode() == Instruction::Or &&
      foldConsecutiveLoads(*ChainInst, DL, TTI, AA, DT))
    return true;

  // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
  IRBuilder<> Builder(&I);
  Value *Simplified = optimizeShiftInOrChain(Chain, Builder);
  if (!Simplified)
    return false;

  I.replaceAllUsesWith(Builder.CreateICmp(Pred, Simplified, I.getOperand(1)));
  return true;
}


// Walk the chain of GEPs feeding PtrOp and compute a minimum stride together
// with the accumulated constant offset reduced modulo that stride. Returns
// {Stride, ModOffset}; falls back to {1, 0} when no variable index was seen or
// the chain does not end at a global variable.
static std::pair<APInt, APInt>
getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
  const unsigned IndexBits = DL.getIndexTypeSizeInBits(PtrOp->getType());
  std::optional<APInt> MinStride;
  APInt ConstAccum(IndexBits, 0);

  // The minimum GEP stride is the greatest common divisor of consecutive GEP
  // index scales (c.f. Bézout's identity).
  while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
    SmallMapVector<Value *, APInt, 4> VarOffsets;
    if (!GEP->collectOffset(DL, IndexBits, VarOffsets, ConstAccum))
      break;

    // Without the nusw flag only the power-of-two factor of a scale is kept.
    const bool KeepPow2Only = !GEP->hasNoUnsignedSignedWrap();
    for (const auto &Entry : VarOffsets) {
      APInt Scale = Entry.second;
      if (KeepPow2Only)
        Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());

      MinStride = MinStride
                      ? APIntOps::GreatestCommonDivisor(*MinStride, Scale)
                      : Scale;
    }

    PtrOp = GEP->getPointerOperand();
  }

  // Check whether the pointer arrives back at a global variable via at least
  // one GEP with a variable index. Even if it doesn't, the caller can still
  // check by alignment.
  if (!MinStride || !isa<GlobalVariable>(PtrOp))
    return {APInt(IndexBits, 1), APInt(IndexBits, 0)};

  // GEP indices are signed, so normalise the constant offset to the
  // non-negative remainder of division by the minimum stride.
  ConstAccum = ConstAccum.srem(*MinStride);
  if (ConstAccum.isNegative())
    ConstAccum += *MinStride;

  return {*MinStride, ConstAccum};
}


/// If \p I is a non-volatile load from a small constant global whose
/// initializer folds to the same constant at every offset the load could use
/// (as constrained by the GEP stride and the load alignment), replace all uses
/// of the load with that constant.
///
/// \returns true if the uses of \p I were replaced.
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
  auto *LI = dyn_cast<LoadInst>(&I);
  if (!LI || LI->isVolatile())
    return false;

  // We can only fold the load if it is from a constant global with definitive
  // initializer. Skip expensive logic if this is not the case.
  auto *PtrOp = LI->getPointerOperand();
  auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return false;

  // Bail for large initializers in excess of 4K to avoid too many scans.
  Constant *C = GV->getInitializer();
  uint64_t GVSize = DL.getTypeAllocSize(C->getType());
  if (!GVSize || 4096 < GVSize)
    return false;

  // The scan below walks offsets in [0, GVSize - LoadSize]. That bound is only
  // meaningful for a fixed-size load that fits inside the initializer; a wider
  // load would make the subtraction wrap around and turn the scan into a
  // practically unbounded loop, and a scalable size cannot be converted to a
  // plain integer at all.
  Type *LoadTy = LI->getType();
  const auto LoadSize = DL.getTypeStoreSize(LoadTy);
  if (LoadSize.isScalable() || LoadSize.getFixedValue() > GVSize)
    return false;

  unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
  auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);

  // Any possible offset could be multiple of GEP stride. And any valid
  // offset is multiple of load alignment, so checking only multiples of bigger
  // one is sufficient to say results' equality.
  if (auto LA = LI->getAlign();
      LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
    ConstOffset = APInt(BW, 0);
    Stride = APInt(BW, LA.value());
  }

  // The value at the first candidate offset is the reference every other
  // candidate offset must reproduce.
  Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
  if (!Ca)
    return false;

  // Last offset at which the load still lies entirely within the initializer.
  const uint64_t E = GVSize - LoadSize.getFixedValue();
  for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
    if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
      return false;

  I.replaceAllUsesWith(Ca);

  return true;
}


namespace {

/// Helper that inlines a strcmp/strncmp call against a short constant string
/// into a sequence of byte comparisons.
class StrNCmpInliner {

public:

  /// \p CI is the library call to optimize and \p Func identifies which
  /// library function it calls. \p DTU may be null; when non-null it receives
  /// the dominator tree updates for the control flow that is created.
  StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,

                 const DataLayout &DL)

      : CI(CI), Func(Func), DTU(DTU), DL(DL) {}


  /// Check whether the call is eligible for inlining and, if so, inline it.
  /// Returns true if the call was replaced.
  bool optimizeStrNCmp();


private:

  /// Emit the byte-by-byte comparison of \p LHS against the first \p N bytes
  /// of \p RHS and replace the call with its result. \p Swapped reverses the
  /// operand order of the emitted subtractions.
  void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);


  // The call being optimized.
  CallInst *CI;

  // Library function called by CI (checked against LibFunc_strncmp).
  LibFunc Func;

  // Optional dominator tree updater; may be null.
  DomTreeUpdater *DTU;

  // Data layout used for pointer dereferenceability queries.
  const DataLayout &DL;

};


} // namespace


/// First we normalize calls to strncmp/strcmp to the form of

/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2

/// (without considering '\0').

///

/// Examples:

///

/// \code

///   strncmp(s, "a", 3) -> compare(s, "a", 2)

///   strncmp(s, "abc", 3) -> compare(s, "abc", 3)

///   strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)

///   strcmp(s, "a") -> compare(s, "a", 2)

///

///   char s2[] = {'a'}

///   strncmp(s, s2, 3) -> compare(s, s2, 3)

///

///   char s2[] = {'a', 'b', 'c', 'd'}

///   strncmp(s, s2, 3) -> compare(s, s2, 3)

/// \endcode

///

/// We only handle cases where N and exactly one of s1 and s2 are constant.

/// Cases that s1 and s2 are both constant are already handled by the

/// instcombine pass.

///

/// We do not handle cases where N > StrNCmpInlineThreshold.

///

/// We also do not handle cases where N < 2, which are already

/// handled by the instcombine pass.

///

bool StrNCmpInliner::optimizeStrNCmp() {

  if (StrNCmpInlineThreshold < 2)

    return false;


  if (!isOnlyUsedInZeroComparison(CI))

    return false;


  Value *Str1P = CI->getArgOperand(0);

  Value *Str2P = CI->getArgOperand(1);

  // Should be handled elsewhere.

  if (Str1P == Str2P)

    return false;


  StringRef Str1, Str2;

  bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);

  bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);

  if (HasStr1 == HasStr2)

    return false;


  // Note that '\0' and characters after it are not trimmed.

  StringRef Str = HasStr1 ? Str1 : Str2;

  Value *StrP = HasStr1 ? Str2P : Str1P;


  size_t Idx = Str.find('\0');

  uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;

  if (Func == LibFunc_strncmp) {

    if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))

      N = std::min(N, ConstInt->getZExtValue());

    else

      return false;

  }

  // Now N means how many bytes we need to compare at most.

  if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)

    return false;


  // Cases where StrP has two or more dereferenceable bytes might be better

  // optimized elsewhere.

  bool CanBeNull = false, CanBeFreed = false;

  if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)

    return false;

  inlineCompare(StrP, Str, N, HasStr1);

  return true;

}


/// Convert

///

/// \code

///   ret = compare(s1, s2, N)

/// \endcode

///

/// into

///

/// \code

///   ret = (int)s1[0] - (int)s2[0]

///   if (ret != 0)

///     goto NE

///   ...

///   ret = (int)s1[N-2] - (int)s2[N-2]

///   if (ret != 0)

///     goto NE

///   ret = (int)s1[N-1] - (int)s2[N-1]

///   NE:

/// \endcode

///

/// CFG before and after the transformation:

///

/// (before)

/// BBCI

///

/// (after)

/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail

///                 |                    ^

///                 E                    |

///                 |                    |

///        BBSubs[1] (sub,icmp) --NE-----+

///                ...                   |

///        BBSubs[N-1]    (sub) ---------+

///

void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
                                   bool Swapped) {
  LLVMContext &Ctx = CI->getContext();
  Type *ResultTy = CI->getType();
  IRBuilder<> B(Ctx);
  // We want these instructions to be recognized as inlined instructions for the
  // compare call, but we don't have a source location for the definition of
  // that function, since we're generating that code now. Because the generated
  // code is a viable point for a memory access error, we make the pragmatic
  // choice here to directly use CI's location so that we have useful
  // attribution for the generated code.
  B.SetCurrentDebugLocation(CI->getDebugLoc());

  // Split the block at the call; everything from the call onwards moves to
  // the tail block.
  BasicBlock *BBCI = CI->getParent();
  BasicBlock *BBTail =
      SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
  Function *ParentFn = BBCI->getParent();

  // One block per compared byte, followed by the common join block.
  SmallVector<BasicBlock *> BBSubs;
  for (uint64_t Idx = 0; Idx != N; ++Idx)
    BBSubs.push_back(
        BasicBlock::Create(Ctx, "sub_" + Twine(Idx), ParentFn, BBTail));
  BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", ParentFn, BBTail);

  // Enter the comparison chain instead of falling through to the tail.
  cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);

  // The join block selects the byte difference computed by whichever
  // comparison block branched to it.
  B.SetInsertPoint(BBNE);
  PHINode *Phi = B.CreatePHI(ResultTy, N);
  B.CreateBr(BBTail);

  for (uint64_t Idx = 0; Idx != N; ++Idx) {
    BasicBlock *Cur = BBSubs[Idx];
    const bool IsLast = Idx + 1 == N;

    B.SetInsertPoint(Cur);
    Value *Addr = B.CreateInBoundsPtrAdd(LHS, B.getInt64(Idx));
    Value *Byte = B.CreateLoad(B.getInt8Ty(), Addr);
    Value *VL = B.CreateZExt(Byte, ResultTy);
    Value *VR =
        ConstantInt::get(ResultTy, static_cast<unsigned char>(RHS[Idx]));
    Value *Sub;
    if (Swapped)
      Sub = B.CreateSub(VR, VL);
    else
      Sub = B.CreateSub(VL, VR);

    if (IsLast) {
      // The final byte always continues to the join block.
      B.CreateBr(BBNE);
    } else {
      // Leave the chain as soon as a byte differs.
      Value *Differs = B.CreateICmpNE(Sub, ConstantInt::get(ResultTy, 0));
      BranchInst *CondBrInst = B.CreateCondBr(Differs, BBNE, BBSubs[Idx + 1]);

      // Mark the branch weights as unknown when the function has a positive
      // entry count.
      Function *F = CI->getFunction();
      assert(F && "Instruction does not belong to a function!");
      std::optional<Function::ProfileCount> EC = F->getEntryCount();
      if (EC && EC->getCount() > 0)
        setExplicitlyUnknownBranchWeights(*CondBrInst, DEBUG_TYPE);
    }

    Phi->addIncoming(Sub, Cur);
  }

  CI->replaceAllUsesWith(Phi);
  CI->eraseFromParent();

  if (!DTU)
    return;

  // Report the new edges and the removed BBCI -> BBTail edge.
  SmallVector<DominatorTree::UpdateType, 8> Updates;
  Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
  for (uint64_t Idx = 0; Idx != N; ++Idx) {
    if (Idx + 1 != N)
      Updates.push_back({DominatorTree::Insert, BBSubs[Idx], BBSubs[Idx + 1]});
    Updates.push_back({DominatorTree::Insert, BBSubs[Idx], BBNE});
  }
  Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
  Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
  DTU->applyUpdates(Updates);
}


/// Convert memchr with a small constant string into a switch.
///
/// Applies only when the searched buffer is a known constant string, the
/// searched character is not a constant, and the length is a constant that
/// neither exceeds the string size nor MemChrInlineThreshold. The call is
/// replaced by a switch on the truncated character with one case per distinct
/// byte among the first N bytes; each case yields the index of that byte's
/// first occurrence, and the default destination yields a null pointer.
///
/// \p DTU may be null; when non-null it receives the dominator tree updates.
/// \returns true if the call was replaced.
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
                       const DataLayout &DL) {
  if (isa<Constant>(Call->getArgOperand(1)))
    return false;

  StringRef Str;
  Value *Base = Call->getArgOperand(0);
  if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
    return false;

  uint64_t N = Str.size();
  if (auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
    uint64_t Val = ConstInt->getZExtValue();
    // Ignore the case that n is larger than the size of string.
    if (Val > N)
      return false;
    N = Val;
  } else
    return false;

  if (N > MemChrInlineThreshold)
    return false;

  // Split at the call and replace the fall-through branch by a switch whose
  // default destination is the continuation block.
  BasicBlock *BB = Call->getParent();
  BasicBlock *BBNext = SplitBlock(BB, Call, DTU);
  IRBuilder<> IRB(BB);
  IRB.SetCurrentDebugLocation(Call->getDebugLoc());
  IntegerType *ByteTy = IRB.getInt8Ty();
  BB->getTerminator()->eraseFromParent();
  SwitchInst *SI = IRB.CreateSwitch(
      IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
  // We can't know the precise weights here, as they would depend on the value
  // distribution of Call->getArgOperand(1). So we just mark it as "unknown".
  setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(),
                                              DEBUG_TYPE);
  Type *IndexTy = DL.getIndexType(Call->getType());
  SmallVector<DominatorTree::UpdateType, 8> Updates;

  // Every matching case funnels into this block, which turns the matched
  // index into a pointer into the string.
  BasicBlock *BBSuccess = BasicBlock::Create(
      Call->getContext(), "memchr.success", BB->getParent(), BBNext);
  IRB.SetInsertPoint(BBSuccess);
  PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
  Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
  IRB.CreateBr(BBNext);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext});

  SmallPtrSet<ConstantInt *, 4> Cases;
  for (uint64_t I = 0; I < N; ++I) {
    // Go through unsigned char so that bytes >= 0x80 are not sign-extended
    // into a value that does not fit the 8-bit case type when char is signed.
    ConstantInt *CaseVal =
        ConstantInt::get(ByteTy, static_cast<unsigned char>(Str[I]));
    // Only the first occurrence of each byte value gets a case.
    if (!Cases.insert(CaseVal).second)
      continue;

    BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case",
                                            BB->getParent(), BBSuccess);
    SI->addCase(CaseVal, BBCase);
    IRB.SetInsertPoint(BBCase);
    IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase);
    IRB.CreateBr(BBSuccess);
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, BB, BBCase});
      Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess});
    }
  }

  // The result is null when arriving via the switch default and the matched
  // location otherwise.
  PHINode *PHI =
      PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin());
  PHI->addIncoming(Constant::getNullValue(Call->getType()), BB);
  PHI->addIncoming(FirstOccursLocation, BBSuccess);

  Call->replaceAllUsesWith(PHI);
  Call->eraseFromParent();

  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}


/// Dispatch \p I to the library-call folds in this file.
///
/// Handles direct, builtin-eligible calls to sqrt/sqrtf/sqrtl, strcmp/strncmp
/// and memchr that the target library info recognizes and can emit. Sets
/// \p MadeCFGChange when the string folds rewrite the control flow. Returns
/// true if the call was transformed.
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
                         TargetLibraryInfo &TLI, AssumptionCache &AC,
                         DominatorTree &DT, const DataLayout &DL,
                         bool &MadeCFGChange) {
  auto *Call = dyn_cast<CallInst>(&I);
  if (!Call)
    return false;
  if (Call->isNoBuiltin())
    return false;

  // Only direct calls can be identified as library functions.
  Function *Callee = Call->getCalledFunction();
  if (!Callee)
    return false;

  LibFunc LF;
  if (!TLI.getLibFunc(*Callee, LF))
    return false;
  if (!isLibFuncEmittable(Call->getModule(), &TLI, LF))
    return false;

  DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);

  switch (LF) {
  case LibFunc_sqrt:
  case LibFunc_sqrtf:
  case LibFunc_sqrtl:
    return foldSqrt(Call, LF, TTI, TLI, AC, DT);
  case LibFunc_strcmp:
  case LibFunc_strncmp: {
    const bool Inlined = StrNCmpInliner(Call, LF, &DTU, DL).optimizeStrNCmp();
    MadeCFGChange |= Inlined;
    return Inlined;
  }
  case LibFunc_memchr: {
    const bool Folded = foldMemChr(Call, &DTU, DL);
    MadeCFGChange |= Folded;
    return Folded;
  }
  default:
    return false;
  }
}


/// Entry point for folds that could live in regular InstCombine but are kept
/// separate because they are not expected to occur frequently and/or need more
/// than a constant-length pattern match.
///
/// Runs every per-instruction fold over each reachable block, then the
/// per-block store merging, and finally simplifies all blocks if anything
/// changed. \p MadeCFGChange is set by the library-call folds when they alter
/// the control flow. Returns true if the function was modified.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
                                TargetTransformInfo &TTI,
                                TargetLibraryInfo &TLI, AliasAnalysis &AA,
                                AssumptionCache &AC, bool &MadeCFGChange) {
  const DataLayout &DL = F.getDataLayout();
  bool MadeChange = false;

  for (BasicBlock &BB : F) {
    // Unreachable basic blocks are not worth optimizing.
    if (DT.isReachableFromEntry(&BB)) {
      // Walk the block backwards for efficiency. We're matching a chain of
      // use->defs, so we're more likely to succeed by starting from the
      // bottom. Also, we want to avoid matching partial patterns.
      // TODO: It would be more efficient if we removed dead instructions
      // iteratively in this loop rather than waiting until the end.
      for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
        if (foldAnyOrAllBitsSet(I))
          MadeChange = true;
        if (foldGuardedFunnelShift(I, DT))
          MadeChange = true;
        if (tryToRecognizePopCount(I))
          MadeChange = true;
        if (tryToFPToSat(I, TTI))
          MadeChange = true;
        if (tryToRecognizeTableBasedCttz(I, DL))
          MadeChange = true;
        if (foldConsecutiveLoads(I, DL, TTI, AA, DT))
          MadeChange = true;
        if (foldPatternedLoads(I, DL))
          MadeChange = true;
        if (foldICmpOrChain(I, DL, TTI, AA, DT))
          MadeChange = true;
        // NOTE: This fold may erase the instruction `I`, so it has to stay the
        // last one in this sequence; anything placed after it could touch a
        // deleted instruction.
        if (foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange))
          MadeChange = true;
      }

      // Do this separately to avoid redundantly scanning stores multiple
      // times.
      if (foldConsecutiveStores(BB, DL, TTI, AA))
        MadeChange = true;
    }
  }

  if (!MadeChange)
    return false;

  // We're done with transforms, so remove dead instructions.
  for (BasicBlock &BB : F)
    SimplifyInstructionsInBlock(&BB);
  return true;
}


/// Common entry point for all transforms of this pass; pass-manager specific
/// glue lives in the callers.
///
/// Runs the truncate combiner first and the unusual-pattern folds second.
/// Returns true if either of them modified \p F.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
                    TargetLibraryInfo &TLI, DominatorTree &DT,
                    AliasAnalysis &AA, bool &MadeCFGChange) {
  const DataLayout &Layout = F.getDataLayout();
  TruncInstCombine TruncCombiner(AC, TLI, Layout, DT);

  // Both stages always run; only the results are combined afterwards.
  const bool TruncChanged = TruncCombiner.run(F);
  const bool PatternsChanged =
      foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);
  return TruncChanged || PatternsChanged;
}


PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
  // Fetch every analysis the transforms need.
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);

  bool MadeCFGChange = false;
  const bool Changed = runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange);

  // No changes, all analyses are preserved.
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  if (!MadeCFGChange) {
    // The control flow graph is untouched, so all CFG analyses stay valid.
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }

  // The CFG was changed; only the dominator tree is marked as preserved.
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

Select
AMDGPU Register Bank Select
Definition AMDGPURegBankSelect.cpp:68

PHI
Rewrite undef for PHI
Definition AMDGPURewriteUndefForPHI.cpp:98

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

AggressiveInstCombineInternal.h

tryToRecognizePopCount
static bool tryToRecognizePopCount(Instruction &I)
Definition AggressiveInstCombine.cpp:302

foldSqrt
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
Definition AggressiveInstCombine.cpp:438

foldAnyOrAllBitsSet
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
Definition AggressiveInstCombine.cpp:258

MemChrInlineThreshold
static cl::opt< unsigned > MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string to " "inline a memchr call."))

tryToFPToSat
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
Definition AggressiveInstCombine.cpp:383

StrNCmpInlineThreshold
static cl::opt< unsigned > StrNCmpInlineThreshold("strncmp-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string for a builtin string cmp " "call eligible for inlining. The default value is 3."))

matchAndOrChain
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
Definition AggressiveInstCombine.cpp:212

foldMemChr
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, const DataLayout &DL)
Convert memchr with a small constant string into a switch.
Definition AggressiveInstCombine.cpp:1348

optimizeShiftInOrChain
static Value * optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder)
Combine away instructions providing they are still equivalent when compared against 0.
Definition AggressiveInstCombine.cpp:1006

foldConsecutiveLoads
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
Definition AggressiveInstCombine.cpp:782

foldGuardedFunnelShift
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
Definition AggressiveInstCombine.cpp:74

tryToRecognizeTableBasedCttz
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL)
Definition AggressiveInstCombine.cpp:544

mergePartStores
static bool mergePartStores(SmallVectorImpl< PartStore > &Parts, const DataLayout &DL, TargetTransformInfo &TTI)
Definition AggressiveInstCombine.cpp:932

mergeConsecutivePartStores
static bool mergeConsecutivePartStores(ArrayRef< PartStore > Parts, unsigned Width, const DataLayout &DL, TargetTransformInfo &TTI)
Definition AggressiveInstCombine.cpp:882

MaxInstrsToScan
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))

foldLoadsRecursive
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA)
Definition AggressiveInstCombine.cpp:645

foldICmpOrChain
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
Definition AggressiveInstCombine.cpp:1034

isCTTZTable
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift, const APInt &AndMask, Type *AccessTy, unsigned InputBits, const APInt &GEPIdxFactor, const DataLayout &DL)
Definition AggressiveInstCombine.cpp:470

matchPartStore
static std::optional< PartStore > matchPartStore(Instruction &I, const DataLayout &DL)
Definition AggressiveInstCombine.cpp:858

foldConsecutiveStores
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
Definition AggressiveInstCombine.cpp:965

getStrideAndModOffsetOfGEP
static std::pair< APInt, APInt > getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL)
Definition AggressiveInstCombine.cpp:1063

foldPatternedLoads
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL)
If C is a constant patterned array and all valid loaded results for given alignment are same to a con...
Definition AggressiveInstCombine.cpp:1104

foldLibCalls
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, const DataLayout &DL, bool &MadeCFGChange)
Definition AggressiveInstCombine.cpp:1427

foldUnusualPatterns
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA, AssumptionCache &AC, bool &MadeCFGChange)
This is the entry point for folds that could be implemented in regular InstCombine,...
Definition AggressiveInstCombine.cpp:1473

AggressiveInstCombine.h
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...

AliasAnalysis.h

AssumptionCache.h

BasicAliasAnalysis.h
This is the interface for LLVM's primary stateless and local alias analysis.

BasicBlockUtils.h

BuildLibCalls.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Casting.h

CommandLine.h

ConstantFolding.h

DataLayout.h

DomTreeUpdater.h

Dominators.h

runImpl
static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC)
Definition ExpandFp.cpp:993

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

GlobalsModRef.h
This is the interface for a simple mod/ref and alias analysis over globals.

GEP
Hexagon Common GEP
Definition HexagonCommonGEP.cpp:164

getAlign
static MaybeAlign getAlign(Value *Ptr)
Definition IRBuilder.cpp:443

IRBuilder.h

Function.h

Instruction.h

matchFunnelShift
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
Definition InstCombineAndOrXor.cpp:3111

F
#define F(x, y, z)
Definition MD5.cpp:55

I
#define I(x, y, z)
Definition MD5.cpp:58

MDBuilder.h

PatternMatch.h

ProfDataUtils.h
This file contains the declarations for profiling metadata utility functions.

MaskShift
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Definition SIProgramInfo.cpp:156

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

X
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

Ptr
@ Ptr
Definition TargetLibraryInfo.cpp:77

TargetLibraryInfo.h

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

Local.h

ValueTracking.h

RHS
Value * RHS
Definition X86PartialReduction.cpp:74

LHS
Value * LHS
Definition X86PartialReduction.cpp:73

Mul
BinaryOperator * Mul
Definition X86PartialReduction.cpp:68

llvm::AAManager
A manager for alias analyses.
Definition AliasAnalysis.h:974

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234

llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540

llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the bit at the position given by "BitPosition" to 1.
Definition APInt.h:1330

llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488

llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329

llvm::APInt::getSplat
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651

llvm::APInt::srem
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736

llvm::APInt::shl
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873

llvm::APInt::isSubsetOf
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257

llvm::APInt::slt
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130

llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239

llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221

llvm::AggressiveInstCombinePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition AggressiveInstCombine.cpp:1530

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition PassManager.h:412

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41

llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147

llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition AssumptionCache.h:180

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition AssumptionCache.h:44

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459

llvm::BasicBlock::getFirstInsertionPt
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition BasicBlock.cpp:393

llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213

llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233

llvm::BatchAAResults
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Definition AliasAnalysis.h:657

llvm::BatchAAResults::getModRefInfo
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Definition AliasAnalysis.h:680

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73

llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition InstrTypes.h:1290

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition Instructions.h:1511

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition InstrTypes.h:697

llvm::CmpPredicate
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition CmpPredicate.h:23

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition Constants.h:87

llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::Constant::getNullValue
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition Constants.cpp:373

llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:63

llvm::DebugLoc::getMergedLocations
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:170

llvm::DomTreeUpdater
Definition DomTreeUpdater.h:34

llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284

llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition GenericDomTree.h:252

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165

llvm::DominatorTree::isReachableFromEntry
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition Dominators.cpp:334

llvm::DominatorTree::dominates
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition Dominators.cpp:135

llvm::Function
Definition Function.h:64

llvm::GenericDomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Definition GenericDomTreeUpdaterImpl.h:59

llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition Instructions.h:950

llvm::GlobalVariable
Definition GlobalVariable.h:40

llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition GlobalVariable.h:154

llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition GlobalVariable.h:110

llvm::GlobalVariable::isConstant
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
Definition GlobalVariable.h:177

llvm::ICmpInst::isEquality
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Definition Instructions.h:1317

llvm::IRBuilderBase::SetCurrentDebugLocation
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247

llvm::IRBuilderBase::CreatePHI
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2497

llvm::IRBuilderBase::CreateSwitch
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220

llvm::IRBuilderBase::CreateTrunc
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071

llvm::IRBuilderBase::CreateBr
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191

llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207

llvm::IRBuilderBase::CreateInBoundsPtrAdd
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2044

llvm::IRBuilderBase::getInt8Ty
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::Instruction
Definition Instruction.h:69

llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition Instruction.h:513

llvm::Instruction::setAAMetadata
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition Metadata.cpp:1832

llvm::Instruction::eraseFromParent
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition Instruction.cpp:108

llvm::Instruction::getFunction
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition Instruction.cpp:86

llvm::Instruction::getAAMetadata
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition Metadata.cpp:1817

llvm::IntegerType
Class to represent integer types.
Definition DerivedTypes.h:42

llvm::IntegerType::get
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319

llvm::IntrinsicCostAttributes
Definition TargetTransformInfo.h:126

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::LoadInst
An instruction for reading from memory.
Definition Instructions.h:181

llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition Instructions.h:266

llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition Instructions.h:260

llvm::LoadInst::isSimple
bool isSimple() const
Definition Instructions.h:252

llvm::MDBuilder
Definition MDBuilder.h:37

llvm::MDBuilder::createUnlikelyBranchWeights
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition MDBuilder.cpp:48

llvm::MapVector::size
size_type size() const
Definition MapVector.h:56

llvm::MapVector::front
std::pair< KeyT, ValueT > & front()
Definition MapVector.h:79

llvm::MemoryLocation
Representation for a specific memory location.
Definition MemoryLocation.h:217

llvm::MemoryLocation::get
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition MemoryLocation.cpp:36

llvm::MemoryLocation::getBeforeOrAfter
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Definition MemoryLocation.h:285

llvm::PHINode
Definition Instructions.h:2639

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition Instructions.h:2774

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition Instructions.h:2674

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::PreservedAnalyses::preserveSet
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151

llvm::PreservedAnalyses::preserve
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:389

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition SmallPtrSet.h:527

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:573

llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition SmallVector.h:663

llvm::SmallVectorImpl::clear
void clear()
Definition SmallVector.h:610

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:416

llvm::SmallVectorTemplateCommon::end
iterator end()
Definition SmallVector.h:272

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:79

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:82

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1202

llvm::StoreInst
An instruction for storing to memory.
Definition Instructions.h:297

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55

llvm::StringRef::npos
static constexpr size_t npos
Definition StringRef.h:57

llvm::SwitchInst
Multiway switch.
Definition Instructions.h:3196

llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition TargetTransformInfo.h:1989

llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition TargetLibraryInfo.h:625

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition TargetLibraryInfo.h:285

llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition TargetLibraryInfo.h:352

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:223

llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition TargetTransformInfo.h:276

llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
Definition TargetTransformInfo.h:1438

llvm::TruncInstCombine
Definition AggressiveInstCombineInternal.h:52

llvm::TruncInstCombine::run
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
Definition TruncInstCombine.cpp:524

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45

llvm::Type::getPrimitiveSizeInBits
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198

llvm::Type::getScalarSizeInBits
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240

llvm::Type::getIntNTy
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::replaceAllUsesWith
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546

llvm::Value::stripAndAccumulateConstantOffsets
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.

llvm::Value::getContext
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099

llvm::Value::getPointerDereferenceableBytes
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:881

llvm::Value::getName
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322

llvm::Value::takeName
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396

llvm::VectorType::get
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.

llvm::cl::opt
Definition CommandLine.h:1455

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition ilist_node.h:34

uint64_t

Call
CallInst * Call
Definition ObjCARCOpts.cpp:2359

Changed
Changed
Definition ObjCARCOpts.cpp:2369

UINT64_MAX
#define UINT64_MAX
Definition DataTypes.h:77

llvm::AA
Abstract Attribute helper functions.
Definition Attributor.h:165

llvm::APIntOps::GreatestCommonDivisor
LLVM_ABI APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
Definition APInt.cpp:798

llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81

llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition Intrinsics.h:46

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::Loc
Definition DwarfDebug.h:129

llvm::MIPatternMatch::m_ZeroInt
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
Definition MIPatternMatch.h:278

llvm::MIPatternMatch::m_Neg
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
Definition MIPatternMatch.h:929

llvm::MIPatternMatch::m_OneUse
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Definition MIPatternMatch.h:56

llvm::PatternMatch
Definition PatternMatch.h:47

llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition PatternMatch.h:1288

llvm::PatternMatch::m_LShrOrSelf
ShiftLike_match< LHS, Instruction::LShr > m_LShrOrSelf(const LHS &L, uint64_t &R)
Matches lshr L, ConstShAmt or L itself (R will be set to zero in this case).
Definition PatternMatch.h:1350

llvm::PatternMatch::m_CastOrSelf
match_combine_or< CastInst_match< OpTy, CastInst >, OpTy > m_CastOrSelf(const OpTy &Op)
Matches any cast or self. Used to ignore casts.
Definition PatternMatch.h:2209

llvm::PatternMatch::m_APInt
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition PatternMatch.h:291

llvm::PatternMatch::m_c_And
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
Definition PatternMatch.h:2975

llvm::PatternMatch::m_Trunc
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition PatternMatch.h:2215

llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition PatternMatch.h:1052

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition PatternMatch.h:49

llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition PatternMatch.h:854

llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition PatternMatch.h:954

llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition PatternMatch.h:584

llvm::PatternMatch::m_SMin
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
Definition PatternMatch.h:2496

llvm::PatternMatch::m_ShlOrSelf
ShiftLike_match< LHS, Instruction::Shl > m_ShlOrSelf(const LHS &L, uint64_t &R)
Matches shl L, ConstShAmt or L itself (R will be set to zero in this case).
Definition PatternMatch.h:1343

llvm::PatternMatch::m_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
Definition PatternMatch.h:1240

llvm::PatternMatch::m_Deferred
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition PatternMatch.h:972

llvm::PatternMatch::m_SpecificBB
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
Definition PatternMatch.h:1085

llvm::PatternMatch::m_NSWShl
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoSignedWrap > m_NSWShl(const LHS &L, const RHS &R)
Definition PatternMatch.h:1424

llvm::PatternMatch::m_SpecificICmp
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
Definition PatternMatch.h:1809

llvm::PatternMatch::m_ZExt
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition PatternMatch.h:2247

llvm::PatternMatch::m_NUWShl
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
Definition PatternMatch.h:1467

llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition PatternMatch.h:2368

llvm::PatternMatch::m_c_Add
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
Definition PatternMatch.h:2961

llvm::PatternMatch::m_FPToSI
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
Definition PatternMatch.h:2314

llvm::PatternMatch::m_SMax
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
Definition PatternMatch.h:2490

llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition PatternMatch.h:105

llvm::PatternMatch::m_LShr
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Definition PatternMatch.h:1312

llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Definition PatternMatch.h:1746

llvm::PatternMatch::m_Shl
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition PatternMatch.h:1306

llvm::PatternMatch::m_Or
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition PatternMatch.h:1294

llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition PatternMatch.h:604

llvm::PatternMatch::m_c_Or
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
Definition PatternMatch.h:2982

llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition PatternMatch.h:1186

llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition PatternMatch.h:252

llvm::SI
Definition SIInstrInfo.h:1805

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:139

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:445

llvm::codeview::CompileSym2Flags::EC
@ EC
Definition CodeView.h:433

llvm::codeview::PublicSymFlags::Function
@ Function
Definition CodeView.h:409

llvm::rdf::Phi
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390

llvm::sampleprof::Base
@ Base
Definition Discriminator.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::TailFoldingOpts::Reverse
@ Reverse
Definition AArch64BaseInfo.h:609

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::isOnlyUsedInZeroComparison
LLVM_ABI bool isOnlyUsedInZeroComparison(const Instruction *CxtI)
Definition ValueTracking.cpp:256

llvm::LibFunc
LibFunc
Definition TargetLibraryInfo.h:72

llvm::setExplicitlyUnknownBranchWeightsIfProfiled
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, StringRef PassName)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
Definition ProfDataUtils.cpp:276

llvm::getConstantStringInfo
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
Definition ValueTracking.cpp:6491

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition iterator_range.h:70

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632

llvm::SimplifyInstructionsInBlock
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:721

llvm::setExplicitlyUnknownBranchWeights
LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName)
Specify that the branch weights for this terminator cannot be known at compile time.
Definition ProfDataUtils.cpp:266

llvm::MaskedValueIsZero
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
Definition ValueTracking.cpp:318

llvm::isLibFuncEmittable
LLVM_ABI bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
Definition BuildLibCalls.cpp:1527

llvm::dyn_cast_or_null
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753

llvm::reverse
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::isModSet
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::isModOrRefSet
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43

llvm::ConstantFoldLoadFromConst
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
Definition ConstantFolding.cpp:714

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1128

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::IRMemLocation::Other
@ Other
Any other memory.
Definition ModRef.h:68

llvm::IRMemLocation::First
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71

llvm::TTI
TargetTransformInfo TTI
Definition TargetTransformInfo.h:218

llvm::IRBuilder
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

llvm::RecurKind::Sub
@ Sub
Subtraction of integers.
Definition IVDescriptors.h:38

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

llvm::BitWidth
constexpr unsigned BitWidth
Definition BitmaskEnum.h:220

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::SplitBlock
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition BasicBlockUtils.cpp:961

llvm::ProfcheckDisableMetadataFixes
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
Definition AggressiveInstCombine.cpp:47

llvm::FunctionAnalysisManager
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Definition PassManager.h:564

llvm::isGuaranteedNotToBePoison
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Definition ValueTracking.cpp:7776

llvm::getUnderlyingObject
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Definition ValueTracking.cpp:6683

llvm::AliasAnalysis
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
Definition AliasAnalysis.h:721

llvm::cannotBeOrderedLessThanZero
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
Definition ValueTracking.cpp:5978

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869

N
#define N

LoadOps
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load, load) and recursively build the wide load.
Definition AggressiveInstCombine.cpp:632

LoadOps::ZextType
Type * ZextType
Definition AggressiveInstCombine.cpp:638

LoadOps::Shift
uint64_t Shift
Definition AggressiveInstCombine.cpp:637

LoadOps::AATags
AAMDNodes AATags
Definition AggressiveInstCombine.cpp:639

LoadOps::RootInsert
LoadInst * RootInsert
Definition AggressiveInstCombine.cpp:634

LoadOps::Root
LoadInst * Root
Definition AggressiveInstCombine.cpp:633

LoadOps::LoadSize
uint64_t LoadSize
Definition AggressiveInstCombine.cpp:636

LoadOps::FoundRoot
bool FoundRoot
Definition AggressiveInstCombine.cpp:635

PartStore
ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
Definition AggressiveInstCombine.cpp:841

PartStore::ValOffset
uint64_t ValOffset
Definition AggressiveInstCombine.cpp:845

PartStore::ValWidth
uint64_t ValWidth
Definition AggressiveInstCombine.cpp:846

PartStore::PtrOffset
APInt PtrOffset
Definition AggressiveInstCombine.cpp:843

PartStore::Store
StoreInst * Store
Definition AggressiveInstCombine.cpp:847

PartStore::operator<
bool operator<(const PartStore &Other) const
Definition AggressiveInstCombine.cpp:853

PartStore::isCompatibleWith
bool isCompatibleWith(const PartStore &Other) const
Definition AggressiveInstCombine.cpp:849

PartStore::PtrBase
Value * PtrBase
Definition AggressiveInstCombine.cpp:842

PartStore::Val
Value * Val
Definition AggressiveInstCombine.cpp:844

llvm::AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
Definition Metadata.h:761

llvm::AAMDNodes::concat
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
Definition TypeBasedAliasAnalysis.cpp:553

llvm::MIPatternMatch::And
Matching combinators.
Definition MIPatternMatch.h:314

llvm::SimplifyQuery
Definition SimplifyQuery.h:71

llvm::SmallMapVector
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:257

llvm::cl::desc
Definition CommandLine.h:411