doxygen/html/SeparateConstOffsetFromGEP_8cpp_source.html

//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// Loop unrolling may create many similar GEPs for array accesses.

// e.g., a 2-level loop

//

// float a[32][32]; // global variable

//

// for (int i = 0; i < 2; ++i) {

//   for (int j = 0; j < 2; ++j) {

//     ...

//     ... = a[x + i][y + j];

//     ...

//   }

// }

//

// will probably be unrolled to:

//

// gep %a, 0, %x, %y; load

// gep %a, 0, %x, %y + 1; load

// gep %a, 0, %x + 1, %y; load

// gep %a, 0, %x + 1, %y + 1; load

//

// LLVM's GVN does not use partial redundancy elimination yet, and is thus

// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs

// significant slowdown in targets with limited addressing modes. For instance,

// because the PTX target does not support the reg+reg addressing mode, the

// NVPTX backend emits PTX code that literally computes the pointer address of

// each GEP, wasting tons of registers. It emits the following PTX for the

// first load and similar PTX for other loads.

//

// mov.u32         %r1, %x;

// mov.u32         %r2, %y;

// mul.wide.u32    %rl2, %r1, 128;

// mov.u64         %rl3, a;

// add.s64         %rl4, %rl3, %rl2;

// mul.wide.u32    %rl5, %r2, 4;

// add.s64         %rl6, %rl4, %rl5;

// ld.global.f32   %f1, [%rl6];

//

// To reduce the register pressure, the optimization implemented in this file

// merges the common part of a group of GEPs, so we can compute each pointer

// address by adding a simple offset to the common part, saving many registers.

//

// It works by splitting each GEP into a variadic base and a constant offset.

// The variadic base can be computed once and reused by multiple GEPs, and the

// constant offsets can be nicely folded into the reg+immediate addressing mode

// (supported by most targets) without using any extra register.

//

// For instance, we transform the four GEPs and four loads in the above example

// into:

//

// base = gep a, 0, x, y

// load base

// load base + 1  * sizeof(float)

// load base + 32 * sizeof(float)

// load base + 33 * sizeof(float)

//

// Given the transformed IR, a backend that supports the reg+immediate

// addressing mode can easily fold the pointer arithmetics into the loads. For

// example, the NVPTX backend can easily fold the pointer arithmetics into the

// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.

//

// mov.u32         %r1, %tid.x;

// mov.u32         %r2, %tid.y;

// mul.wide.u32    %rl2, %r1, 128;

// mov.u64         %rl3, a;

// add.s64         %rl4, %rl3, %rl2;

// mul.wide.u32    %rl5, %r2, 4;

// add.s64         %rl6, %rl4, %rl5;

// ld.global.f32   %f1, [%rl6]; // so far the same as unoptimized PTX

// ld.global.f32   %f2, [%rl6+4]; // much better

// ld.global.f32   %f3, [%rl6+128]; // much better

// ld.global.f32   %f4, [%rl6+132]; // much better

//

// Another improvement enabled by the LowerGEP flag is to lower a GEP with

// multiple indices to multiple GEPs with a single index.

// Such transformation can have following benefits:

// (1) It can always extract constants in the indices of structure type.

// (2) After such Lowering, there are more optimization opportunities such as

//     CSE, LICM and CGP.

//

// E.g. The following GEPs have multiple indices:

//  BB1:

//    %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3

//    load %p

//    ...

//  BB2:

//    %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 2

//    load %p2

//    ...

//

// We can not do CSE to the common part related to index "i64 %i". Lowering

// GEPs can achieve such goals.

//

// This pass will lower a GEP with multiple indices into multiple GEPs with a

// single index:

//  BB1:

//    %2 = mul i64 %i, length_of_10xstruct          ; CSE opportunity

//    %3 = getelementptr i8, ptr %ptr, i64 %2       ; CSE opportunity

//    %4 = mul i64 %j1, length_of_struct

//    %5 = getelementptr i8, ptr %3, i64 %4

//    %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset

//    load %p

//    ...

//  BB2:

//    %8 = mul i64 %i, length_of_10xstruct            ; CSE opportunity

//    %9 = getelementptr i8, ptr %ptr, i64 %8         ; CSE opportunity

//    %10 = mul i64 %j2, length_of_struct

//    %11 = getelementptr i8, ptr %9, i64 %10

//    %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset

//    load %p2

//    ...

//

// Lowering GEPs can also benefit other passes such as LICM and CGP.

// LICM (Loop Invariant Code Motion) can not hoist/sink a GEP of multiple

// indices if one of the index is variant. If we lower such GEP into invariant

// parts and variant parts, LICM can hoist/sink those invariant parts.

// CGP (CodeGen Prepare) tries to sink address calculations that match the

// target's addressing modes. A GEP with multiple indices may not match and will

// not be sunk. If we lower such GEP into smaller parts, CGP may sink some of

// them. So we end up with a better addressing mode.

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"

#include "llvm/ADT/APInt.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/DepthFirstIterator.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/MemoryBuiltins.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constant.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/GetElementPtrTypeIterator.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/PassManager.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/User.h"

#include "llvm/IR/Value.h"

#include "llvm/InitializePasses.h"

#include "llvm/Pass.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Scalar.h"

#include "llvm/Transforms/Utils/Local.h"

#include <cassert>

#include <cstdint>

#include <string>


using namespace llvm;

using namespace llvm::PatternMatch;


static cl::opt<bool> DisableSeparateConstOffsetFromGEP(

    "disable-separate-const-offset-from-gep", cl::init(false),

    cl::desc("Do not separate the constant offset from a GEP instruction"),

    cl::Hidden);


// Setting this flag may emit false positives when the input module already

// contains dead instructions. Therefore, we set it only in unit tests that are

// free of dead code.

static cl::opt<bool>

    VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),

                     cl::desc("Verify this pass produces no dead code"),

                     cl::Hidden);


namespace {


/// A helper class for separating a constant offset from a GEP index.

///

/// In real programs, a GEP index may be more complicated than a simple addition

/// of something and a constant integer which can be trivially splitted. For

/// example, to split ((a << 3) | 5) + b, we need to search deeper for the

/// constant offset, so that we can separate the index to (a << 3) + b and 5.

///

/// Therefore, this class looks into the expression that computes a given GEP

/// index, and tries to find a constant integer that can be hoisted to the

/// outermost level of the expression as an addition. Not every constant in an

/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +

/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), however in this case,

/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).

class ConstantOffsetExtractor {

public:

  /// Extracts a constant offset from the given GEP index. It returns the

  /// new index representing the remainder (equal to the original index minus

  /// the constant offset), or nullptr if we cannot extract a constant offset.

  /// \p Idx The given GEP index

  /// \p GEP The given GEP

  /// \p UserChainTail Outputs the tail of UserChain so that we can

  ///                  garbage-collect unused instructions in UserChain.

  /// \p PreservesNUW  Outputs whether the extraction allows preserving the

  ///                  GEP's nuw flag, if it has one.

  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,

                        User *&UserChainTail, bool &PreservesNUW);


  /// Looks for a constant offset from the given GEP index without extracting

  /// it. It returns the numeric value of the extracted constant offset (0 if

  /// failed). The meaning of the arguments are the same as Extract.

  static APInt Find(Value *Idx, GetElementPtrInst *GEP);


private:

  ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)

      : IP(InsertionPt), DL(InsertionPt->getDataLayout()) {}


  /// Searches the expression that computes V for a non-zero constant C s.t.

  /// V can be reassociated into the form V' + C. If the searching is

  /// successful, returns C and update UserChain as a def-use chain from C to V;

  /// otherwise, UserChain is empty.

  ///

  /// \p V            The given expression

  /// \p SignExtended Whether V will be sign-extended in the computation of the

  ///                 GEP index

  /// \p ZeroExtended Whether V will be zero-extended in the computation of the

  ///                 GEP index

  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,

  ///                 an index of an inbounds GEP is guaranteed to be

  ///                 non-negative. Levaraging this, we can better split

  ///                 inbounds GEPs.

  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);


  /// A helper function to look into both operands of a binary operator.

  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,

                            bool ZeroExtended);


  /// After finding the constant offset C from the GEP index I, we build a new

  /// index I' s.t. I' + C = I. This function builds and returns the new

  /// index I' according to UserChain produced by function "find".

  ///

  /// The building conceptually takes two steps:

  /// 1) iteratively distribute sext/zext/trunc towards the leaves of the

  /// expression tree that computes I

  /// 2) reassociate the expression tree to the form I' + C.

  ///

  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute

  /// sext to a, b and 5 so that we have

  ///   sext(a) + (sext(b) + 5).

  /// Then, we reassociate it to

  ///   (sext(a) + sext(b)) + 5.

  /// Given this form, we know I' is sext(a) + sext(b).

  Value *rebuildWithoutConstOffset();


  /// After the first step of rebuilding the GEP index without the constant

  /// offset, distribute sext/zext/trunc to the operands of all operators in

  /// UserChain. e.g., zext(sext(a + (b + 5)) (assuming no overflow) =>

  /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).

  ///

  /// The function also updates UserChain to point to new subexpressions after

  /// distributing sext/zext/trunc. e.g., the old UserChain of the above example

  /// is

  ///   5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),

  /// and the new UserChain is

  ///   zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->

  ///     zext(sext(a)) + (zext(sext(b)) + zext(sext(5))

  ///

  /// \p ChainIndex The index to UserChain. ChainIndex is initially

  ///               UserChain.size() - 1, and is decremented during

  ///               the recursion.

  Value *distributeCastsAndCloneChain(unsigned ChainIndex);


  /// Reassociates the GEP index to the form I' + C and returns I'.

  Value *removeConstOffset(unsigned ChainIndex);


  /// A helper function to apply CastInsts, a list of sext/zext/trunc, to value

  /// V.  e.g., if CastInsts = [sext i32 to i64, zext i16 to i32], this function

  /// returns "sext i32 (zext i16 V to i32) to i64".

  Value *applyCasts(Value *V);


  /// A helper function that returns whether we can trace into the operands

  /// of binary operator BO for a constant offset.

  ///

  /// \p SignExtended Whether BO is surrounded by sext

  /// \p ZeroExtended Whether BO is surrounded by zext

  /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound

  ///                array index.

  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,

                    bool NonNegative);


  /// Analyze XOR instruction to extract disjoint constant bits that behave

  /// like addition operations for improved address mode folding.

  APInt extractDisjointBitsFromXor(BinaryOperator *XorInst);


  /// The path from the constant offset to the old GEP index. e.g., if the GEP

  /// index is "a * b + (c + 5)". After running function find, UserChain[0] will

  /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and

  /// UserChain[2] will be the entire expression "a * b + (c + 5)".

  ///

  /// This path helps to rebuild the new GEP index.

  SmallVector<User *, 8> UserChain;


  /// A data structure used in rebuildWithoutConstOffset. Contains all

  /// sext/zext/trunc instructions along UserChain.

  SmallVector<CastInst *, 16> CastInsts;


  /// Insertion position of cloned instructions.

  BasicBlock::iterator IP;


  const DataLayout &DL;

};


/// A pass that tries to split every GEP in the function into a variadic

/// base and a constant offset. It is a FunctionPass because searching for the

/// constant offset may inspect other basic blocks.

class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {

public:

  static char ID;


  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)

      : FunctionPass(ID), LowerGEP(LowerGEP) {

    initializeSeparateConstOffsetFromGEPLegacyPassPass(

        *PassRegistry::getPassRegistry());

  }


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.addRequired<DominatorTreeWrapperPass>();

    AU.addRequired<TargetTransformInfoWrapperPass>();

    AU.addRequired<LoopInfoWrapperPass>();

    AU.setPreservesCFG();

    AU.addRequired<TargetLibraryInfoWrapperPass>();

  }


  bool runOnFunction(Function &F) override;


private:

  bool LowerGEP;

};


/// A pass that tries to split every GEP in the function into a variadic

/// base and a constant offset. It is a FunctionPass because searching for the

/// constant offset may inspect other basic blocks.

class SeparateConstOffsetFromGEP {

public:

  SeparateConstOffsetFromGEP(

      DominatorTree *DT, LoopInfo *LI, TargetLibraryInfo *TLI,

      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)

      : DT(DT), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}


  bool run(Function &F);


private:

  /// Track the operands of an add or sub.

  using ExprKey = std::pair<Value *, Value *>;


  /// Create a pair for use as a map key for a commutable operation.

  static ExprKey createNormalizedCommutablePair(Value *A, Value *B) {

    if (A < B)

      return {A, B};

    return {B, A};

  }


  /// Tries to split the given GEP into a variadic base and a constant offset,

  /// and returns true if the splitting succeeds.

  bool splitGEP(GetElementPtrInst *GEP);


  /// Tries to reorder the given GEP with the GEP that produces the base if

  /// doing so results in producing a constant offset as the outermost

  /// index.

  bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);


  /// Lower a GEP with multiple indices into multiple GEPs with a single index.

  /// Function splitGEP already split the original GEP into a variadic part and

  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the

  /// variadic part into a set of GEPs with a single index and applies

  /// AccumulativeByteOffset to it.

  /// \p Variadic                  The variadic part of the original GEP.

  /// \p AccumulativeByteOffset    The constant offset.

  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,

                              const APInt &AccumulativeByteOffset);


  /// Finds the constant offset within each index and accumulates them. If

  /// LowerGEP is true, it finds in indices of both sequential and structure

  /// types, otherwise it only finds in sequential indices. The output

  /// NeedsExtraction indicates whether we successfully find a non-zero constant

  /// offset.

  APInt accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);


  /// Canonicalize array indices to pointer-size integers. This helps to

  /// simplify the logic of splitting a GEP. For example, if a + b is a

  /// pointer-size integer, we have

  ///   gep base, a + b = gep (gep base, a), b

  /// However, this equality may not hold if the size of a + b is smaller than

  /// the pointer size, because LLVM conceptually sign-extends GEP indices to

  /// pointer size before computing the address

  /// (http://llvm.org/docs/LangRef.html#id181).

  ///

  /// This canonicalization is very likely already done in clang and

  /// instcombine. Therefore, the program will probably remain the same.

  ///

  /// Returns true if the module changes.

  ///

  /// Verified in @i32_add in split-gep.ll

  bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);


  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.

  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting

  /// the constant offset. After extraction, it becomes desirable to reunion the

  /// distributed sexts. For example,

  ///

  ///                              &a[sext(i +nsw (j +nsw 5)]

  ///   => distribute              &a[sext(i) +nsw (sext(j) +nsw 5)]

  ///   => constant extraction     &a[sext(i) + sext(j)] + 5

  ///   => reunion                 &a[sext(i +nsw j)] + 5

  bool reuniteExts(Function &F);


  /// A helper that reunites sexts in an instruction.

  bool reuniteExts(Instruction *I);


  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.

  Instruction *findClosestMatchingDominator(

      ExprKey Key, Instruction *Dominatee,

      DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs);


  /// Verify F is free of dead code.

  void verifyNoDeadCode(Function &F);


  bool hasMoreThanOneUseInLoop(Value *v, Loop *L);


  // Swap the index operand of two GEP.

  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);


  // Check if it is safe to swap operand of two GEP.

  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,

                            Loop *CurLoop);


  const DataLayout *DL = nullptr;

  DominatorTree *DT = nullptr;

  LoopInfo *LI;

  TargetLibraryInfo *TLI;

  // Retrieved lazily since not always used.

  function_ref<TargetTransformInfo &(Function &)> GetTTI;


  /// Whether to lower a GEP with multiple indices into arithmetic operations or

  /// multiple GEPs with a single index.

  bool LowerGEP;


  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;

  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;

};


} // end anonymous namespace


char SeparateConstOffsetFromGEPLegacyPass::ID = 0;


INITIALIZE_PASS_BEGIN(

    SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",

    "Split GEPs to a variadic base and a constant offset for better CSE", false,

    false)

INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)

INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)

INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)

INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)

INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)

INITIALIZE_PASS_END(

    SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",

    "Split GEPs to a variadic base and a constant offset for better CSE", false,

    false)


FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {

  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);

}


bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,

                                            bool ZeroExtended,

                                            BinaryOperator *BO,

                                            bool NonNegative) {

  // We only consider ADD, SUB and OR, because a non-zero constant found in

  // expressions composed of these operations can be easily hoisted as a

  // constant offset by reassociation.

  if (BO->getOpcode() != Instruction::Add &&

      BO->getOpcode() != Instruction::Sub &&

      BO->getOpcode() != Instruction::Or) {

    return false;

  }


  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);

  // Do not trace into "or" unless it is equivalent to "add nuw nsw".

  // This is the case if the or's disjoint flag is set.

  if (BO->getOpcode() == Instruction::Or &&

      !cast<PossiblyDisjointInst>(BO)->isDisjoint())

    return false;


  // FIXME: We don't currently support constants from the RHS of subs,

  // when we are zero-extended, because we need a way to zero-extended

  // them before they are negated.

  if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)

    return false;


  // In addition, tracing into BO requires that its surrounding sext/zext/trunc

  // (if any) is distributable to both operands.

  //

  // Suppose BO = A op B.

  //  SignExtended | ZeroExtended | Distributable?

  // --------------+--------------+----------------------------------

  //       0       |      0       | true because no s/zext exists

  //       0       |      1       | zext(BO) == zext(A) op zext(B)

  //       1       |      0       | sext(BO) == sext(A) op sext(B)

  //       1       |      1       | zext(sext(BO)) ==

  //               |              |     zext(sext(A)) op zext(sext(B))

  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {

    // If a + b >= 0 and (a >= 0 or b >= 0), then

    //   sext(a + b) = sext(a) + sext(b)

    // even if the addition is not marked nsw.

    //

    // Leveraging this invariant, we can trace into an sext'ed inbound GEP

    // index if the constant offset is non-negative.

    //

    // Verified in @sext_add in split-gep.ll.

    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {

      if (!ConstLHS->isNegative())

        return true;

    }

    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {

      if (!ConstRHS->isNegative())

        return true;

    }

  }


  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)

  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)

  if (BO->getOpcode() == Instruction::Add ||

      BO->getOpcode() == Instruction::Sub) {

    if (SignExtended && !BO->hasNoSignedWrap())

      return false;

    if (ZeroExtended && !BO->hasNoUnsignedWrap())

      return false;

  }


  return true;

}


APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,

                                                   bool SignExtended,

                                                   bool ZeroExtended) {

  // Save off the current height of the chain, in case we need to restore it.

  size_t ChainLength = UserChain.size();


  // BO being non-negative does not shed light on whether its operands are

  // non-negative. Clear the NonNegative flag here.

  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,

                              /* NonNegative */ false);

  // If we found a constant offset in the left operand, stop and return that.

  // This shortcut might cause us to miss opportunities of combining the

  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.

  // However, such cases are probably already handled by -instcombine,

  // given this pass runs after the standard optimizations.

  if (ConstantOffset != 0) return ConstantOffset;


  // Reset the chain back to where it was when we started exploring this node,

  // since visiting the LHS didn't pan out.

  UserChain.resize(ChainLength);


  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,

                        /* NonNegative */ false);

  // If U is a sub operator, negate the constant offset found in the right

  // operand.

  if (BO->getOpcode() == Instruction::Sub)

    ConstantOffset = -ConstantOffset;


  // If RHS wasn't a suitable candidate either, reset the chain again.

  if (ConstantOffset == 0)

    UserChain.resize(ChainLength);


  return ConstantOffset;

}


APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,

                                    bool ZeroExtended, bool NonNegative) {

  // TODO(jingyue): We could trace into integer/pointer casts, such as

  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only

  // integers because it gives good enough results for our benchmarks.

  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();


  // We cannot do much with Values that are not a User, such as an Argument.

  User *U = dyn_cast<User>(V);

  if (U == nullptr) return APInt(BitWidth, 0);


  APInt ConstantOffset(BitWidth, 0);

  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {

    // Hooray, we found it!

    ConstantOffset = CI->getValue();

  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {

    // Trace into subexpressions for more hoisting opportunities.

    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))

      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);

    // Handle XOR with disjoint bits that can be treated as addition.

    else if (BO->getOpcode() == Instruction::Xor)

      ConstantOffset = extractDisjointBitsFromXor(BO);

  } else if (isa<TruncInst>(V)) {

    ConstantOffset =

        find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)

            .trunc(BitWidth);

  } else if (isa<SExtInst>(V)) {

    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,

                          ZeroExtended, NonNegative).sext(BitWidth);

  } else if (isa<ZExtInst>(V)) {

    // As an optimization, we can clear the SignExtended flag because

    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.

    //

    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.

    ConstantOffset =

        find(U->getOperand(0), /* SignExtended */ false,

             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);

  }


  // If we found a non-zero constant offset, add it to the path for

  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't

  // help this optimization.

  if (ConstantOffset != 0)

    UserChain.push_back(U);

  return ConstantOffset;

}


Value *ConstantOffsetExtractor::applyCasts(Value *V) {

  Value *Current = V;

  // CastInsts is built in the use-def order. Therefore, we apply them to V

  // in the reversed order.

  for (CastInst *I : llvm::reverse(CastInsts)) {

    if (Constant *C = dyn_cast<Constant>(Current)) {

      // Try to constant fold the cast.

      Current = ConstantFoldCastOperand(I->getOpcode(), C, I->getType(), DL);

      if (Current)

        continue;

    }


    Instruction *Cast = I->clone();

    Cast->setOperand(0, Current);

    // In ConstantOffsetExtractor::find we do not analyze nuw/nsw for trunc, so

    // we assume that it is ok to redistribute trunc over add/sub/or. But for

    // example (add (trunc nuw A), (trunc nuw B)) is more poisonous than (trunc

    // nuw (add A, B))). To make such redistributions legal we drop all the

    // poison generating flags from cloned trunc instructions here.

    if (isa<TruncInst>(Cast))

      Cast->dropPoisonGeneratingFlags();

    Cast->insertBefore(*IP->getParent(), IP);

    Current = Cast;

  }

  return Current;

}


Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {

  distributeCastsAndCloneChain(UserChain.size() - 1);

  // Remove all nullptrs (used to be sext/zext/trunc) from UserChain.

  unsigned NewSize = 0;

  for (User *I : UserChain) {

    if (I != nullptr) {

      UserChain[NewSize] = I;

      NewSize++;

    }

  }

  UserChain.resize(NewSize);

  return removeConstOffset(UserChain.size() - 1);

}


Value *

ConstantOffsetExtractor::distributeCastsAndCloneChain(unsigned ChainIndex) {

  User *U = UserChain[ChainIndex];

  if (ChainIndex == 0) {

    assert(isa<ConstantInt>(U));

    // If U is a ConstantInt, applyCasts will return a ConstantInt as well.

    return UserChain[ChainIndex] = cast<ConstantInt>(applyCasts(U));

  }


  if (CastInst *Cast = dyn_cast<CastInst>(U)) {

    assert(

        (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&

        "Only following instructions can be traced: sext, zext & trunc");

    CastInsts.push_back(Cast);

    UserChain[ChainIndex] = nullptr;

    return distributeCastsAndCloneChain(ChainIndex - 1);

  }


  // Function find only trace into BinaryOperator and CastInst.

  BinaryOperator *BO = cast<BinaryOperator>(U);

  // OpNo = which operand of BO is UserChain[ChainIndex - 1]

  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);

  Value *TheOther = applyCasts(BO->getOperand(1 - OpNo));

  Value *NextInChain = distributeCastsAndCloneChain(ChainIndex - 1);


  BinaryOperator *NewBO = nullptr;

  if (OpNo == 0) {

    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,

                                   BO->getName(), IP);

  } else {

    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,

                                   BO->getName(), IP);

  }

  return UserChain[ChainIndex] = NewBO;

}


Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {

  if (ChainIndex == 0) {

    assert(isa<ConstantInt>(UserChain[ChainIndex]));

    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());

  }


  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);

  assert((BO->use_empty() || BO->hasOneUse()) &&

         "distributeCastsAndCloneChain clones each BinaryOperator in "

         "UserChain, so no one should be used more than "

         "once");


  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);

  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);

  Value *NextInChain = removeConstOffset(ChainIndex - 1);

  Value *TheOther = BO->getOperand(1 - OpNo);


  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {

    if (CI->isZero()) {

      // Custom XOR handling for disjoint bits - preserves original XOR

      // with non-disjoint constant bits.

      // TODO: The design should be updated to support partial constant

      // extraction.

      if (BO->getOpcode() == Instruction::Xor)

        return BO;


      // If NextInChain is 0 and not the LHS of a sub, we can simplify the

      // sub-expression to be just TheOther.

      if (!(BO->getOpcode() == Instruction::Sub && OpNo == 0))

        return TheOther;

    }

  }


  BinaryOperator::BinaryOps NewOp = BO->getOpcode();

  if (BO->getOpcode() == Instruction::Or) {

    // Rebuild "or" as "add", because "or" may be invalid for the new

    // expression.

    //

    // For instance, given

    //   a | (b + 5) where a and b + 5 have no common bits,

    // we can extract 5 as the constant offset.

    //

    // However, reusing the "or" in the new index would give us

    //   (a | b) + 5

    // which does not equal a | (b + 5).

    //

    // Replacing the "or" with "add" is fine, because

    //   a | (b + 5) = a + (b + 5) = (a + b) + 5

    NewOp = Instruction::Add;

  }


  BinaryOperator *NewBO;

  if (OpNo == 0) {

    NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);

  } else {

    NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);

  }

  NewBO->takeName(BO);

  return NewBO;

}


/// Analyze XOR instruction to extract disjoint constant bits for address

/// folding

///

/// This function identifies bits in an XOR constant operand that are disjoint

/// from the base operand's known set bits. For these disjoint bits, XOR behaves

/// identically to addition, allowing us to extract them as constant offsets

/// that can be folded into addressing modes.

///

/// Transformation: `Base ^ Const` becomes `(Base ^ NonDisjointBits) +

/// DisjointBits` where DisjointBits = Const & KnownZeros(Base)

///

/// Example with ptr having known-zero low bit:

///   Original: `xor %ptr, 3`    ; 3 = 0b11

///   Analysis: DisjointBits = 3 & KnownZeros(%ptr) = 0b11 & 0b01 = 0b01

///   Result:   `(xor %ptr, 2) + 1` where 1 can be folded into address mode

///

/// \param XorInst The XOR binary operator to analyze

/// \return APInt containing the disjoint bits that can be extracted as offset,

///         or zero if no disjoint bits exist

APInt ConstantOffsetExtractor::extractDisjointBitsFromXor(

    BinaryOperator *XorInst) {

  assert(XorInst && XorInst->getOpcode() == Instruction::Xor &&

         "Expected XOR instruction");


  const unsigned BitWidth = XorInst->getType()->getScalarSizeInBits();

  Value *BaseOperand;

  ConstantInt *XorConstant;


  // Match pattern: xor BaseOperand, Constant.

  if (!match(XorInst, m_Xor(m_Value(BaseOperand), m_ConstantInt(XorConstant))))

    return APInt::getZero(BitWidth);


  // Compute known bits for the base operand.

  const SimplifyQuery SQ(DL);

  const KnownBits BaseKnownBits = computeKnownBits(BaseOperand, SQ);

  const APInt &ConstantValue = XorConstant->getValue();


  // Identify disjoint bits: constant bits that are known zero in base.

  const APInt DisjointBits = ConstantValue & BaseKnownBits.Zero;


  // Early exit if no disjoint bits found.

  if (DisjointBits.isZero())

    return APInt::getZero(BitWidth);


  // Compute the remaining non-disjoint bits that stay in the XOR.

  const APInt NonDisjointBits = ConstantValue & ~DisjointBits;


  // FIXME: Enhance XOR constant extraction to handle nested binary operations.

  // Currently we only extract disjoint bits from the immediate XOR constant,

  // but we could recursively process cases like:

  //   xor (add %base, C1), C2  ->  add %base, (C1 ^ disjoint_bits(C2))

  // This requires careful analysis to ensure the transformation preserves

  // semantics, particularly around sign extension and overflow behavior.


  // Add the non-disjoint constant to the user chain for later transformation

  // This will replace the original constant in the XOR with the new

  // constant.

  UserChain.push_back(ConstantInt::get(XorInst->getType(), NonDisjointBits));

  return DisjointBits;

}


/// A helper function to check if reassociating through an entry in the user

/// chain would invalidate the GEP's nuw flag.


static bool allowsPreservingNUW(const User *U) {

  if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {

    // Binary operations need to be effectively add nuw.

    auto Opcode = BO->getOpcode();

    if (Opcode == BinaryOperator::Or) {

      // Ors are only considered here if they are disjoint. The addition that

      // they represent in this case is NUW.

      assert(cast<PossiblyDisjointInst>(BO)->isDisjoint());

      return true;

    }

    return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap();

  }

  // UserChain can only contain ConstantInt, CastInst, or BinaryOperator.

  // Among the possible CastInsts, only trunc without nuw is a problem: If it

  // is distributed through an add nuw, wrapping may occur:

  // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)"

  if (const TruncInst *TI = dyn_cast<TruncInst>(U))

    return TI->hasNoUnsignedWrap();

  assert((isa<CastInst>(U) || isa<ConstantInt>(U)) && "Unexpected User.");

  return true;

}


Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,

                                        User *&UserChainTail,

                                        bool &PreservesNUW) {

  ConstantOffsetExtractor Extractor(GEP->getIterator());

  // Find a non-zero constant offset first.

  APInt ConstantOffset =

      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,

                     GEP->isInBounds());

  if (ConstantOffset == 0) {

    UserChainTail = nullptr;

    PreservesNUW = true;

    return nullptr;

  }


  PreservesNUW = all_of(Extractor.UserChain, allowsPreservingNUW);


  // Separates the constant offset from the GEP index.

  Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();

  UserChainTail = Extractor.UserChain.back();

  return IdxWithoutConstOffset;

}


APInt ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {

  // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.

  return ConstantOffsetExtractor(GEP->getIterator())

      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,

            GEP->isInBounds());

}


bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(

    GetElementPtrInst *GEP) {

  bool Changed = false;

  Type *PtrIdxTy = DL->getIndexType(GEP->getType());

  gep_type_iterator GTI = gep_type_begin(*GEP);

  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();

       I != E; ++I, ++GTI) {

    // Skip struct member indices which must be i32.

    if (GTI.isSequential()) {

      if ((*I)->getType() != PtrIdxTy) {

        *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom",

                                         GEP->getIterator());

        Changed = true;

      }

    }

  }

  return Changed;

}


APInt SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,

                                                       bool &NeedsExtraction) {

  NeedsExtraction = false;

  unsigned IdxWidth = DL->getIndexTypeSizeInBits(GEP->getType());

  APInt AccumulativeByteOffset(IdxWidth, 0);

  gep_type_iterator GTI = gep_type_begin(*GEP);

  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {

    if (GTI.isSequential()) {

      // Constant offsets of scalable types are not really constant.

      if (GTI.getIndexedType()->isScalableTy())

        continue;


      // Tries to extract a constant offset from this GEP index.

      APInt ConstantOffset =

          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP)

              .sextOrTrunc(IdxWidth);

      if (ConstantOffset != 0) {

        NeedsExtraction = true;

        // A GEP may have multiple indices.  We accumulate the extracted

        // constant offset to a byte offset, and later offset the remainder of

        // the original GEP with this byte offset.

        AccumulativeByteOffset +=

            ConstantOffset * APInt(IdxWidth,

                                   GTI.getSequentialElementStride(*DL),

                                   /*IsSigned=*/true, /*ImplicitTrunc=*/true);

      }

    } else if (LowerGEP) {

      StructType *StTy = GTI.getStructType();

      uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();

      // Skip field 0 as the offset is always 0.

      if (Field != 0) {

        NeedsExtraction = true;

        AccumulativeByteOffset +=

            APInt(IdxWidth, DL->getStructLayout(StTy)->getElementOffset(Field),

                  /*IsSigned=*/true, /*ImplicitTrunc=*/true);

      }

    }

  }

  return AccumulativeByteOffset;

}


void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(

    GetElementPtrInst *Variadic, const APInt &AccumulativeByteOffset) {

  IRBuilder<> Builder(Variadic);

  Type *PtrIndexTy = DL->getIndexType(Variadic->getType());


  Value *ResultPtr = Variadic->getOperand(0);

  Loop *L = LI->getLoopFor(Variadic->getParent());

  // Check if the base is not loop invariant or used more than once.

  bool isSwapCandidate =

      L && L->isLoopInvariant(ResultPtr) &&

      !hasMoreThanOneUseInLoop(ResultPtr, L);

  Value *FirstResult = nullptr;


  gep_type_iterator GTI = gep_type_begin(*Variadic);

  // Create an ugly GEP for each sequential index. We don't create GEPs for

  // structure indices, as they are accumulated in the constant offset index.

  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {

    if (GTI.isSequential()) {

      Value *Idx = Variadic->getOperand(I);

      // Skip zero indices.

      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))

        if (CI->isZero())

          continue;


      APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),

                                GTI.getSequentialElementStride(*DL));

      // Scale the index by element size.

      if (ElementSize != 1) {

        if (ElementSize.isPowerOf2()) {

          Idx = Builder.CreateShl(

              Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2()));

        } else {

          Idx =

              Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize));

        }

      }

      // Create an ugly GEP with a single index for each index.

      ResultPtr = Builder.CreatePtrAdd(ResultPtr, Idx, "uglygep");

      if (FirstResult == nullptr)

        FirstResult = ResultPtr;

    }

  }


  // Create a GEP with the constant offset index.

  if (AccumulativeByteOffset != 0) {

    Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset);

    ResultPtr = Builder.CreatePtrAdd(ResultPtr, Offset, "uglygep");

  } else

    isSwapCandidate = false;


  // If we created a GEP with constant index, and the base is loop invariant,

  // then we swap the first one with it, so LICM can move constant GEP out

  // later.

  auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);

  auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);

  if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))

    swapGEPOperand(FirstGEP, SecondGEP);


  Variadic->replaceAllUsesWith(ResultPtr);

  Variadic->eraseFromParent();

}


bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,

                                            TargetTransformInfo &TTI) {

  auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());

  if (!PtrGEP)

    return false;


  bool NestedNeedsExtraction;

  APInt NestedByteOffset = accumulateByteOffset(PtrGEP, NestedNeedsExtraction);

  if (!NestedNeedsExtraction)

    return false;


  unsigned AddrSpace = PtrGEP->getPointerAddressSpace();

  if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),

                                 /*BaseGV=*/nullptr,

                                 NestedByteOffset.getSExtValue(),

                                 /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))

    return false;


  bool GEPInBounds = GEP->isInBounds();

  bool PtrGEPInBounds = PtrGEP->isInBounds();

  bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;

  if (IsChainInBounds) {

    auto IsKnownNonNegative = [this](Value *V) {

      return isKnownNonNegative(V, *DL);

    };

    IsChainInBounds &= all_of(GEP->indices(), IsKnownNonNegative);

    if (IsChainInBounds)

      IsChainInBounds &= all_of(PtrGEP->indices(), IsKnownNonNegative);

  }


  IRBuilder<> Builder(GEP);

  // For trivial GEP chains, we can swap the indices.

  Value *NewSrc = Builder.CreateGEP(

      GEP->getSourceElementType(), PtrGEP->getPointerOperand(),

      SmallVector<Value *, 4>(GEP->indices()), "", IsChainInBounds);

  Value *NewGEP = Builder.CreateGEP(PtrGEP->getSourceElementType(), NewSrc,

                                    SmallVector<Value *, 4>(PtrGEP->indices()),

                                    "", IsChainInBounds);

  GEP->replaceAllUsesWith(NewGEP);

  RecursivelyDeleteTriviallyDeadInstructions(GEP);

  return true;

}


bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {

  // Skip vector GEPs.

  if (GEP->getType()->isVectorTy())

    return false;


  // If the base of this GEP is a ptradd of a constant, lets pass the constant

  // along. This ensures that when we have a chain of GEPs the constant

  // offset from each is accumulated.

  Value *NewBase;

  const APInt *BaseOffset;

  const bool ExtractBase =

      match(GEP->getPointerOperand(),

            m_PtrAdd(m_Value(NewBase), m_APInt(BaseOffset)));


  unsigned IdxWidth = DL->getIndexTypeSizeInBits(GEP->getType());

  const APInt BaseByteOffset =

      ExtractBase ? BaseOffset->sextOrTrunc(IdxWidth) : APInt(IdxWidth, 0);


  // The backend can already nicely handle the case where all indices are

  // constant.

  if (GEP->hasAllConstantIndices() && !ExtractBase)

    return false;


  bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);


  bool NeedsExtraction;

  APInt AccumulativeByteOffset =

      BaseByteOffset + accumulateByteOffset(GEP, NeedsExtraction);


  TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());


  if (!NeedsExtraction && !ExtractBase) {

    Changed |= reorderGEP(GEP, TTI);

    return Changed;

  }


  // If LowerGEP is disabled, before really splitting the GEP, check whether the

  // backend supports the addressing mode we are about to produce. If no, this

  // splitting probably won't be beneficial.

  // If LowerGEP is enabled, even the extracted constant offset can not match

  // the addressing mode, we can still do optimizations to other lowered parts

  // of variable indices. Therefore, we don't check for addressing modes in that

  // case.

  if (!LowerGEP) {

    unsigned AddrSpace = GEP->getPointerAddressSpace();

    if (!TTI.isLegalAddressingMode(

            GEP->getResultElementType(),

            /*BaseGV=*/nullptr, AccumulativeByteOffset.getSExtValue(),

            /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace)) {

      return Changed;

    }

  }


  // Track information for preserving GEP flags.

  bool AllOffsetsNonNegative = AccumulativeByteOffset.isNonNegative();

  bool AllNUWPreserved = GEP->hasNoUnsignedWrap();

  bool NewGEPInBounds = GEP->isInBounds();

  bool NewGEPNUSW = GEP->hasNoUnsignedSignedWrap();


  // Remove the constant offset in each sequential index. The resultant GEP

  // computes the variadic base.

  // Notice that we don't remove struct field indices here. If LowerGEP is

  // disabled, a structure index is not accumulated and we still use the old

  // one. If LowerGEP is enabled, a structure index is accumulated in the

  // constant offset. LowerToSingleIndexGEPs will later handle the constant

  // offset and won't need a new structure index.

  gep_type_iterator GTI = gep_type_begin(*GEP);

  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {

    if (GTI.isSequential()) {

      // Constant offsets of scalable types are not really constant.

      if (GTI.getIndexedType()->isScalableTy())

        continue;


      // Splits this GEP index into a variadic part and a constant offset, and

      // uses the variadic part as the new index.

      Value *Idx = GEP->getOperand(I);

      User *UserChainTail;

      bool PreservesNUW;

      Value *NewIdx = ConstantOffsetExtractor::Extract(Idx, GEP, UserChainTail,

                                                       PreservesNUW);

      if (NewIdx != nullptr) {

        // Switches to the index with the constant offset removed.

        GEP->setOperand(I, NewIdx);

        // After switching to the new index, we can garbage-collect UserChain

        // and the old index if they are not used.

        RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);

        RecursivelyDeleteTriviallyDeadInstructions(Idx);

        Idx = NewIdx;

        AllNUWPreserved &= PreservesNUW;

      }

      AllOffsetsNonNegative =

          AllOffsetsNonNegative && isKnownNonNegative(Idx, *DL);

    }

  }

  if (ExtractBase) {

    GEPOperator *Base = cast<GEPOperator>(GEP->getPointerOperand());

    AllNUWPreserved &= Base->hasNoUnsignedWrap();

    NewGEPInBounds &= Base->isInBounds();

    NewGEPNUSW &= Base->hasNoUnsignedSignedWrap();

    AllOffsetsNonNegative &= BaseByteOffset.isNonNegative();


    GEP->setOperand(0, NewBase);

    RecursivelyDeleteTriviallyDeadInstructions(Base);

  }


  // Clear the inbounds attribute because the new index may be off-bound.

  // e.g.,

  //

  //   b     = add i64 a, 5

  //   addr  = gep inbounds float, float* p, i64 b

  //

  // is transformed to:

  //

  //   addr2 = gep float, float* p, i64 a ; inbounds removed

  //   addr  = gep float, float* addr2, i64 5 ; inbounds removed

  //

  // If a is -4, although the old index b is in bounds, the new index a is

  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the

  // inbounds keyword is not present, the offsets are added to the base

  // address with silently-wrapping two's complement arithmetic".

  // Therefore, the final code will be a semantically equivalent.

  GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();


  // If the initial GEP was inbounds/nusw and all variable indices and the

  // accumulated offsets are non-negative, they can be added in any order and

  // the intermediate results are in bounds and don't overflow in a nusw sense.

  // So, we can preserve the inbounds/nusw flag for both GEPs.

  bool CanPreserveInBoundsNUSW = AllOffsetsNonNegative;


  // If the initial GEP was NUW and all operations that we reassociate were NUW

  // additions, the resulting GEPs are also NUW.

  if (AllNUWPreserved) {

    NewGEPFlags |= GEPNoWrapFlags::noUnsignedWrap();

    // If the initial GEP additionally had NUSW (or inbounds, which implies

    // NUSW), we know that the indices in the initial GEP must all have their

    // signbit not set. For indices that are the result of NUW adds, the

    // add-operands therefore also don't have their signbit set. Therefore, all

    // indices of the resulting GEPs are non-negative -> we can preserve

    // the inbounds/nusw flag.

    CanPreserveInBoundsNUSW |= NewGEPNUSW;

  }


  if (CanPreserveInBoundsNUSW) {

    if (NewGEPInBounds)

      NewGEPFlags |= GEPNoWrapFlags::inBounds();

    else if (NewGEPNUSW)

      NewGEPFlags |= GEPNoWrapFlags::noUnsignedSignedWrap();

  }


  GEP->setNoWrapFlags(NewGEPFlags);


  // Lowers a GEP to GEPs with a single index.

  if (LowerGEP) {

    lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);

    return true;

  }


  // No need to create another GEP if the accumulative byte offset is 0.

  if (AccumulativeByteOffset == 0)

    return true;


  // Offsets the base with the accumulative byte offset.

  //

  //   %gep                        ; the base

  //   ... %gep ...

  //

  // => add the offset

  //

  //   %gep2                       ; clone of %gep

  //   %new.gep = gep i8, %gep2, %offset

  //   %gep                        ; will be removed

  //   ... %gep ...

  //

  // => replace all uses of %gep with %new.gep and remove %gep

  //

  //   %gep2                       ; clone of %gep

  //   %new.gep = gep i8, %gep2, %offset

  //   ... %new.gep ...

  Instruction *NewGEP = GEP->clone();

  NewGEP->insertBefore(GEP->getIterator());


  Type *PtrIdxTy = DL->getIndexType(GEP->getType());

  IRBuilder<> Builder(GEP);

  NewGEP = cast<Instruction>(Builder.CreatePtrAdd(

      NewGEP, ConstantInt::get(PtrIdxTy, AccumulativeByteOffset),

      GEP->getName(), NewGEPFlags));

  NewGEP->copyMetadata(*GEP);


  GEP->replaceAllUsesWith(NewGEP);

  GEP->eraseFromParent();


  return true;

}


bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {

  if (skipFunction(F))

    return false;

  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);

  auto GetTTI = [this](Function &F) -> TargetTransformInfo & {

    return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  };

  SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);

  return Impl.run(F);

}


bool SeparateConstOffsetFromGEP::run(Function &F) {

  if (DisableSeparateConstOffsetFromGEP)

    return false;


  DL = &F.getDataLayout();

  bool Changed = false;


  ReversePostOrderTraversal<Function *> RPOT(&F);

  for (BasicBlock *B : RPOT) {

    if (!DT->isReachableFromEntry(B))

      continue;


    for (Instruction &I : llvm::make_early_inc_range(*B))

      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))

        Changed |= splitGEP(GEP);

    // No need to split GEP ConstantExprs because all its indices are constant

    // already.

  }


  Changed |= reuniteExts(F);


  if (VerifyNoDeadCode)

    verifyNoDeadCode(F);


  return Changed;

}


Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(

    ExprKey Key, Instruction *Dominatee,

    DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs) {

  auto Pos = DominatingExprs.find(Key);

  if (Pos == DominatingExprs.end())

    return nullptr;


  auto &Candidates = Pos->second;

  // Because we process the basic blocks in pre-order of the dominator tree, a

  // candidate that doesn't dominate the current instruction won't dominate any

  // future instruction either. Therefore, we pop it out of the stack. This

  // optimization makes the algorithm O(n).

  while (!Candidates.empty()) {

    Instruction *Candidate = Candidates.back();

    if (DT->dominates(Candidate, Dominatee))

      return Candidate;

    Candidates.pop_back();

  }

  return nullptr;

}


bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {

  if (!I->getType()->isIntOrIntVectorTy())

    return false;


  //   Dom: LHS+RHS

  //   I: sext(LHS)+sext(RHS)

  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).

  // TODO: handle zext

  Value *LHS = nullptr, *RHS = nullptr;

  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {

    if (LHS->getType() == RHS->getType()) {

      ExprKey Key = createNormalizedCommutablePair(LHS, RHS);

      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {

        Instruction *NewSExt =

            new SExtInst(Dom, I->getType(), "", I->getIterator());

        NewSExt->takeName(I);

        I->replaceAllUsesWith(NewSExt);

        NewSExt->setDebugLoc(I->getDebugLoc());

        RecursivelyDeleteTriviallyDeadInstructions(I);

        return true;

      }

    }

  } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {

    if (LHS->getType() == RHS->getType()) {

      if (auto *Dom =

              findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) {

        Instruction *NewSExt =

            new SExtInst(Dom, I->getType(), "", I->getIterator());

        NewSExt->takeName(I);

        I->replaceAllUsesWith(NewSExt);

        NewSExt->setDebugLoc(I->getDebugLoc());

        RecursivelyDeleteTriviallyDeadInstructions(I);

        return true;

      }

    }

  }


  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.

  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {

    if (programUndefinedIfPoison(I)) {

      ExprKey Key = createNormalizedCommutablePair(LHS, RHS);

      DominatingAdds[Key].push_back(I);

    }

  } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {

    if (programUndefinedIfPoison(I))

      DominatingSubs[{LHS, RHS}].push_back(I);

  }

  return false;

}


bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {

  bool Changed = false;

  DominatingAdds.clear();

  DominatingSubs.clear();

  for (const auto Node : depth_first(DT)) {

    BasicBlock *BB = Node->getBlock();

    for (Instruction &I : llvm::make_early_inc_range(*BB))

      Changed |= reuniteExts(&I);

  }

  return Changed;

}


void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {

  for (BasicBlock &B : F) {

    for (Instruction &I : B) {

      if (isInstructionTriviallyDead(&I)) {

        std::string ErrMessage;

        raw_string_ostream RSO(ErrMessage);

        RSO << "Dead instruction detected!\n" << I << "\n";

        llvm_unreachable(RSO.str().c_str());

      }

    }

  }

}


bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(

    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {

  if (!FirstGEP || !FirstGEP->hasOneUse())

    return false;


  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())

    return false;


  if (FirstGEP == SecondGEP)

    return false;


  unsigned FirstNum = FirstGEP->getNumOperands();

  unsigned SecondNum = SecondGEP->getNumOperands();

  // Give up if the number of operands are not 2.

  if (FirstNum != SecondNum || FirstNum != 2)

    return false;


  Value *FirstBase = FirstGEP->getOperand(0);

  Value *SecondBase = SecondGEP->getOperand(0);

  Value *FirstOffset = FirstGEP->getOperand(1);

  // Give up if the index of the first GEP is loop invariant.

  if (CurLoop->isLoopInvariant(FirstOffset))

    return false;


  // Give up if base doesn't have same type.

  if (FirstBase->getType() != SecondBase->getType())

    return false;


  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);


  // Check if the second operand of first GEP has constant coefficient.

  // For an example, for the following code,  we won't gain anything by

  // hoisting the second GEP out because the second GEP can be folded away.

  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256

  //   %67 = shl i64 %scevgep.sum.ur159, 2

  //   %uglygep160 = getelementptr i8* %65, i64 %67

  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024


  // Skip constant shift instruction which may be generated by Splitting GEPs.

  if (FirstOffsetDef && FirstOffsetDef->isShift() &&

      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))

    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));


  // Give up if FirstOffsetDef is an Add or Sub with constant.

  // Because it may not profitable at all due to constant folding.

  if (FirstOffsetDef)

    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {

      unsigned opc = BO->getOpcode();

      if ((opc == Instruction::Add || opc == Instruction::Sub) &&

          (isa<ConstantInt>(BO->getOperand(0)) ||

           isa<ConstantInt>(BO->getOperand(1))))

        return false;

    }

  return true;

}


bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {

  // TODO: Could look at uses of globals, but we need to make sure we are

  // looking at the correct function.

  if (isa<Constant>(V))

    return false;


  int UsesInLoop = 0;

  for (User *U : V->users()) {

    if (Instruction *User = dyn_cast<Instruction>(U))

      if (L->contains(User))

        if (++UsesInLoop > 1)

          return true;

  }

  return false;

}


void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,

                                                GetElementPtrInst *Second) {

  Value *Offset1 = First->getOperand(1);

  Value *Offset2 = Second->getOperand(1);

  First->setOperand(1, Offset2);

  Second->setOperand(1, Offset1);


  // We changed p+o+c to p+c+o, p+c may not be inbound anymore.

  const DataLayout &DAL = First->getDataLayout();

  APInt Offset(DAL.getIndexSizeInBits(

                   cast<PointerType>(First->getType())->getAddressSpace()),

               0);

  Value *NewBase =

      First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);

  uint64_t ObjectSize;

  if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||

     Offset.ugt(ObjectSize)) {

    // TODO(gep_nowrap): Make flag preservation more precise.

    First->setNoWrapFlags(GEPNoWrapFlags::none());

    Second->setNoWrapFlags(GEPNoWrapFlags::none());

  } else

    First->setIsInBounds(true);

}


void SeparateConstOffsetFromGEPPass::printPipeline(

    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {

  static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> *>(this)

      ->printPipeline(OS, MapClassName2PassName);

  OS << '<';

  if (LowerGEP)

    OS << "lower-gep";

  OS << '>';

}


PreservedAnalyses


SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {

  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);

  auto *LI = &AM.getResult<LoopAnalysis>(F);

  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);

  auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {

    return AM.getResult<TargetIRAnalysis>(F);

  };

  SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);

  if (!Impl.run(F))

    return PreservedAnalyses::all();

  PreservedAnalyses PA;

  PA.preserveSet<CFGAnalyses>();

  return PA;

}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Casting.h

CommandLine.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

Domain::NonNegative
@ NonNegative
Definition CorrelatedValuePropagation.cpp:756

DataLayout.h

DenseMap.h
This file defines the DenseMap class.

DepthFirstIterator.h
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.

DerivedTypes.h

Dominators.h

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition EntryExitInstrumenter.cpp:103

GetElementPtrTypeIterator.h

GEP
Hexagon Common GEP
Definition HexagonCommonGEP.cpp:164

IRBuilder.h

BasicBlock.h

Constant.h

Function.h

Instruction.h

Module.h
Module.h This file contains the declarations for the Module class.

PassManager.h
This header defines various interfaces for pass management in LLVM.

Type.h

User.h

Value.h

InitializePasses.h

InstrTypes.h

Instructions.h

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

LoopInfo.h

Find
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
Definition MCSubtargetInfo.cpp:27

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

MemoryBuiltins.h

Field
OptimizedStructLayoutField Field
Definition OptimizedStructLayout.cpp:18

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

Pass.h

PatternMatch.h

Scalar.h

DisableSeparateConstOffsetFromGEP
static cl::opt< bool > DisableSeparateConstOffsetFromGEP("disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), cl::Hidden)

allowsPreservingNUW
static bool allowsPreservingNUW(const User *U)
A helper function to check if reassociating through an entry in the user chain would invalidate the G...
Definition SeparateConstOffsetFromGEP.cpp:833

VerifyNoDeadCode
static cl::opt< bool > VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false), cl::desc("Verify this pass produces no dead code"), cl::Hidden)

SeparateConstOffsetFromGEP.h

SmallVector.h
This file defines the SmallVector class.

getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39

TargetLibraryInfo.h

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

Local.h

ValueTracking.h

RHS
Value * RHS
Definition X86PartialReduction.cpp:81

LHS
Value * LHS
Definition X86PartialReduction.cpp:80

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::zext
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023

llvm::APInt::trunc
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936

llvm::APInt::isZero
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381

llvm::APInt::sextOrTrunc
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1052

llvm::APInt::logBase2
unsigned logBase2() const
Definition APInt.h:1776

llvm::APInt::isNonNegative
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335

llvm::APInt::sext
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996

llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441

llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201

llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition PassManager.h:411

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition PassAnalysisSupport.h:76

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170

llvm::BinaryOperator
Definition InstrTypes.h:171

llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition InstrTypes.h:374

llvm::BinaryOperator::Create
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition Instructions.cpp:2704

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73

llvm::CastInst::CreateIntegerCast
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
Definition Instructions.cpp:3128

llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159

llvm::DataLayout::getIndexSizeInBits
unsigned getIndexSizeInBits(unsigned AS) const
The size in bits of indices used for address calculation in getelementptr and for addresses in the gi...
Definition DataLayout.h:498

llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283

llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321

llvm::DominatorTree::isReachableFromEntry
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition Dominators.cpp:334

llvm::DominatorTree::dominates
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition Dominators.cpp:135

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314

llvm::Function
Definition Function.h:64

llvm::GEPNoWrapFlags::inBounds
static GEPNoWrapFlags inBounds()
Definition GEPNoWrapFlags.h:50

llvm::GEPNoWrapFlags::noUnsignedWrap
static GEPNoWrapFlags noUnsignedWrap()
Definition GEPNoWrapFlags.h:56

llvm::GEPNoWrapFlags::noUnsignedSignedWrap
static GEPNoWrapFlags noUnsignedSignedWrap()
Definition GEPNoWrapFlags.h:53

llvm::GEPNoWrapFlags::none
static GEPNoWrapFlags none()
Definition GEPNoWrapFlags.h:46

llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition Instructions.h:950

llvm::GetElementPtrInst::setNoWrapFlags
LLVM_ABI void setNoWrapFlags(GEPNoWrapFlags NW)
Set nowrap flags for GEP instruction.
Definition Instructions.cpp:1622

llvm::Instruction::hasNoUnsignedWrap
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
Definition Instruction.cpp:415

llvm::Instruction::hasNoSignedWrap
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
Definition Instruction.cpp:422

llvm::Instruction::insertBefore
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
Definition Instruction.cpp:119

llvm::Instruction::isShift
bool isShift() const
Definition Instruction.h:320

llvm::Instruction::dropPoisonGeneratingFlags
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
Definition Instruction.cpp:438

llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition Instruction.h:510

llvm::Instruction::copyMetadata
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition Instruction.cpp:1386

llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569

llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition GenericLoopInfo.h:606

llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596

llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:61

llvm::PassRegistry::getPassRegistry
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition PassRegistry.cpp:23

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::PreservedAnalyses::preserveSet
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151

llvm::ScalarEvolutionWrapperPass
Definition ScalarEvolution.h:2412

llvm::SeparateConstOffsetFromGEPPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition SeparateConstOffsetFromGEP.cpp:1475

llvm::SeparateConstOffsetFromGEPPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
Definition SeparateConstOffsetFromGEP.cpp:1486

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55

llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition TargetTransformInfo.h:2080

llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition TargetLibraryInfo.h:602

llvm::TargetLibraryInfoWrapperPass
Definition TargetLibraryInfo.h:627

llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition TargetTransformInfo.h:2137

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:278

llvm::TargetTransformInfo::isLegalAddressingMode
LLVM_ABI bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition TargetTransformInfo.cpp:435

llvm::TruncInst
This class represents a truncation of integer types.
Definition Instructions.h:4570

llvm::Type::getIntegerBitWidth
LLVM_ABI unsigned getIntegerBitWidth() const
Definition DerivedTypes.h:99

llvm::Type::isScalableTy
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61

llvm::Type::getScalarSizeInBits
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230

llvm::User
Definition User.h:44

llvm::User::op_iterator
Use * op_iterator
Definition User.h:254

llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition User.h:212

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:207

llvm::User::getNumOperands
unsigned getNumOperands() const
Definition User.h:229

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition Value.h:759

llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439

llvm::Value::use_empty
bool use_empty() const
Definition Value.h:346

llvm::Value::getName
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322

llvm::Value::takeName
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403

llvm::cl::opt
Definition CommandLine.h:1454

llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition STLFunctionalExtras.h:37

llvm::generic_gep_type_iterator::isSequential
bool isSequential() const
Definition GetElementPtrTypeIterator.h:147

llvm::generic_gep_type_iterator::getStructType
StructType * getStructType() const
Definition GetElementPtrTypeIterator.h:164

llvm::generic_gep_type_iterator::getSequentialElementStride
TypeSize getSequentialElementStride(const DataLayout &DL) const
Definition GetElementPtrTypeIterator.h:154

llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition GetElementPtrTypeIterator.h:102

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition ilist_node.h:34

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53

Changed
Changed
Definition ObjCARCOpts.cpp:2369

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

false
Definition MachinePipeliner.cpp:245

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81

llvm::M68k::MemAddrModeKind::U
@ U
Definition M68kBaseInfo.h:61

llvm::M68k::MemAddrModeKind::V
@ V
Definition M68kBaseInfo.h:63

llvm::M68k::MemAddrModeKind::L
@ L
Definition M68kBaseInfo.h:70

llvm::MCID::Variadic
@ Variadic
Definition MCInstrDesc.h:151

llvm::PatternMatch
Definition PatternMatch.h:47

llvm::PatternMatch::m_PtrAdd
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
Definition PatternMatch.h:2102

llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition PatternMatch.h:1211

llvm::PatternMatch::m_APInt
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition PatternMatch.h:305

llvm::PatternMatch::m_Xor
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
Definition PatternMatch.h:1337

llvm::PatternMatch::m_NSWSub
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
Definition PatternMatch.h:1445

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition PatternMatch.h:49

llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition PatternMatch.h:175

llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition PatternMatch.h:99

llvm::PatternMatch::m_NSWAdd
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition PatternMatch.h:1429

llvm::PatternMatch::m_SExt
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
Definition PatternMatch.h:2278

llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition PatternMatch.h:1223

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm::codeview::PublicSymFlags::Function
@ Function
Definition CodeView.h:408

llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition PointerTypeAnalysis.cpp:205

llvm::objcarc::ARCInstKind::User
@ User
could "use" a pointer
Definition ObjCARCInstKind.h:52

llvm::rdf::Node
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381

llvm::sampleprof::Base
@ Base
Definition Discriminator.h:58

llvm::sandboxir::Instruction
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::Offset
@ Offset
Definition DWP.cpp:532

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737

llvm::RecursivelyDeleteTriviallyDeadInstructions
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632

llvm::initializeSeparateConstOffsetFromGEPLegacyPassPass
LLVM_ABI void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &)

llvm::dyn_cast_or_null
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753

llvm::isInstructionTriviallyDead
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:402

llvm::getObjectSize
LLVM_ABI bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
Definition MemoryBuiltins.cpp:581

llvm::reverse
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406

llvm::computeKnownBits
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Definition ValueTracking.cpp:152

llvm::programUndefinedIfPoison
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
Definition ValueTracking.cpp:8302

llvm::gep_type_iterator
generic_gep_type_iterator<> gep_type_iterator
Definition GetElementPtrTypeIterator.h:171

llvm::ConstantFoldCastOperand
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
Definition ConstantFolding.cpp:1485

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1131

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::Key
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
Definition PassManager.h:667

llvm::createSeparateConstOffsetFromGEPPass
LLVM_ABI FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Definition SeparateConstOffsetFromGEP.cpp:477

llvm::IRMemLocation::First
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74

llvm::TTI
TargetTransformInfo TTI
Definition TargetTransformInfo.h:273

llvm::IRBuilder
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

llvm::BitWidth
constexpr unsigned BitWidth
Definition BitmaskEnum.h:219

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition GetElementPtrTypeIterator.h:173

llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition DepthFirstIterator.h:233

llvm::FunctionAnalysisManager
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Definition PassManager.h:563

llvm::isKnownNonNegative
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
Definition ValueTracking.cpp:281

raw_ostream.h

llvm::KnownBits::Zero
APInt Zero
Definition KnownBits.h:25

llvm::PassInfoMixin
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70

llvm::cl::desc
Definition CommandLine.h:410