//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Loop unrolling may create many similar GEPs for array accesses.
// e.g., a 2-level loop
//
// float a[32][32]; // global variable
//
// for (int i = 0; i < 2; ++i) {
//   for (int j = 0; j < 2; ++j) {
//     ...
//     ... = a[x + i][y + j];
//     ...
//   }
// }
//
// will probably be unrolled to:
//
// gep %a, 0, %x, %y; load
// gep %a, 0, %x, %y + 1; load
// gep %a, 0, %x + 1, %y; load
// gep %a, 0, %x + 1, %y + 1; load
//
// LLVM's GVN does not use partial redundancy elimination yet, and is thus
// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
// a significant slowdown on targets with limited addressing modes. For
// instance, because the PTX target does not support the reg+reg addressing
// mode, the NVPTX backend emits PTX code that literally computes the pointer
// address of each GEP, wasting tons of registers. It emits the following PTX
// for the first load and similar PTX for the other loads.
//
// mov.u32         %r1, %x;
// mov.u32         %r2, %y;
// mul.wide.u32    %rl2, %r1, 128;
// mov.u64         %rl3, a;
// add.s64         %rl4, %rl3, %rl2;
// mul.wide.u32    %rl5, %r2, 4;
// add.s64         %rl6, %rl4, %rl5;
// ld.global.f32   %f1, [%rl6];
//
// To reduce the register pressure, the optimization implemented in this file
// merges the common part of a group of GEPs, so we can compute each pointer
// address by adding a simple offset to the common part, saving many registers.
//
// It works by splitting each GEP into a variadic base and a constant offset.
// The variadic base can be computed once and reused by multiple GEPs, and the
// constant offsets can be nicely folded into the reg+immediate addressing mode
// (supported by most targets) without using any extra register.
//
// For instance, we transform the four GEPs and four loads in the above example
// into:
//
// base = gep a, 0, x, y
// load base
// load base + 1 * sizeof(float)
// load base + 32 * sizeof(float)
// load base + 33 * sizeof(float)
//
// Given the transformed IR, a backend that supports the reg+immediate
// addressing mode can easily fold the pointer arithmetic into the loads. For
// example, the NVPTX backend can easily fold the pointer arithmetic into the
// ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
//
// mov.u32         %r1, %x;
// mov.u32         %r2, %y;
// mul.wide.u32    %rl2, %r1, 128;
// mov.u64         %rl3, a;
// add.s64         %rl4, %rl3, %rl2;
// mul.wide.u32    %rl5, %r2, 4;
// add.s64         %rl6, %rl4, %rl5;
// ld.global.f32   %f1, [%rl6];     // so far the same as unoptimized PTX
// ld.global.f32   %f2, [%rl6+4];   // much better
// ld.global.f32   %f3, [%rl6+128]; // much better
// ld.global.f32   %f4, [%rl6+132]; // much better
//
// Another improvement enabled by the LowerGEP flag is to lower a GEP with
// multiple indices to either multiple GEPs with a single index or arithmetic
// operations (depending on whether the target uses alias analysis in codegen).
// Such a transformation has the following benefits:
// (1) It can always extract constants in the indices of structure types.
// (2) After such lowering, there are more optimization opportunities such as
//     CSE, LICM and CGP.
//
// E.g. The following GEPs have multiple indices:
//  BB1:
//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
//    load %p
//    ...
//  BB2:
//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
//    load %p2
//    ...
//
// We cannot do CSE on the common part related to index "i64 %i" while the
// GEPs remain in this form. Lowering the GEPs makes such CSE possible.
// If the target does not use alias analysis in codegen, this pass will
// lower a GEP with multiple indices into arithmetic operations:
//  BB1:
//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = add i64 %1, %2                          ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = add i64 %3, %4
//    %6 = add i64 %5, struct_field_3              ; Constant offset
//    %p = inttoptr i64 %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = add i64 %7, %8                          ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = add i64 %9, %10
//    %12 = add i64 %11, struct_field_2            ; Constant offset
//    %p2 = inttoptr i64 %12 to i32*
//    load %p2
//    ...
//
// If the target uses alias analysis in codegen, this pass will lower a GEP
// with multiple indices into multiple GEPs with a single index:
//  BB1:
//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = getelementptr i8* %3, i64 %4
//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
//    %p = bitcast i8* %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = getelementptr i8* %9, i64 %10
//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
//    %p2 = bitcast i8* %12 to i32*
//    load %p2
//    ...
//
// Lowering GEPs can also benefit other passes such as LICM and CGP.
// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
// indices if any one of the indices is variant. If we lower such a GEP into
// invariant parts and variant parts, LICM can hoist/sink the invariant parts.
// CGP (CodeGen Prepare) tries to sink address calculations that match the
// target's addressing modes. A GEP with multiple indices may not match and
// will not be sunk. If we lower such a GEP into smaller parts, CGP may sink
// some of them, so we end up with a better addressing mode.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
    "disable-separate-const-offset-from-gep", cl::init(false),
    cl::desc("Do not separate the constant offset from a GEP instruction"),
    cl::Hidden);

// Setting this flag may emit false positives when the input module already
// contains dead instructions. Therefore, we set it only in unit tests that are
// free of dead code.
static cl::opt<bool>
    VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
                     cl::desc("Verify this pass produces no dead code"),
                     cl::Hidden);

namespace {

/// \brief A helper class for separating a constant offset from a GEP index.
///
/// In real programs, a GEP index may be more complicated than a simple addition
/// of something and a constant integer which can be trivially split. For
/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
/// constant offset, so that we can separate the index into (a << 3) + b and 5.
///
/// Therefore, this class looks into the expression that computes a given GEP
/// index, and tries to find a constant integer that can be hoisted to the
/// outermost level of the expression as an addition. Not every constant in an
/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to
/// (b * a + 5); nor can we transform (3 * (a + 5)) to (3 * a + 5). However, in
/// this case, -instcombine has probably already optimized (3 * (a + 5)) to
/// (3 * a + 15).
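///
/// As a minimal illustration (hypothetical IR, not from the original
/// comments): for the index %idx of
///   %p = getelementptr float, float* %base, i64 %idx
/// where %idx = add i64 %a, 5, Extract() returns %a as the new index and
/// Find() reports the constant offset 5.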
class ConstantOffsetExtractor {
public:
  /// Extracts a constant offset from the given GEP index. It returns the
  /// new index representing the remainder (equal to the original index minus
  /// the constant offset), or nullptr if we cannot extract a constant offset.
  /// \p Idx           The given GEP index
  /// \p GEP           The given GEP
  /// \p UserChainTail Outputs the tail of UserChain so that we can
  ///                  garbage-collect unused instructions in UserChain.
  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
                        User *&UserChainTail, const DominatorTree *DT);

  /// Looks for a constant offset from the given GEP index without extracting
  /// it. It returns the numeric value of the extracted constant offset (0 if
  /// failed). The meaning of the arguments is the same as for Extract.
  static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
                      const DominatorTree *DT);

private:
  ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
      : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
  }

  /// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the search is successful,
  /// returns C and updates UserChain as a def-use chain from C to V;
  /// otherwise, UserChain is empty.
  ///
  /// \p V            The given expression
  /// \p SignExtended Whether V will be sign-extended in the computation of the
  ///                 GEP index
  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
  ///                 GEP index
  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
  ///                 an index of an inbounds GEP is guaranteed to be
  ///                 non-negative. Leveraging this, we can better split
  ///                 inbounds GEPs.
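  ///
  /// e.g. (an illustrative sketch): find() on "a + (b + 5)" returns 5 and
  /// leaves UserChain = [5, b + 5, a + (b + 5)].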
  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);

  /// A helper function to look into both operands of a binary operator.
  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
                            bool ZeroExtended);

  /// After finding the constant offset C from the GEP index I, we build a new
  /// index I' s.t. I' + C = I. This function builds and returns the new
  /// index I' according to UserChain produced by function "find".
  ///
  /// The building conceptually takes two steps:
  /// 1) iteratively distribute s/zext towards the leaves of the expression
  ///    tree that computes I
  /// 2) reassociate the expression tree to the form I' + C.
  ///
  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  /// sext to a, b and 5 so that we have
  ///   sext(a) + (sext(b) + 5).
  /// Then, we reassociate it to
  ///   (sext(a) + sext(b)) + 5.
  /// Given this form, we know I' is sext(a) + sext(b).
  Value *rebuildWithoutConstOffset();

  /// After the first step of rebuilding the GEP index without the constant
  /// offset, distribute s/zext to the operands of all operators in UserChain.
  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
  /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// The function also updates UserChain to point to new subexpressions after
  /// distributing s/zext. e.g., the old UserChain of the above example is
  /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  /// and the new UserChain is
  /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// \p ChainIndex The index to UserChain. ChainIndex is initially
  ///               UserChain.size() - 1, and is decremented during
  ///               the recursion.
  Value *distributeExtsAndCloneChain(unsigned ChainIndex);

  /// Reassociates the GEP index to the form I' + C and returns I'.
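  /// e.g. (an illustrative reading of the UserChain example below): for the
  /// chain built from "a * b + (c + 5)", this returns "a * b + c".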
  Value *removeConstOffset(unsigned ChainIndex);

  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
  /// returns "sext i32 (zext i16 V to i32) to i64".
  Value *applyExts(Value *V);

  /// A helper function that returns whether we can trace into the operands
  /// of binary operator BO for a constant offset.
  ///
  /// \p SignExtended Whether BO is surrounded by sext
  /// \p ZeroExtended Whether BO is surrounded by zext
  /// \p NonNegative  Whether BO is known to be non-negative, e.g., an in-bound
  ///                 array index.
  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                    bool NonNegative);

  /// The path from the constant offset to the old GEP index. e.g., if the GEP
  /// index is "a * b + (c + 5)", after running function find, UserChain[0]
  /// will be the constant 5, UserChain[1] will be the subexpression "c + 5",
  /// and UserChain[2] will be the entire expression "a * b + (c + 5)".
  ///
  /// This path helps to rebuild the new GEP index.
  SmallVector<User *, 8> UserChain;

  /// A data structure used in rebuildWithoutConstOffset. Contains all
  /// sext/zext instructions along UserChain.
  SmallVector<CastInst *, 16> ExtInsts;

  /// Insertion position of cloned instructions.
  Instruction *IP;

  const DataLayout &DL;
  const DominatorTree *DT;
};

/// \brief A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP : public FunctionPass {
public:
  static char ID;

  SeparateConstOffsetFromGEP(const TargetMachine *TM = nullptr,
                             bool LowerGEP = false)
      : FunctionPass(ID), TM(TM), LowerGEP(LowerGEP) {
    initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool doInitialization(Module &M) override {
    DL = &M.getDataLayout();
    return false;
  }

  bool runOnFunction(Function &F) override;

private:
  /// Tries to split the given GEP into a variadic base and a constant offset,
  /// and returns true if the splitting succeeds.
  bool splitGEP(GetElementPtrInst *GEP);

  /// Lower a GEP with multiple indices into multiple GEPs with a single index.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers
  /// the variadic part into a set of GEPs with a single index and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic                The variadic part of the original GEP.
  /// \p AccumulativeByteOffset  The constant offset.
  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                              int64_t AccumulativeByteOffset);

  /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr
  /// form. Function splitGEP already split the original GEP into a variadic
  /// part and a constant offset (i.e., AccumulativeByteOffset). This function
  /// lowers the variadic part into a set of arithmetic operations and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic                The variadic part of the original GEP.
  /// \p AccumulativeByteOffset  The constant offset.
  void lowerToArithmetics(GetElementPtrInst *Variadic,
                          int64_t AccumulativeByteOffset);

  /// Finds the constant offset within each index and accumulates them. If
  /// LowerGEP is true, it finds constant offsets in indices of both sequential
  /// and structure types; otherwise it only looks at sequential indices. The
  /// output NeedsExtraction indicates whether we successfully found a non-zero
  /// constant offset.
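  ///
  /// For illustration (a hypothetical example): for
  ///   getelementptr inbounds [100 x i32], [100 x i32]* %a, i64 0, i64 %idx
  /// where %idx = add i64 %i, 5, the constant 5 contributes
  /// 5 * sizeof(i32) = 20 bytes to the accumulated offset.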
  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);

  /// Canonicalize array indices to pointer-size integers. This helps to
  /// simplify the logic of splitting a GEP. For example, if a + b is a
  /// pointer-size integer, we have
  ///   gep base, a + b = gep (gep base, a), b
  /// However, this equality may not hold if the size of a + b is smaller than
  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  /// pointer size before computing the address
  /// (http://llvm.org/docs/LangRef.html#id181).
  ///
  /// This canonicalization is very likely already done in clang and
  /// instcombine. Therefore, the program will probably remain the same.
  ///
  /// Returns true if the module changes.
  ///
  /// Verified in @i32_add in split-gep.ll
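  ///
  /// For illustration (a hypothetical example): on a target with 64-bit
  /// pointers, the i32 index %i in
  ///   getelementptr float, float* %base, i32 %i
  /// is promoted with "sext i32 %i to i64" before the GEP is split.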
  bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);

  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
  /// the constant offset. After extraction, it becomes desirable to reunite
  /// the distributed sexts. For example,
  ///
  ///                            &a[sext(i +nsw (j +nsw 5))]
  ///   => distribute            &a[sext(i) +nsw (sext(j) +nsw 5)]
  ///   => constant extraction   &a[sext(i) + sext(j)] + 5
  ///   => reunion               &a[sext(i +nsw j)] + 5
  bool reuniteExts(Function &F);

  /// A helper that reunites sexts in an instruction.
  bool reuniteExts(Instruction *I);

  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
  Instruction *findClosestMatchingDominator(const SCEV *Key,
                                            Instruction *Dominatee);

  /// Verify F is free of dead code.
  void verifyNoDeadCode(Function &F);

  bool hasMoreThanOneUseInLoop(Value *V, Loop *L);

  // Swap the index operands of two GEPs.
  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);

  // Check whether it is safe to swap the index operands of two GEPs.
  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
                            Loop *CurLoop);
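  //
  // For illustration of the swap performed by swapGEPOperand (hypothetical
  // IR, not from the original comments):
  //   %p  = getelementptr i8, i8* %base, i64 %var  ; loop-variant index
  //   %p2 = getelementptr i8, i8* %p, i64 1024     ; constant offset
  // becomes, after swapping the index operands,
  //   %p  = getelementptr i8, i8* %base, i64 1024  ; loop-invariant, LICM-able
  //   %p2 = getelementptr i8, i8* %p, i64 %var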

  const DataLayout *DL = nullptr;
  DominatorTree *DT = nullptr;
  ScalarEvolution *SE;
  const TargetMachine *TM;

  LoopInfo *LI;
  TargetLibraryInfo *TLI;

  /// Whether to lower a GEP with multiple indices into arithmetic operations
  /// or multiple GEPs with a single index.
  bool LowerGEP;

  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
};

} // end anonymous namespace

char SeparateConstOffsetFromGEP::ID = 0;

INITIALIZE_PASS_BEGIN(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)

FunctionPass *
llvm::createSeparateConstOffsetFromGEPPass(const TargetMachine *TM,
                                           bool LowerGEP) {
  return new SeparateConstOffsetFromGEP(TM, LowerGEP);
}

bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
                                           bool ZeroExtended,
                                           BinaryOperator *BO,
                                           bool NonNegative) {
  // We only consider ADD, SUB and OR, because a non-zero constant found in
  // expressions composed of these operations can be easily hoisted as a
  // constant offset by reassociation.
  if (BO->getOpcode() != Instruction::Add &&
      BO->getOpcode() != Instruction::Sub &&
      BO->getOpcode() != Instruction::Or) {
    return false;
  }

  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
  // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
  // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
  if (BO->getOpcode() == Instruction::Or &&
      !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
    return false;

  // In addition, tracing into BO requires that its surrounding s/zext (if
  // any) is distributable to both operands.
  //
  // Suppose BO = A op B.
  //  SignExtended | ZeroExtended | Distributable?
  // --------------+--------------+----------------------------------
  //       0       |      0       | true because no s/zext exists
  //       0       |      1       | zext(BO) == zext(A) op zext(B)
  //       1       |      0       | sext(BO) == sext(A) op sext(B)
  //       1       |      1       | zext(sext(BO)) ==
  //               |              |   zext(sext(A)) op zext(sext(B))
  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
    // If a + b >= 0 and (a >= 0 or b >= 0), then
    //   sext(a + b) = sext(a) + sext(b)
    // even if the addition is not marked nsw.
    //
    // Leveraging this invariant, we can trace into an sext'ed inbounds GEP
    // index if the constant offset is non-negative.
    //
    // Verified in @sext_add in split-gep.ll.
    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
      if (!ConstLHS->isNegative())
        return true;
    }
    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
      if (!ConstRHS->isNegative())
        return true;
    }
  }

  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
  if (BO->getOpcode() == Instruction::Add ||
      BO->getOpcode() == Instruction::Sub) {
    if (SignExtended && !BO->hasNoSignedWrap())
      return false;
    if (ZeroExtended && !BO->hasNoUnsignedWrap())
      return false;
  }

  return true;
}

APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
                                                   bool SignExtended,
                                                   bool ZeroExtended) {
  // BO being non-negative does not shed light on whether its operands are
  // non-negative. Clear the NonNegative flag here.
  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
                              /* NonNegative */ false);
  // If we found a constant offset in the left operand, stop and return that.
  // This shortcut might cause us to miss opportunities of combining the
  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
  // However, such cases are probably already handled by -instcombine,
  // given this pass runs after the standard optimizations.
  if (ConstantOffset != 0) return ConstantOffset;
  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
                        /* NonNegative */ false);
  // If BO is a sub operator, negate the constant offset found in the right
  // operand.
  if (BO->getOpcode() == Instruction::Sub)
    ConstantOffset = -ConstantOffset;
  return ConstantOffset;
}

APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
                                    bool ZeroExtended, bool NonNegative) {
  // TODO(jingyue): We could trace into integer/pointer casts, such as
  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
  // integers because it gives good enough results for our benchmarks.
  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();

  // We cannot do much with Values that are not a User, such as an Argument.
  User *U = dyn_cast<User>(V);
  if (U == nullptr) return APInt(BitWidth, 0);

  APInt ConstantOffset(BitWidth, 0);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    // Hooray, we found it!
    ConstantOffset = CI->getValue();
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
    // Trace into subexpressions for more hoisting opportunities.
    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
  } else if (isa<SExtInst>(V)) {
    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
                          ZeroExtended, NonNegative).sext(BitWidth);
  } else if (isa<ZExtInst>(V)) {
    // As an optimization, we can clear the SignExtended flag because
    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
    //
    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
    ConstantOffset =
        find(U->getOperand(0), /* SignExtended */ false,
             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
  }

  // If we found a non-zero constant offset, add it to the path for
  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
  // help this optimization.
  if (ConstantOffset != 0)
    UserChain.push_back(U);
  return ConstantOffset;
}

Value *ConstantOffsetExtractor::applyExts(Value *V) {
  Value *Current = V;
  // ExtInsts is built in the use-def order. Therefore, we apply them to V
  // in the reversed order.
  for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
    if (Constant *C = dyn_cast<Constant>(Current)) {
      // If Current is a constant, apply s/zext using ConstantExpr::getCast.
      // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
      Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
    } else {
      Instruction *Ext = (*I)->clone();
      Ext->setOperand(0, Current);
      Ext->insertBefore(IP);
      Current = Ext;
    }
  }
  return Current;
}

Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
  distributeExtsAndCloneChain(UserChain.size() - 1);
  // Remove all nullptrs (used to be s/zext) from UserChain.
  unsigned NewSize = 0;
  for (User *I : UserChain) {
    if (I != nullptr) {
      UserChain[NewSize] = I;
      NewSize++;
    }
  }
  UserChain.resize(NewSize);
  return removeConstOffset(UserChain.size() - 1);
}

Value *
ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  User *U = UserChain[ChainIndex];
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(U));
    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
  }

  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
    assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
           "We only traced into two types of CastInst: sext and zext");
    ExtInsts.push_back(Cast);
    UserChain[ChainIndex] = nullptr;
    return distributeExtsAndCloneChain(ChainIndex - 1);
  }

  // Function find only traces into BinaryOperator and CastInst.
  BinaryOperator *BO = cast<BinaryOperator>(U);
  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);

  BinaryOperator *NewBO = nullptr;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
                                   BO->getName(), IP);
  } else {
    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
                                   BO->getName(), IP);
  }
  return UserChain[ChainIndex] = NewBO;
}

Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(UserChain[ChainIndex]));
    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  }

  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
  assert(BO->getNumUses() <= 1 &&
         "distributeExtsAndCloneChain clones each BinaryOperator in "
         "UserChain, so no one should be used more than "
         "once");

  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
  Value *NextInChain = removeConstOffset(ChainIndex - 1);
  Value *TheOther = BO->getOperand(1 - OpNo);

  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
  // sub-expression to be just TheOther.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
      return TheOther;
  }

  BinaryOperator::BinaryOps NewOp = BO->getOpcode();
  if (BO->getOpcode() == Instruction::Or) {
    // Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
    //
    // For instance, given
    //   a | (b + 5) where a and b + 5 have no common bits,
    // we can extract 5 as the constant offset.
    //
    // However, reusing the "or" in the new index would give us
    //   (a | b) + 5
    // which does not equal a | (b + 5).
    //
    // Replacing the "or" with "add" is fine, because
    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
    NewOp = Instruction::Add;
  }

  BinaryOperator *NewBO;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
  } else {
    NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
  }
  NewBO->takeName(BO);
  return NewBO;
}

Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
                                        User *&UserChainTail,
                                        const DominatorTree *DT) {
  ConstantOffsetExtractor Extractor(GEP, DT);
  // Find a non-zero constant offset first.
  APInt ConstantOffset =
      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
                     GEP->isInBounds());
  if (ConstantOffset == 0) {
    UserChainTail = nullptr;
    return nullptr;
  }
  // Separates the constant offset from the GEP index.
  Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
  UserChainTail = Extractor.UserChain.back();
  return IdxWithoutConstOffset;
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
                                      const DominatorTree *DT) {
  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be
  // non-negative.
  return ConstantOffsetExtractor(GEP, DT)
      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
            GEP->isInBounds())
      .getSExtValue();
}

bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
    GetElementPtrInst *GEP) {
  bool Changed = false;
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
       I != E; ++I, ++GTI) {
    // Skip struct member indices, which must be i32.
    if (GTI.isSequential()) {
      if ((*I)->getType() != IntPtrTy) {
        *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
        Changed = true;
      }
    }
  }
  return Changed;
}

int64_t
SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
                                                 bool &NeedsExtraction) {
  NeedsExtraction = false;
  int64_t AccumulativeByteOffset = 0;
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Tries to extract a constant offset from this GEP index.
      int64_t ConstantOffset =
          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
      if (ConstantOffset != 0) {
        NeedsExtraction = true;
        // A GEP may have multiple indices. We accumulate the extracted
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
        AccumulativeByteOffset +=
            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
      }
    } else if (LowerGEP) {
      StructType *StTy = GTI.getStructType();
      uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
      // Skip field 0 as the offset is always 0.
      if (Field != 0) {
        NeedsExtraction = true;
        AccumulativeByteOffset +=
            DL->getStructLayout(StTy)->getElementOffset(Field);
      }
    }
  }
  return AccumulativeByteOffset;
}

void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
    GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Type *I8PtrTy =
      Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
  Value *ResultPtr = Variadic->getOperand(0);
  Loop *L = LI->getLoopFor(Variadic->getParent());
  // The base is a candidate for swapping only if it is loop invariant and
  // used no more than once in the loop.
  bool isSwapCandidate =
      L && L->isLoopInvariant(ResultPtr) &&
      !hasMoreThanOneUseInLoop(ResultPtr, L);
  Value *FirstResult = nullptr;

  if (ResultPtr->getType() != I8PtrTy)
    ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);

  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create an ugly GEP for each sequential index. We don't create GEPs for
  // structure indices, as they are accumulated in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by the element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ugly GEP with a single index for each index.
      ResultPtr =
          Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
      if (FirstResult == nullptr)
        FirstResult = ResultPtr;
    }
  }

  // Create a GEP with the constant offset index.
  if (AccumulativeByteOffset != 0) {
    Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
    ResultPtr =
        Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
  } else
    isSwapCandidate = false;

  // If we created a GEP with a constant index, and the base is loop invariant,
  // then we swap the first one with it, so LICM can move the constant GEP out
  // later.
  GetElementPtrInst *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
  GetElementPtrInst *SecondGEP = dyn_cast_or_null<GetElementPtrInst>(ResultPtr);
  if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
    swapGEPOperand(FirstGEP, SecondGEP);

  if (ResultPtr->getType() != Variadic->getType())
    ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());

  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

void
SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
                                               int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
  // don't create arithmetic for structure indices, as they are accumulated
  // in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by the element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ADD for each index.
      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
    }
  }

  // Create an ADD for the constant offset index.
  if (AccumulativeByteOffset != 0) {
    ResultPtr = Builder.CreateAdd(
        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
  }

  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
  // Skip vector GEPs.
  if (GEP->getType()->isVectorTy())
    return false;

  // The backend can already nicely handle the case where all indices are
  // constant.
  if (GEP->hasAllConstantIndices())
    return false;

  bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);

  bool NeedsExtraction;
  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);

  if (!NeedsExtraction)
    return Changed;

  // If LowerGEP is disabled, before really splitting the GEP, check whether
  // the backend supports the addressing mode we are about to produce. If not,
  // this splitting probably won't be beneficial.
  // If LowerGEP is enabled, even if the extracted constant offset cannot match
  // the addressing mode, we can still do optimizations on the other lowered
  // parts of the variable indices. Therefore, we don't check for addressing
  // modes in that case.
  if (!LowerGEP) {
    TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
            *GEP->getParent()->getParent());
    unsigned AddrSpace = GEP->getPointerAddressSpace();
    if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
                                   /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                   /*HasBaseReg=*/true, /*Scale=*/0,
                                   AddrSpace)) {
      return Changed;
    }
  }

  // Remove the constant offset in each sequential index. The resultant GEP
  // computes the variadic base.
  // Notice that we don't remove struct field indices here. If LowerGEP is
  // disabled, a structure index is not accumulated and we still use the old
  // one. If LowerGEP is enabled, a structure index is accumulated in the
  // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
  // handle the constant offset and won't need a new structure index.
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Splits this GEP index into a variadic part and a constant offset, and
      // uses the variadic part as the new index.
      Value *OldIdx = GEP->getOperand(I);
      User *UserChainTail;
      Value *NewIdx =
          ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
      if (NewIdx != nullptr) {
        // Switches to the index with the constant offset removed.
        GEP->setOperand(I, NewIdx);
        // After switching to the new index, we can garbage-collect UserChain
        // and the old index if they are not used.
        RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
        RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
      }
    }
  }

  // Clear the inbounds attribute because the new index may be off-bound.
  // e.g.,
  //
  //   b    = add i64 a, 5
  //   addr = gep inbounds float, float* p, i64 b
  //
  // is transformed to:
  //
  //   addr2 = gep float, float* p, i64 a ; inbounds removed
  //   addr  = gep inbounds float, float* addr2, i64 5
  //
  // If a is -4, although the old index b is in bounds, the new index a is
  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
  // inbounds keyword is not present, the offsets are added to the base
  // address with silently-wrapping two's complement arithmetic".
  // Therefore, the final code will be semantically equivalent.
  //
  // TODO(jingyue): do some range analysis to keep as many inbounds as
  // possible. GEPs with inbounds are more friendly to alias analysis.
  bool GEPWasInBounds = GEP->isInBounds();
  GEP->setIsInBounds(false);

  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
  if (LowerGEP) {
    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
    // arithmetic operations if the target uses alias analysis in codegen.
    if (TM && TM->getSubtargetImpl(*GEP->getParent()->getParent())->useAA())
      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
    else
      lowerToArithmetics(GEP, AccumulativeByteOffset);
    return true;
  }

  // No need to create another GEP if the accumulative byte offset is 0.
  if (AccumulativeByteOffset == 0)
    return true;

  // Offsets the base with the accumulative byte offset.
  //
  //   %gep            ; the base
  //   ... %gep ...
  //
  // => add the offset
  //
  //   %gep2           ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   %gep            ; will be removed
  //   ... %gep ...
  //
  // => replace all uses of %gep with %new.gep and remove %gep
  //
  //   %gep2           ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   ... %new.gep ...
  //
  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
  // type of %gep.
  //
  //   %gep2           ; clone of %gep
  //   %0       = bitcast %gep2 to i8*
  //   %uglygep = gep %0, <offset>
  //   %new.gep = bitcast %uglygep to <type of %gep>
  //   ... %new.gep ...
  Instruction *NewGEP = GEP->clone();
  NewGEP->insertBefore(GEP);

  // Per the ANSI C standard, signed / unsigned = unsigned and
  // signed % unsigned = unsigned. Therefore, we cast ElementTypeSizeOfGEP to
  // signed so that the division and remainder below, which involve the signed
  // AccumulativeByteOffset, are computed in signed arithmetic.
  int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
      DL->getTypeAllocSize(GEP->getResultElementType()));
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
    // Very likely. As long as %gep is naturally aligned, the byte offset we
    // extracted should be a multiple of sizeof(*%gep).
    int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
    NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
                                       ConstantInt::get(IntPtrTy, Index, true),
                                       GEP->getName(), GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
  } else {
    // Unlikely but possible. For example,
    //  #pragma pack(1)
    //  struct S {
    //    int a[3];
    //    int64 b[8];
    //  };
    //  #pragma pack()
    //
    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
    // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
    // sizeof(int64).
    //
    // Emit an uglygep in this case.
    Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
                                       GEP->getPointerAddressSpace());
    NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
    NewGEP = GetElementPtrInst::Create(
        Type::getInt8Ty(GEP->getContext()), NewGEP,
        ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
        GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
    if (GEP->getType() != I8PtrTy)
      NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
  }

  GEP->replaceAllUsesWith(NewGEP);
  GEP->eraseFromParent();

  return true;
}

bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  if (DisableSeparateConstOffsetFromGEP)
    return false;

  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  bool Changed = false;
  for (BasicBlock &B : F) {
    for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
        Changed |= splitGEP(GEP);
    // No need to split GEP ConstantExprs because all their indices are
    // constant already.
  }

  Changed |= reuniteExts(F);

  if (VerifyNoDeadCode)
    verifyNoDeadCode(F);

  return Changed;
}

Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
    const SCEV *Key, Instruction *Dominatee) {
  auto Pos = DominatingExprs.find(Key);
  if (Pos == DominatingExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
  while (!Candidates.empty()) {
    Instruction *Candidate = Candidates.back();
    if (DT->dominates(Candidate, Dominatee))
      return Candidate;
    Candidates.pop_back();
  }
  return nullptr;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
  if (!SE->isSCEVable(I->getType()))
    return false;

  //   Dom: LHS+RHS
  //   I: sext(LHS)+sext(RHS)
  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
  // TODO: handle zext
  Value *LHS = nullptr, *RHS = nullptr;
  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  }

  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfFullPoison(I)) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      DominatingExprs[Key].push_back(I);
    }
  }
  return false;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
  bool Changed = false;
  DominatingExprs.clear();
  for (const auto Node : depth_first(DT)) {
    BasicBlock *BB = Node->getBlock();
    for (auto I = BB->begin(); I != BB->end(); ) {
      Instruction *Cur = &*I++;
      Changed |= reuniteExts(Cur);
    }
  }
  return Changed;
}

void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
  for (BasicBlock &B : F) {
    for (Instruction &I : B) {
      if (isInstructionTriviallyDead(&I)) {
        std::string ErrMessage;
        raw_string_ostream RSO(ErrMessage);
        RSO << "Dead instruction detected!\n" << I << "\n";
        llvm_unreachable(RSO.str().c_str());
      }
    }
  }
}

bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
  if (!FirstGEP || !FirstGEP->hasOneUse())
    return false;

  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
    return false;

  if (FirstGEP == SecondGEP)
    return false;

  unsigned FirstNum = FirstGEP->getNumOperands();
  unsigned SecondNum = SecondGEP->getNumOperands();
  // Give up if the number of operands is not 2.
  if (FirstNum != SecondNum || FirstNum != 2)
    return false;

  Value *FirstBase = FirstGEP->getOperand(0);
  Value *SecondBase = SecondGEP->getOperand(0);
  Value *FirstOffset = FirstGEP->getOperand(1);
  // Give up if the index of the first GEP is loop invariant.
  if (CurLoop->isLoopInvariant(FirstOffset))
    return false;

  // Give up if the bases don't have the same type.
  if (FirstBase->getType() != SecondBase->getType())
    return false;

  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);

  // Check if the second operand of the first GEP has a constant coefficient.
  // For example, for the following code, we won't gain anything by
  // hoisting the second GEP out because the second GEP can be folded away.
  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
  //   %67 = shl i64 %scevgep.sum.ur159, 2
  //   %uglygep160 = getelementptr i8* %65, i64 %67
  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024

  // Skip constant shift instructions, which may be generated by splitting
  // GEPs.
  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));

  // Give up if FirstOffsetDef is an Add or Sub with a constant, because the
  // swap may not be profitable at all due to constant folding.
  if (FirstOffsetDef)
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
      unsigned opc = BO->getOpcode();
      if ((opc == Instruction::Add || opc == Instruction::Sub) &&
          (isa<ConstantInt>(BO->getOperand(0)) ||
           isa<ConstantInt>(BO->getOperand(1))))
        return false;
    }
  return true;
}

bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
  int UsesInLoop = 0;
  for (User *U : V->users()) {
    if (Instruction *User = dyn_cast<Instruction>(U))
      if (L->contains(User))
        if (++UsesInLoop > 1)
          return true;
  }
  return false;
}

void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
                                                GetElementPtrInst *Second) {
  Value *Offset1 = First->getOperand(1);
  Value *Offset2 = Second->getOperand(1);
  First->setOperand(1, Offset2);
  Second->setOperand(1, Offset1);

  // We changed p+o+c to p+c+o, and p+c may not be inbounds anymore.
  const DataLayout &DAL = First->getModule()->getDataLayout();
  APInt Offset(DAL.getPointerSizeInBits(
                   cast<PointerType>(First->getType())->getAddressSpace()),
               0);
  Value *NewBase =
      First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
  uint64_t ObjectSize;
  if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
      Offset.ugt(ObjectSize)) {
    First->setIsInBounds(false);
    Second->setIsInBounds(false);
  } else
    First->setIsInBounds(true);
}
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:69
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:645
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:514
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BinaryOps getOpcode() const
Definition: InstrTypes.h:523
INITIALIZE_PASS_BEGIN(SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_END(SeparateConstOffsetFromGEP
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
The main scalar evolution driver.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value *> IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:863
static cl::opt< bool > VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false), cl::desc("Verify this pass produces no dead code"), cl::Hidden)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:728
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:344
F(f)
This class represents a sign extension of integer types.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
Hexagon Common GEP
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
op_iterator op_begin()
Definition: User.h:214
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:207
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
Class to represent struct types.
Definition: DerivedTypes.h:201
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
separate const offset from Split GEPs to a variadic base and a constant offset for better CSE
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:893
This file implements a class to represent arbitrary precision integral constant values and operations...
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:502
bool programUndefinedIfFullPoison(const Instruction *PoisonI)
Return true if this function can prove that if PoisonI is executed and yields a full-poison value (al...
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1444
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
Key
PAL metadata keys.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1448
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This class represents a no-op cast from one type to another.
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Return true if LHS and RHS have no common bits set.
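A hedged sketch of why this matters here (LHS, RHS, and DL assumed in scope): when the two addends share no set bits, the addition cannot carry, so an extracted constant offset cannot interfere with the variadic part:

  // a + b == a | b when no bit is set in both; no carries occur.
  if (haveNoCommonBitsSet(LHS, RHS, DL)) {
    // Safe to treat the two parts independently.
  }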
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:292
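A minimal sketch of the usual replacement idiom (GEP and NewGEP are hypothetical names):

  NewGEP->takeName(GEP);            // keep the old name for readability
  GEP->replaceAllUsesWith(NewGEP);  // redirect every user
  RecursivelyDeleteTriviallyDeadInstructions(GEP);  // drop the dead original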
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
void initializeSeparateConstOffsetFromGEPPass(PassRegistry &)
Value * getOperand(unsigned i) const
Definition: User.h:154
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:837
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
Wrapper pass for TargetTransformInfo.
FunctionPass * createSeparateConstOffsetFromGEPPass(const TargetMachine *TM=nullptr, bool LowerGEP=false)
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:75
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:992
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:216
static cl::opt< bool > DisableSeparateConstOffsetFromGEP("disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), cl::Hidden)
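As a sketch, such a flag is typically checked at the top of the pass entry point, so that `opt -disable-separate-const-offset-from-gep` turns the pass into a no-op:

  if (DisableSeparateConstOffsetFromGEP)
    return false;  // report "no change" to the pass manager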
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:401
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:834
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:386
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:478
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:937
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:56
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:918
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1227
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:110
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
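A minimal sketch of normalizing a GEP index to the pointer-sized integer type before doing arithmetic on it; Idx, IntPtrTy, and InsertPt are assumed to be in scope:

  // Sign-extend, truncate, or bitcast Idx to IntPtrTy as needed.
  Value *NormIdx =
      CastInst::CreateIntegerCast(Idx, IntPtrTy, /*isSigned=*/true,
                                  "idx.ext", InsertPt);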
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:254
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:239
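A hedged sketch of a dominance-gated rewrite (DT is a DominatorTree, Def an Instruction *, and U a Use &, all assumed in scope): an existing value may only replace a use it dominates:

  if (DT.dominates(Def, U))
    U.set(Def);  // reuse Def instead of recomputing it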
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
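For illustration, a minimal sketch of materializing an accumulated byte offset as an IR constant; IntPtrTy and AccumulativeByteOffset are hypothetical names assumed in scope:

  Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);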
void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function or modify terminator instructions in any way.
Definition: Pass.cpp:285
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:637
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
unsigned logBase2() const
Definition: APInt.h:1727
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
Accumulate offsets from stripInBoundsConstantOffsets().
Definition: Value.cpp:576
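A minimal sketch of accumulating the constant part of an address (Ptr, DL, and AS assumed in scope):

  APInt Off(DL.getPointerSizeInBits(AS), 0);
  // Base is Ptr with all in-bounds constant GEPs peeled off; their
  // byte offsets are summed into Off.
  const Value *Base = Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, Off);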
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:57
Class for arbitrary precision integers.
Definition: APInt.h:69
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:457
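A hedged sketch of the classic strength reduction these two queries enable (C is a const APInt &; Builder and V assumed in scope):

  // V * C becomes V << log2(C) when C is a power of two.
  if (C.isPowerOf2())
    V = Builder.CreateShl(V, C.logBase2());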
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:401
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:338
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1435
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1012
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1234
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:166
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
Type * getResultElementType() const
Definition: Instructions.h:939
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1440
iterator_range< df_iterator< T > > depth_first(const T &G)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:462
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:324
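A minimal sketch of the kind of check behind the -reassociate-geps-verify-no-dead-code option (F is a Function & assumed in scope):

  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      assert(!isInstructionTriviallyDead(&I) &&
             "pass left a trivially dead instruction behind");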
LLVM Value Representation.
Definition: Value.h:73
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:57
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:958
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:414
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
This pass exposes codegen information to IR-level passes.
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
bool hasAllConstantIndices() const
Return true if all of the indices of this GEP are constant integers.
const BasicBlock * getParent() const
Definition: Instruction.h:66
gep_type_iterator gep_type_begin(const User *GEP)