1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
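//
// For illustration, a minimal example (assumed IR, not taken from this file):
// starting at the second store (StartDef) and walking upwards finds the first
// store as a MemoryDef that is completely overwritten and never read in
// between, so DSE deletes it:
//
//   store i32 1, ptr %p        ; dead: overwritten below, no intervening read
//   store i32 2, ptr %p        ; killing store (StartDef)
//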
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/BasicBlock.h"
54#include "llvm/IR/Constant.h"
56#include "llvm/IR/Constants.h"
57#include "llvm/IR/DataLayout.h"
58#include "llvm/IR/DebugInfo.h"
59#include "llvm/IR/Dominators.h"
60#include "llvm/IR/Function.h"
61#include "llvm/IR/IRBuilder.h"
63#include "llvm/IR/InstrTypes.h"
64#include "llvm/IR/Instruction.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/PassManager.h"
70#include "llvm/IR/Value.h"
73#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <cstdint>
83#include <map>
84#include <optional>
85#include <utility>
86
87using namespace llvm;
88using namespace PatternMatch;
89
90#define DEBUG_TYPE "dse"
91
92STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
93STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
94STATISTIC(NumFastStores, "Number of stores deleted");
95STATISTIC(NumFastOther, "Number of other instrs removed");
96STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
97STATISTIC(NumModifiedStores, "Number of stores modified");
98STATISTIC(NumCFGChecks, "Number of stores checked against the CFG");
99STATISTIC(NumCFGTries, "Number of CFG-based elimination attempts");
100STATISTIC(NumCFGSuccess, "Number of successful CFG-based eliminations");
101STATISTIC(NumGetDomMemoryDefPassed,
102 "Number of times a valid candidate is returned from getDomMemoryDef");
103STATISTIC(NumDomMemDefChecks,
104 "Number iterations check for reads in getDomMemoryDef");
105
106DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
107 "Controls which MemoryDefs are eliminated.");
108
109static cl::opt<bool>
110EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
111 cl::init(true), cl::Hidden,
112 cl::desc("Enable partial-overwrite tracking in DSE"));
113
114static cl::opt<bool>
115EnablePartialStoreMerging("enable-dse-partial-store-merging",
116 cl::init(true), cl::Hidden,
117 cl::desc("Enable partial store merging in DSE"));
118
119static cl::opt<unsigned>
120 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
121 cl::desc("The number of memory instructions to scan for "
122 "dead store elimination (default = 150)"));
124 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
125 cl::desc("The maximum number of steps while walking upwards to find "
126 "MemoryDefs that may be killed (default = 90)"));
127
129 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
130 cl::desc("The maximum number candidates that only partially overwrite the "
131 "killing MemoryDef to consider"
132 " (default = 5)"));
133
135 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
136 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
137 "other stores per basic block (default = 5000)"));
138
140 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
141 cl::desc(
142 "The cost of a step in the same basic block as the killing MemoryDef"
143 "(default = 1)"));
144
145static cl::opt<unsigned>
146 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
147 cl::Hidden,
148 cl::desc("The cost of a step in a different basic "
149 "block than the killing MemoryDef "
150 "(default = 5)"));
151
153 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
154 cl::desc("The maximum number of blocks to check when trying to prove that "
155 "all paths to an exit go through a killing block (default = 50)"));
156
157// This flag allows or disallows DSE to optimize MemorySSA during its
158// traversal. Note that DSE optimizing MemorySSA may impact other passes
159// downstream of the DSE invocation and can lead to issues not being
160// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
161// those cases, the flag can be used to check if DSE's MemorySSA optimizations
162// impact follow-up passes.
163static cl::opt<bool>
164 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
165 cl::desc("Allow DSE to optimize memory accesses."));
166
167// TODO: turn on and remove this flag.
169 "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden,
170 cl::desc("Enable the initializes attr improvement in DSE"));
171
172//===----------------------------------------------------------------------===//
173// Helper functions
174//===----------------------------------------------------------------------===//
175using OverlapIntervalsTy = std::map<int64_t, int64_t>;
176using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
177
178/// Returns true if the end of this instruction can be safely shortened in
179/// length.
180static bool isShortenableAtTheEnd(Instruction *I) {
181 // Don't shorten stores for now
182 if (isa<StoreInst>(I))
183 return false;
184
185 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
186 switch (II->getIntrinsicID()) {
187 default: return false;
188 case Intrinsic::memset:
189 case Intrinsic::memcpy:
190 case Intrinsic::memcpy_element_unordered_atomic:
191 case Intrinsic::memset_element_unordered_atomic:
192 // Do shorten memory intrinsics.
193 // FIXME: Add memmove if it's also safe to transform.
194 return true;
195 }
196 }
197
198 // Don't shorten libcalls calls for now.
199
200 return false;
201}
202
203/// Returns true if the beginning of this instruction can be safely shortened
204/// in length.
205static bool isShortenableAtTheBeginning(Instruction *I) {
206 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
207 // easily done by offsetting the source address.
208 return isa<AnyMemSetInst>(I);
209}
210
211static std::optional<TypeSize> getPointerSize(const Value *V,
212 const DataLayout &DL,
213 const TargetLibraryInfo &TLI,
214 const Function *F) {
215 uint64_t Size;
216 ObjectSizeOpts Opts;
217 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
218
219 if (getObjectSize(V, Size, DL, &TLI, Opts))
220 return TypeSize::getFixed(Size);
221 return std::nullopt;
222}
223
224namespace {
225
226enum OverwriteResult {
227 OW_Begin,
228 OW_Complete,
229 OW_End,
230 OW_PartialEarlierWithFullLater,
231 OW_MaybePartial,
232 OW_None,
233 OW_Unknown
234};
235
236} // end anonymous namespace
237
238/// Check if two instructions are masked stores that completely
239/// overwrite one another. More specifically, \p KillingI has to
240/// overwrite \p DeadI.
241static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
242 const Instruction *DeadI,
243 BatchAAResults &AA) {
244 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
245 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
246 if (KillingII == nullptr || DeadII == nullptr)
247 return OW_Unknown;
248 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
249 return OW_Unknown;
250 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
251 // Type size.
252 VectorType *KillingTy =
253 cast<VectorType>(KillingII->getArgOperand(0)->getType());
254 VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
255 if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
256 return OW_Unknown;
257 // Element count.
258 if (KillingTy->getElementCount() != DeadTy->getElementCount())
259 return OW_Unknown;
260 // Pointers.
261 Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
262 Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
263 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
264 return OW_Unknown;
265 // Masks.
266 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
267 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
268 return OW_Unknown;
269 return OW_Complete;
270 }
271 return OW_Unknown;
272}
273
274/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
275/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
276/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
277/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
278/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
279/// overwritten by a killing (smaller) store which doesn't write outside the big
280/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
281/// NOTE: This function must only be called if both \p KillingLoc and \p
282/// DeadLoc belong to the same underlying object with valid \p KillingOff and
283/// \p DeadOff.
284static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
285 const MemoryLocation &DeadLoc,
286 int64_t KillingOff, int64_t DeadOff,
287 Instruction *DeadI,
288 InstOverlapIntervalsTy &IOL) {
289 const uint64_t KillingSize = KillingLoc.Size.getValue();
290 const uint64_t DeadSize = DeadLoc.Size.getValue();
291 // We may now overlap, although the overlap is not complete. There might also
292 // be other incomplete overlaps, and together, they might cover the complete
293 // dead store.
294 // Note: The correctness of this logic depends on the fact that this function
295 // is never called with DepWrite when there are any intervening reads.
296 if (EnablePartialOverwriteTracking &&
297 KillingOff < int64_t(DeadOff + DeadSize) &&
298 int64_t(KillingOff + KillingSize) >= DeadOff) {
299
300 // Insert our part of the overlap into the map.
301 auto &IM = IOL[DeadI];
302 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
303 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
304 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
305 << ")\n");
306
307 // Make sure that we only insert non-overlapping intervals and combine
308 // adjacent intervals. The intervals are stored in the map with the ending
309 // offset as the key (in the half-open sense) and the starting offset as
310 // the value.
311 int64_t KillingIntStart = KillingOff;
312 int64_t KillingIntEnd = KillingOff + KillingSize;
313
314 // Find any intervals ending at, or after, KillingIntStart which start
315 // before KillingIntEnd.
316 auto ILI = IM.lower_bound(KillingIntStart);
317 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
318 // This existing interval is overlapped with the current store somewhere
319 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
320 // intervals and adjusting our start and end.
321 KillingIntStart = std::min(KillingIntStart, ILI->second);
322 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
323 ILI = IM.erase(ILI);
324
325 // Continue erasing and adjusting our end in case other previous
326 // intervals are also overlapped with the current store.
327 //
328 // |--- dead 1 ---| |--- dead 2 ---|
329 // |------- killing---------|
330 //
331 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
332 assert(ILI->second > KillingIntStart && "Unexpected interval");
333 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
334 ILI = IM.erase(ILI);
335 }
336 }
337
338 IM[KillingIntEnd] = KillingIntStart;
339
340 ILI = IM.begin();
341 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
342 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
343 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
344 << ") Composite KillingLoc [" << ILI->second << ", "
345 << ILI->first << ")\n");
346 ++NumCompletePartials;
347 return OW_Complete;
348 }
349 }
350
351 // Check for a dead store which writes to all the memory locations that
352 // the killing store writes to.
353 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
354 int64_t(DeadOff + DeadSize) > KillingOff &&
355 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
356 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
357 << ", " << int64_t(DeadOff + DeadSize)
358 << ") by a killing store [" << KillingOff << ", "
359 << int64_t(KillingOff + KillingSize) << ")\n");
360 // TODO: Maybe come up with a better name?
361 return OW_PartialEarlierWithFullLater;
362 }
363
364 // Another interesting case is if the killing store overwrites the end of the
365 // dead store.
366 //
367 // |--dead--|
368 // |-- killing --|
369 //
370 // In this case we may want to trim the size of dead store to avoid
371 // generating stores to addresses which will definitely be overwritten by the
372 // killing store.
373 if (!EnablePartialOverwriteTracking &&
374 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
375 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
376 return OW_End;
377
378 // Finally, we also need to check if the killing store overwrites the
379 // beginning of the dead store.
380 //
381 // |--dead--|
382 // |-- killing --|
383 //
384 // In this case we may want to move the destination address and trim the size
385 // of dead store to avoid generating stores to addresses which will definitely
386 // be overwritten by the killing store.
387 if (!EnablePartialOverwriteTracking &&
388 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
389 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
390 "Expect to be handled as OW_Complete");
391 return OW_Begin;
392 }
393 // Otherwise, they don't completely overlap.
394 return OW_Unknown;
395}
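//
// Illustrative example (assumed IR, not from this file) of the interval
// bookkeeping above: the two i32 stores together cover the dead 8-byte
// memset. Processing the store to %p records the interval {4 -> 0}; the
// store to %q then merges to {8 -> 0}, which spans [0, 8) and yields
// OW_Complete for the memset:
//
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 8, i1 false)  ; dead
//   %q = getelementptr i8, ptr %p, i64 4
//   store i32 1, ptr %p                                           ; killing
//   store i32 2, ptr %q                                           ; killing
//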
396
397/// Returns true if the memory which is accessed by the second instruction is not
398/// modified between the first and the second instruction.
399/// Precondition: Second instruction must be dominated by the first
400/// instruction.
401static bool
402memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
403 BatchAAResults &AA, const DataLayout &DL,
404 DominatorTree *DT) {
405 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
406 // instructions which can modify the memory location accessed by SecondI.
407 //
408 // While doing the walk keep track of the address to check. It might be
409 // different in different basic blocks due to PHI translation.
410 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
411 SmallVector<BlockAddressPair, 16> WorkList;
412 // Keep track of the address we visited each block with. Bail out if we
413 // visit a block with different addresses.
414 DenseMap<BasicBlock *, Value *> Visited;
415
416 BasicBlock::iterator FirstBBI(FirstI);
417 ++FirstBBI;
418 BasicBlock::iterator SecondBBI(SecondI);
419 BasicBlock *FirstBB = FirstI->getParent();
420 BasicBlock *SecondBB = SecondI->getParent();
421 MemoryLocation MemLoc;
422 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
423 MemLoc = MemoryLocation::getForDest(MemSet);
424 else
425 MemLoc = MemoryLocation::get(SecondI);
426
427 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
428
429 // Start checking the SecondBB.
430 WorkList.push_back(
431 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
432 bool isFirstBlock = true;
433
434 // Check all blocks going backward until we reach the FirstBB.
435 while (!WorkList.empty()) {
436 BlockAddressPair Current = WorkList.pop_back_val();
437 BasicBlock *B = Current.first;
438 PHITransAddr &Addr = Current.second;
439 Value *Ptr = Addr.getAddr();
440
441 // Ignore instructions before FirstI if this is the FirstBB.
442 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
443
444 BasicBlock::iterator EI;
445 if (isFirstBlock) {
446 // Ignore instructions after SecondI if this is the first visit of SecondBB.
447 assert(B == SecondBB && "first block is not the store block");
448 EI = SecondBBI;
449 isFirstBlock = false;
450 } else {
451 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
452 // In this case we also have to look at instructions after SecondI.
453 EI = B->end();
454 }
455 for (; BI != EI; ++BI) {
456 Instruction *I = &*BI;
457 if (I->mayWriteToMemory() && I != SecondI)
458 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
459 return false;
460 }
461 if (B != FirstBB) {
462 assert(B != &FirstBB->getParent()->getEntryBlock() &&
463 "Should not hit the entry block because SI must be dominated by LI");
464 for (BasicBlock *Pred : predecessors(B)) {
465 PHITransAddr PredAddr = Addr;
466 if (PredAddr.needsPHITranslationFromBlock(B)) {
467 if (!PredAddr.isPotentiallyPHITranslatable())
468 return false;
469 if (!PredAddr.translateValue(B, Pred, DT, false))
470 return false;
471 }
472 Value *TranslatedPtr = PredAddr.getAddr();
473 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
474 if (!Inserted.second) {
475 // We already visited this block before. If it was with a different
476 // address - bail out!
477 if (TranslatedPtr != Inserted.first->second)
478 return false;
479 // ... otherwise just skip it.
480 continue;
481 }
482 WorkList.push_back(std::make_pair(Pred, PredAddr));
483 }
484 }
485 }
486 return true;
487}
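//
// Illustrative sketch (assumed IR, not from this file): when the backwards
// walk from SecondI crosses a block boundary, the queried address may need
// PHI translation. Here the address %a in %merge translates to %p or %q in
// the respective predecessor, and each predecessor is scanned with its
// translated pointer:
//
//   merge:
//     %a = phi ptr [ %p, %left ], [ %q, %right ]
//     store i32 1, ptr %a                          ; SecondI
//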
488
489static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
490 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
491 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
492 const DataLayout &DL = Inst->getDataLayout();
493 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
494 uint64_t DeadSliceOffsetInBits =
495 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
496 auto SetDeadFragExpr = [](auto *Assign,
497 DIExpression::FragmentInfo DeadFragment) {
498 // createFragmentExpression expects an offset relative to the existing
499 // fragment offset if there is one.
500 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
501 Assign->getExpression()
502 ->getFragmentInfo()
503 .value_or(DIExpression::FragmentInfo(0, 0))
504 .OffsetInBits;
505 if (auto NewExpr = DIExpression::createFragmentExpression(
506 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
507 Assign->setExpression(*NewExpr);
508 return;
509 }
510 // Failed to create a fragment expression for this so discard the value,
511 // making this a kill location.
512 auto *Expr = *DIExpression::createFragmentExpression(
513 DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
514 DeadFragment.SizeInBits);
515 Assign->setExpression(Expr);
516 Assign->setKillLocation();
517 };
518
519 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
520 // link to any instructions. Created in the loop below (once).
521 DIAssignID *LinkToNothing = nullptr;
522 LLVMContext &Ctx = Inst->getContext();
523 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
524 if (!LinkToNothing)
525 LinkToNothing = DIAssignID::getDistinct(Ctx);
526 return LinkToNothing;
527 };
528
529 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
530 // overlapping dbg.assign intrinsic. The loop invalidates the iterators
531 // returned by getAssignmentMarkers so save a copy of the markers to iterate
532 // over.
533 auto LinkedRange = at::getAssignmentMarkers(Inst);
534 SmallVector<DbgVariableRecord *> LinkedDVRAssigns =
535 at::getDVRAssignmentMarkers(Inst);
536 SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
537 LinkedRange.end());
538 auto InsertAssignForOverlap = [&](auto *Assign) {
539 std::optional<DIExpression::FragmentInfo> NewFragment;
540 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
541 DeadSliceSizeInBits, Assign,
542 NewFragment) ||
543 !NewFragment) {
544 // We couldn't calculate the intersecting fragment for some reason. Be
545 // cautious and unlink the whole assignment from the store.
546 Assign->setKillAddress();
547 Assign->setAssignId(GetDeadLink());
548 return;
549 }
550 // No intersect.
551 if (NewFragment->SizeInBits == 0)
552 return;
553
554 // Fragments overlap: insert a new dbg.assign for this dead part.
555 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
556 NewAssign->insertAfter(Assign);
557 NewAssign->setAssignId(GetDeadLink());
558 if (NewFragment)
559 SetDeadFragExpr(NewAssign, *NewFragment);
560 NewAssign->setKillAddress();
561 };
562 for_each(Linked, InsertAssignForOverlap);
563 for_each(LinkedDVRAssigns, InsertAssignForOverlap);
564}
565
566static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
567 uint64_t &DeadSize, int64_t KillingStart,
568 uint64_t KillingSize, bool IsOverwriteEnd) {
569 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
570 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
571
572 // We assume that memset/memcpy operates in chunks of the "largest" native
573 // type size and aligned on the same value. That means the optimal start and
574 // size of memset/memcpy should be a multiple of the preferred alignment of
575 // that type, so there is no sense in trying to reduce the store size any
576 // further, since any "extra" stores come for free anyway.
577 // On the other hand, maximum alignment we can achieve is limited by alignment
578 // of initial store.
579
580 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
581 // "largest" native type.
582 // Note: What is the proper way to get that value?
583 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
584 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
585
586 int64_t ToRemoveStart = 0;
587 uint64_t ToRemoveSize = 0;
588 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
589 // maintained on the remaining store.
590 if (IsOverwriteEnd) {
591 // Calculate required adjustment for 'KillingStart' in order to keep
592 // remaining store size aligned on 'PrefAlign'.
593 uint64_t Off =
594 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
595 ToRemoveStart = KillingStart + Off;
596 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
597 return false;
598 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
599 } else {
600 ToRemoveStart = DeadStart;
601 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
602 "Not overlapping accesses?");
603 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
604 // Calculate required adjustment for 'ToRemoveSize' in order to keep
605 // start of the remaining store aligned on 'PrefAlign'.
606 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
607 if (Off != 0) {
608 if (ToRemoveSize <= (PrefAlign.value() - Off))
609 return false;
610 ToRemoveSize -= PrefAlign.value() - Off;
611 }
612 assert(isAligned(PrefAlign, ToRemoveSize) &&
613 "Should preserve selected alignment");
614 }
615
616 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
617 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
618
619 uint64_t NewSize = DeadSize - ToRemoveSize;
620 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
621 // When shortening an atomic memory intrinsic, the newly shortened
622 // length must remain an integer multiple of the element size.
623 const uint32_t ElementSize = AMI->getElementSizeInBytes();
624 if (0 != NewSize % ElementSize)
625 return false;
626 }
627
628 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
629 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
630 << "\n KILLER [" << ToRemoveStart << ", "
631 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
632
633 Value *DeadWriteLength = DeadIntrinsic->getLength();
634 Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
635 DeadIntrinsic->setLength(TrimmedLength);
636 DeadIntrinsic->setDestAlignment(PrefAlign);
637
638 Value *OrigDest = DeadIntrinsic->getRawDest();
639 if (!IsOverwriteEnd) {
640 Value *Indices[1] = {
641 ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
642 Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
643 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
644 DeadI->getIterator());
645 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
646 DeadIntrinsic->setDest(NewDestGEP);
647 }
648
649 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
650 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
651 IsOverwriteEnd);
652
653 // Finally update start and size of dead access.
654 if (!IsOverwriteEnd)
655 DeadStart += ToRemoveSize;
656 DeadSize = NewSize;
657
658 return true;
659}
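//
// Illustrative example (assumed IR, not from this file): the killing store
// below overwrites the last 8 bytes of the 16-byte memset, so tryToShorten
// trims the memset length from 16 to 8 while keeping the remaining region
// aligned on the memset's destination alignment:
//
//   call void @llvm.memset.p0.i64(ptr align 8 %p, i8 0, i64 16, i1 false)
//   %q = getelementptr i8, ptr %p, i64 8
//   store i64 -1, ptr %q          ; kills [8, 16) of the memset
//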
660
661static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
662 int64_t &DeadStart, uint64_t &DeadSize) {
663 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
664 return false;
665
666 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
667 int64_t KillingStart = OII->second;
668 uint64_t KillingSize = OII->first - KillingStart;
669
670 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
671
672 if (KillingStart > DeadStart &&
673 // Note: "KillingStart - DeadStart" is known to be positive due to
674 // preceding check.
675 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
676 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
677 // be non negative due to preceding checks.
678 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
679 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
680 true)) {
681 IntervalMap.erase(OII);
682 return true;
683 }
684 }
685 return false;
686}
687
688static bool tryToShortenBegin(Instruction *DeadI,
689 OverlapIntervalsTy &IntervalMap,
690 int64_t &DeadStart, uint64_t &DeadSize) {
691 if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
692 return false;
693
694 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
695 int64_t KillingStart = OII->second;
696 uint64_t KillingSize = OII->first - KillingStart;
697
698 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
699
700 if (KillingStart <= DeadStart &&
701 // Note: "DeadStart - KillingStart" is known to be non negative due to
702 // preceding check.
703 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
704 // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
705 // be positive due to preceding checks.
706 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
707 "Should have been handled as OW_Complete");
708 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
709 false)) {
710 IntervalMap.erase(OII);
711 return true;
712 }
713 }
714 return false;
715}
716
717static Constant *
718tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
719 int64_t KillingOffset, int64_t DeadOffset,
720 const DataLayout &DL, BatchAAResults &AA,
721 DominatorTree *DT) {
722
723 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
724 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
725 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
726 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
727 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
728 // If the store we find is:
729 // a) partially overwritten by the store to 'Loc'
730 // b) the killing store is fully contained in the dead one and
731 // c) they both have a constant value
732 // d) none of the two stores need padding
733 // Merge the two stores, replacing the dead store's value with a
734 // merge of both values.
735 // TODO: Deal with other constant types (vectors, etc), and probably
736 // some mem intrinsics (if needed)
737
738 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
739 APInt KillingValue =
740 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
741 unsigned KillingBits = KillingValue.getBitWidth();
742 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
743 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
744
745 // Offset of the smaller store inside the larger store
746 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
747 unsigned LShiftAmount =
748 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
749 : BitOffsetDiff;
750 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
751 LShiftAmount + KillingBits);
752 // Clear the bits we'll be replacing, then OR with the smaller
753 // store, shifted appropriately.
754 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
755 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
756 << "\n Killing: " << *KillingI
757 << "\n Merged Value: " << Merged << '\n');
758 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
759 }
760 return nullptr;
761}
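//
// Illustrative example (assumed little-endian layout; IR not from this file):
// the i8 store below is fully contained in the dead i32 store and both values
// are constants, so the two constants are merged and the dead store is
// rewritten instead of deleted:
//
//   store i32 u0x11223344, ptr %p     ; dead (bytes 44 33 22 11 in memory)
//   %q = getelementptr i8, ptr %p, i64 1
//   store i8 u0x55, ptr %q            ; killing, byte offset 1 inside the i32
//
//   ; LShiftAmount = 8, mask covers bits [8, 16), merged value = 0x11225544,
//   ; which replaces the value operand of the dead i32 store.
//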
762
763namespace {
764// Returns true if \p I is an intrinsic that does not read or write memory.
765bool isNoopIntrinsic(Instruction *I) {
766 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
767 switch (II->getIntrinsicID()) {
768 case Intrinsic::lifetime_start:
769 case Intrinsic::lifetime_end:
770 case Intrinsic::invariant_end:
771 case Intrinsic::launder_invariant_group:
772 case Intrinsic::assume:
773 return true;
774 case Intrinsic::dbg_declare:
775 case Intrinsic::dbg_label:
776 case Intrinsic::dbg_value:
777 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
778 default:
779 return false;
780 }
781 }
782 return false;
783}
784
785// Check if we can ignore \p D for DSE.
786bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
787 Instruction *DI = D->getMemoryInst();
788 // Calls that only access inaccessible memory cannot read or write any memory
789 // locations we consider for elimination.
790 if (auto *CB = dyn_cast<CallBase>(DI))
791 if (CB->onlyAccessesInaccessibleMemory())
792 return true;
793
794 // We can eliminate stores to locations not visible to the caller across
795 // throwing instructions.
796 if (DI->mayThrow() && !DefVisibleToCaller)
797 return true;
798
799 // We can remove the dead stores, irrespective of the fence and its ordering
800 // (release/acquire/seq_cst). Fences only constrain the ordering of
801 // already visible stores; they do not make a store visible to other
802 // threads. So, skipping over a fence does not change a store from being
803 // dead.
804 if (isa<FenceInst>(DI))
805 return true;
806
807 // Skip intrinsics that do not really read or modify memory.
808 if (isNoopIntrinsic(DI))
809 return true;
810
811 return false;
812}
813
814// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
815// defined by `MemDef`.
816struct MemoryLocationWrapper {
817 MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
818 bool DefByInitializesAttr)
819 : MemLoc(MemLoc), MemDef(MemDef),
820 DefByInitializesAttr(DefByInitializesAttr) {
821 assert(MemLoc.Ptr && "MemLoc should not be null");
822 UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
823 DefInst = MemDef->getMemoryInst();
824 }
825
826 MemoryLocation MemLoc;
827 const Value *UnderlyingObject;
828 MemoryDef *MemDef;
829 Instruction *DefInst;
830 bool DefByInitializesAttr = false;
831};
832
833// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
834// defined by this MemoryDef.
835struct MemoryDefWrapper {
836 MemoryDefWrapper(MemoryDef *MemDef,
837 ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
838 DefInst = MemDef->getMemoryInst();
839 for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
840 DefinedLocations.push_back(
841 MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
842 }
843 Instruction *DefInst;
844 SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
845};
846
847bool hasInitializesAttr(Instruction *I) {
848 CallBase *CB = dyn_cast<CallBase>(I);
849 return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
850}
851
852struct ArgumentInitInfo {
853 unsigned Idx;
854 bool IsDeadOrInvisibleOnUnwind;
855 ConstantRangeList Inits;
856};
857
858// Return the intersected range list of the initializes attributes of "Args".
859// "Args" are call arguments that alias to each other.
860// If any argument in "Args" doesn't have dead_on_unwind attr and
861// "CallHasNoUnwindAttr" is false, return empty.
862ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
863 bool CallHasNoUnwindAttr) {
864 if (Args.empty())
865 return {};
866
867 // To address unwind, the function should have the nounwind attribute or the
868 // arguments must be dead or invisible on unwind. Otherwise, return empty.
869 for (const auto &Arg : Args) {
870 if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
871 return {};
872 if (Arg.Inits.empty())
873 return {};
874 }
875
876 ConstantRangeList IntersectedIntervals = Args.front().Inits;
877 for (auto &Arg : Args.drop_front())
878 IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);
879
880 return IntersectedIntervals;
881}
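//
// Illustrative example (assumed attribute values): for a nounwind call whose
// two aliasing arguments carry initializes((0, 16)) and initializes((8, 24)),
// the intersected range list is [8, 16) -- only the bytes that every aliasing
// argument is guaranteed to initialize may be used to kill earlier stores.
//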
882
883struct DSEState {
884 Function &F;
885 AliasAnalysis &AA;
886 EarliestEscapeInfo EA;
887
888 /// The single BatchAA instance that is used to cache AA queries. It will
889 /// not be invalidated over the whole run. This is safe, because:
890 /// 1. Only memory writes are removed, so the alias cache for memory
891 /// locations remains valid.
892 /// 2. No new instructions are added (only instructions removed), so cached
893 /// information for a deleted value cannot be accessed by a re-used new
894 /// value pointer.
895 BatchAAResults BatchAA;
896
897 MemorySSA &MSSA;
898 DominatorTree &DT;
899 PostDominatorTree &PDT;
900 const TargetLibraryInfo &TLI;
901 const DataLayout &DL;
902 const LoopInfo &LI;
903
904 // Whether the function contains any irreducible control flow, useful for
905 // being accurately able to detect loops.
906 bool ContainsIrreducibleLoops;
907
908 // All MemoryDefs that potentially could kill other MemDefs.
909 SmallVector<MemoryDef *, 64> MemDefs;
910 // Any that should be skipped as they are already deleted
911 SmallPtrSet<MemoryAccess *, 4> SkipStores;
912 // Keep track whether a given object is captured before return or not.
913 DenseMap<const Value *, bool> CapturedBeforeReturn;
914 // Keep track of all of the objects that are invisible to the caller after
915 // the function returns.
916 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
917 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
918 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
919 // Post-order numbers for each basic block. Used to figure out if memory
920 // accesses are executed before another access.
921 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
922
923 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
924 /// basic block.
925 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
926 // Check if there are root nodes that are terminated by UnreachableInst.
927 // Those roots pessimize post-dominance queries. If there are such roots,
928 // fall back to CFG scan starting from all non-unreachable roots.
929 bool AnyUnreachableExit;
930
931 // Whether or not we should iterate on removing dead stores at the end of the
932 // function due to removing a store causing a previously captured pointer to
933 // no longer be captured.
934 bool ShouldIterateEndOfFunctionDSE;
935
936 /// Dead instructions to be removed at the end of DSE.
937 SmallVector<Instruction *> ToRemove;
938
939 // Class contains self-reference, make sure it's not copied/moved.
939 // Class contains self-reference, make sure it's not copied/moved.
940 DSEState(const DSEState &) = delete;
941 DSEState &operator=(const DSEState &) = delete;
942
943 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
944 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
945 const LoopInfo &LI)
946 : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
947 PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
948 // Collect blocks with throwing instructions not modeled in MemorySSA and
949 // alloc-like objects.
950 unsigned PO = 0;
951 for (BasicBlock *BB : post_order(&F)) {
952 PostOrderNumbers[BB] = PO++;
953 for (Instruction &I : *BB) {
954 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
955 if (I.mayThrow() && !MA)
956 ThrowingBlocks.insert(I.getParent());
957
958 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
959 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
960 (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
961 (EnableInitializesImprovement && hasInitializesAttr(&I))))
962 MemDefs.push_back(MD);
963 }
964 }
965
966 // Treat byval or inalloca arguments the same as Allocas, stores to them are
967 // dead at the end of the function.
968 for (Argument &AI : F.args())
969 if (AI.hasPassPointeeByValueCopyAttr())
970 InvisibleToCallerAfterRet.insert({&AI, true});
971
972 // Collect whether there is any irreducible control flow in the function.
973 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
974
975 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
976 return isa<UnreachableInst>(E->getTerminator());
977 });
978 }
979
980 static void pushMemUses(MemoryAccess *Acc,
981 SmallVectorImpl<MemoryAccess *> &WorkList,
982 SmallPtrSetImpl<MemoryAccess *> &Visited) {
983 for (Use &U : Acc->uses()) {
984 auto *MA = cast<MemoryAccess>(U.getUser());
985 if (Visited.insert(MA).second)
986 WorkList.push_back(MA);
987 }
988 };
989
990 LocationSize strengthenLocationSize(const Instruction *I,
991 LocationSize Size) const {
992 if (auto *CB = dyn_cast<CallBase>(I)) {
993 LibFunc F;
994 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
995 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
996 // Use the precise location size specified by the 3rd argument
997 // for determining whether KillingI overwrites DeadLoc if it is a memset_chk
998 // instruction. memset_chk will write either the amount specified as 3rd
999 // argument or the function will immediately abort and exit the program.
1000 // NOTE: AA may determine NoAlias if it can prove that the access size
1001 // is larger than the allocation size due to that being UB. To avoid
1002 // returning potentially invalid NoAlias results by AA, limit the use of
1003 // the precise location size to isOverwrite.
1004 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
1005 return LocationSize::precise(Len->getZExtValue());
1006 }
1007 }
1008 return Size;
1009 }
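//
// Illustrative example (assumed IR, not from this file): the destination
// location of the call below has an imprecise size, but the constant third
// argument lets isOverwrite treat it as a precise 32-byte write, because
// either all 32 bytes are written or the program aborts before writing:
//
//   call ptr @__memset_chk(ptr %p, i32 0, i64 32, i64 %objsize)
//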
1010
1011 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
1012 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1013 /// location (by \p DeadI instruction).
1014 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
1015 /// \p DeadI, but they both write to the same underlying object. In that
1016 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
1017 /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
1018 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1019 OverwriteResult isOverwrite(const Instruction *KillingI,
1020 const Instruction *DeadI,
1021 const MemoryLocation &KillingLoc,
1022 const MemoryLocation &DeadLoc,
1023 int64_t &KillingOff, int64_t &DeadOff) {
1024 // AliasAnalysis does not always account for loops. Limit overwrite checks
1025 // to dependencies for which we can guarantee they are independent of any
1026 // loops they are in.
1027 if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
1028 return OW_Unknown;
1029
1030 LocationSize KillingLocSize =
1031 strengthenLocationSize(KillingI, KillingLoc.Size);
1032 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
1033 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
1034 const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
1035 const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
1036
1037 // Check whether the killing store overwrites the whole object, in which
1038 // case the size/offset of the dead store does not matter.
1039 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
1040 isIdentifiedObject(KillingUndObj)) {
1041 std::optional<TypeSize> KillingUndObjSize =
1042 getPointerSize(KillingUndObj, DL, TLI, &F);
1043 if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
1044 return OW_Complete;
1045 }
1046
1047 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
1048 // get imprecise values here, though (except for unknown sizes).
1049 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
1050 // In case no constant size is known, try to use the IR values for the number
1051 // of bytes written and check if they match.
1052 const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
1053 const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
1054 if (KillingMemI && DeadMemI) {
1055 const Value *KillingV = KillingMemI->getLength();
1056 const Value *DeadV = DeadMemI->getLength();
1057 if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
1058 return OW_Complete;
1059 }
1060
1061 // Masked stores have imprecise locations, but we can reason about them
1062 // to some extent.
1063 return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
1064 }
1065
1066 const TypeSize KillingSize = KillingLocSize.getValue();
1067 const TypeSize DeadSize = DeadLoc.Size.getValue();
1068 // Bail on doing Size comparison which depends on AA for now
1069 // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
1070 const bool AnyScalable =
1071 DeadSize.isScalable() || KillingLocSize.isScalable();
1072
1073 if (AnyScalable)
1074 return OW_Unknown;
1075 // Query the alias information
1076 AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
1077
1078 // If the start pointers are the same, we just have to compare sizes to see if
1079 // the killing store was larger than the dead store.
1080 if (AAR == AliasResult::MustAlias) {
1081 // Make sure that KillingSize is >= DeadSize.
1082 if (KillingSize >= DeadSize)
1083 return OW_Complete;
1084 }
1085
1086 // If we hit a partial alias we may have a full overwrite
1087 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1088 int32_t Off = AAR.getOffset();
1089 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1090 return OW_Complete;
1091 }
1092
1093 // If we can't resolve the same pointers to the same object, then we can't
1094 // analyze them at all.
1095 if (DeadUndObj != KillingUndObj) {
1096 // Non aliasing stores to different objects don't overlap. Note that
1097 // if the killing store is known to overwrite whole object (out of
1098 // bounds access overwrites whole object as well) then it is assumed to
1099 // completely overwrite any store to the same object even if they don't
1100 // actually alias (see next check).
1101 if (AAR == AliasResult::NoAlias)
1102 return OW_None;
1103 return OW_Unknown;
1104 }
1105
1106 // Okay, we have two different pointers into the same underlying object. Try
1107 // to decompose each pointer into a "base + constant_offset" form. If the
1108 // base pointers are equal, then we can reason about the two stores.
1109 DeadOff = 0;
1110 KillingOff = 0;
1111 const Value *DeadBasePtr =
1112 GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1113 const Value *KillingBasePtr =
1114 GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1115
1116 // If the base pointers still differ, we have two completely different
1117 // stores.
1118 if (DeadBasePtr != KillingBasePtr)
1119 return OW_Unknown;
1120
1121 // The killing access completely overlaps the dead store if and only if
1122 // both start and end of the dead one is "inside" the killing one:
1123 // |<->|--dead--|<->|
1124 // |-----killing------|
1125 // Accesses may overlap if and only if start of one of them is "inside"
1126 // another one:
1127 // |<->|--dead--|<-------->|
1128 // |-------killing--------|
1129 // OR
1130 // |-------dead-------|
1131 // |<->|---killing---|<----->|
1132 //
1133 // We have to be careful here as *Off is signed while *.Size is unsigned.
1134
1135 // Check if the dead access starts "not before" the killing one.
1136 if (DeadOff >= KillingOff) {
1137 // If the dead access ends "not after" the killing access then the
1138 // dead one is completely overwritten by the killing one.
1139 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1140 return OW_Complete;
1141 // If start of the dead access is "before" end of the killing access
1142 // then accesses overlap.
1143 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1144 return OW_MaybePartial;
1145 }
1146 // If start of the killing access is "before" end of the dead access then
1147 // accesses overlap.
1148 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1149 return OW_MaybePartial;
1150 }
1151
1152 // Can reach here only if accesses are known not to overlap.
1153 return OW_None;
1154 }
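//
// Illustrative example (assumed offsets and sizes): with a shared base
// pointer, DeadOff = 4, DeadSize = 4, KillingOff = 0 and KillingSize = 8
// gives DeadOff >= KillingOff and (DeadOff - KillingOff) + DeadSize = 8 <=
// KillingSize, so the dead store is reported as OW_Complete; shrinking
// KillingSize to 6 would instead report OW_MaybePartial.
//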
1155
1156 bool isInvisibleToCallerAfterRet(const Value *V) {
1157 if (isa<AllocaInst>(V))
1158 return true;
1159 auto I = InvisibleToCallerAfterRet.insert({V, false});
1160 if (I.second) {
1161 if (!isInvisibleToCallerOnUnwind(V)) {
1162 I.first->second = false;
1163 } else if (isNoAliasCall(V)) {
1164 I.first->second = !PointerMayBeCaptured(V, true, false);
1165 }
1166 }
1167 return I.first->second;
1168 }
1169
1170 bool isInvisibleToCallerOnUnwind(const Value *V) {
1171 bool RequiresNoCaptureBeforeUnwind;
1172 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1173 return false;
1174 if (!RequiresNoCaptureBeforeUnwind)
1175 return true;
1176
1177 auto I = CapturedBeforeReturn.insert({V, true});
1178 if (I.second)
1179 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1180 // with the killing MemoryDef. But we refrain from doing so for now to
1181 // limit compile-time and this does not cause any changes to the number
1182 // of stores removed on a large test set in practice.
1183 I.first->second = PointerMayBeCaptured(V, false, true);
1184 return !I.first->second;
1185 }
1186
1187 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1188 if (!I->mayWriteToMemory())
1189 return std::nullopt;
1190
1191 if (auto *CB = dyn_cast<CallBase>(I))
1192 return MemoryLocation::getForDest(CB, TLI);
1193
1193
1194 return MemoryLocation::getOrNone(I);
1195 }
1196
1197 // Returns a list of <MemoryLocation, bool> pairs written by I.
1198 // The bool means whether the write is from Initializes attr.
1199 SmallVector<std::pair<MemoryLocation, bool>, 1>
1200 getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1201 SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
1202 if (isMemTerminatorInst(I)) {
1203 if (auto Loc = getLocForTerminator(I))
1204 Locations.push_back(std::make_pair(Loc->first, false));
1205 return Locations;
1206 }
1207
1208 if (auto Loc = getLocForWrite(I))
1209 Locations.push_back(std::make_pair(*Loc, false));
1210
1211 if (ConsiderInitializesAttr) {
1212 for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1213 Locations.push_back(std::make_pair(MemLoc, true));
1214 }
1215 }
1216 return Locations;
1217 }
1218
1219 /// Assuming this instruction has a dead analyzable write, can we delete
1220 /// this instruction?
1221 bool isRemovable(Instruction *I) {
1222 assert(getLocForWrite(I) && "Must have analyzable write");
1223
1224 // Don't remove volatile/atomic stores.
1225 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1226 return SI->isUnordered();
1227
1228 if (auto *CB = dyn_cast<CallBase>(I)) {
1229 // Don't remove volatile memory intrinsics.
1230 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1231 return !MI->isVolatile();
1232
1233 // Never remove dead lifetime intrinsics, e.g. because they are followed
1234 // by a free.
1235 if (CB->isLifetimeStartOrEnd())
1236 return false;
1237
1238 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1239 !CB->isTerminator();
1240 }
1241
1242 return false;
1243 }
1244
1245 /// Returns true if \p UseInst completely overwrites \p DefLoc
1246 /// (stored by \p DefInst).
1247 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1248 Instruction *UseInst) {
1249 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1250 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1251 // MemoryDef.
1252 if (!UseInst->mayWriteToMemory())
1253 return false;
1254
1255 if (auto *CB = dyn_cast<CallBase>(UseInst))
1256 if (CB->onlyAccessesInaccessibleMemory())
1257 return false;
1258
1259 int64_t InstWriteOffset, DepWriteOffset;
1260 if (auto CC = getLocForWrite(UseInst))
1261 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1262 DepWriteOffset) == OW_Complete;
1263 return false;
1264 }
1265
1266 /// Returns true if \p Def is not read before returning from the function.
1267 bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) {
1268 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1269 << *Def->getMemoryInst()
1270 << ") is at the end the function \n");
1273
1274 pushMemUses(Def, WorkList, Visited);
1275 for (unsigned I = 0; I < WorkList.size(); I++) {
1276 if (WorkList.size() >= MemorySSAScanLimit) {
1277 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1278 return false;
1279 }
1280
1281 MemoryAccess *UseAccess = WorkList[I];
1282 if (isa<MemoryPhi>(UseAccess)) {
1283 // AliasAnalysis does not account for loops. Limit elimination to
1284 // candidates for which we can guarantee they always store to the same
1285 // memory location.
1286 if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
1287 return false;
1288
1289 pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
1290 continue;
1291 }
1292 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1293 // of times this is called and/or caching it.
1294 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1295 if (isReadClobber(DefLoc, UseInst)) {
1296 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1297 return false;
1298 }
1299
1300 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1301 pushMemUses(UseDef, WorkList, Visited);
1302 }
1303 return true;
1304 }
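//
// Illustrative example (assumed IR, not from this file): the store below is a
// write at the end of the function because walking the uses of its MemoryDef
// reaches no read of %buf before the return; since %buf is an alloca and thus
// invisible to the caller after the return, the store can be removed:
//
//   %buf = alloca [16 x i8]
//   store i32 0, ptr %buf
//   ret void
//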
1305
1306 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1307 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1308 /// indicating whether \p I is a free-like call.
1309 std::optional<std::pair<MemoryLocation, bool>>
1310 getLocForTerminator(Instruction *I) const {
1311 uint64_t Len;
1312 Value *Ptr;
1313 if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1314 m_Value(Ptr))))
1315 return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1316
1317 if (auto *CB = dyn_cast<CallBase>(I)) {
1318 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1319 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1320 }
1321
1322 return std::nullopt;
1323 }
1324
1325 /// Returns true if \p I is a memory terminator instruction like
1326 /// llvm.lifetime.end or free.
1327 bool isMemTerminatorInst(Instruction *I) const {
1328 auto *CB = dyn_cast<CallBase>(I);
1329 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1330 getFreedOperand(CB, &TLI) != nullptr);
1331 }
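//
// Illustrative example (assumed IR, not from this file): both calls below are
// memory terminators; the lifetime.end terminates exactly the 16 bytes
// starting at %p, while the call to free terminates every access to the
// underlying allocation of %q:
//
//   call void @llvm.lifetime.end.p0(i64 16, ptr %p)
//   call void @free(ptr %q)
//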
1332
1333 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1334 /// instruction \p AccessI.
1335 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1336 Instruction *MaybeTerm) {
1337 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1338 getLocForTerminator(MaybeTerm);
1339
1340 if (!MaybeTermLoc)
1341 return false;
1342
1343 // If the terminator is a free-like call, all accesses to the underlying
1344 // object can be considered terminated.
1345 if (getUnderlyingObject(Loc.Ptr) !=
1346 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1347 return false;
1348
1349 auto TermLoc = MaybeTermLoc->first;
1350 if (MaybeTermLoc->second) {
1351 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1352 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1353 }
1354 int64_t InstWriteOffset = 0;
1355 int64_t DepWriteOffset = 0;
1356 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1357 DepWriteOffset) == OW_Complete;
1358 }
1359
1360 // Returns true if \p Use may read from \p DefLoc.
1361 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1362 if (isNoopIntrinsic(UseInst))
1363 return false;
1364
1365 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1366 // treated as read clobber.
1367 if (auto SI = dyn_cast<StoreInst>(UseInst))
1368 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1369
1370 if (!UseInst->mayReadFromMemory())
1371 return false;
1372
1373 if (auto *CB = dyn_cast<CallBase>(UseInst))
1374 if (CB->onlyAccessesInaccessibleMemory())
1375 return false;
1376
1377 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1378 }
1379
1380 /// Returns true if a dependency between \p Current and \p KillingDef is
1381 /// guaranteed to be loop invariant for the loops that they are in. Either
1382 /// because they are known to be in the same block, in the same loop level or
1383 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1384 /// during execution of the containing function.
1385 bool isGuaranteedLoopIndependent(const Instruction *Current,
1386 const Instruction *KillingDef,
1387 const MemoryLocation &CurrentLoc) {
1388 // If the dependency is within the same block or loop level (being careful
1389 // of irreducible loops), we know that AA will return a valid result for the
1390 // memory dependency. (Both at the function level, outside of any loop,
1391 // would also be valid but we currently disable that to limit compile time).
1392 if (Current->getParent() == KillingDef->getParent())
1393 return true;
1394 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1395 if (!ContainsIrreducibleLoops && CurrentLI &&
1396 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1397 return true;
1398 // Otherwise check the memory location is invariant to any loops.
1399 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1400 }
1401
1402 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1403 /// loop. In particular, this guarantees that it only references a single
1404 /// MemoryLocation during execution of the containing function.
1405 bool isGuaranteedLoopInvariant(const Value *Ptr) {
1406 Ptr = Ptr->stripPointerCasts();
1407 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1408 if (GEP->hasAllConstantIndices())
1409 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1410
1411 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1412 return I->getParent()->isEntryBlock() ||
1413 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1414 }
1415 return true;
1416 }
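//
// Illustrative example (assumed IR, not from this file): %slot below is
// guaranteed loop invariant (a GEP with constant indices off a pointer
// defined in the entry block), whereas %elt is not, because its index %i
// varies per loop iteration:
//
//   %slot = getelementptr i8, ptr %base, i64 8      ; %base defined in entry
//   %elt  = getelementptr i32, ptr %base, i64 %i    ; inside the loop
//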
1417
1418 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1419 // with no read access between them or on any other path to a function exit
1420 // block if \p KillingLoc is not accessible after the function returns. If
1421 // there is no such MemoryDef, return std::nullopt. The returned value may not
1422 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1423 // encounter an aliasing MemoryUse (read).
1424 std::optional<MemoryAccess *>
1425 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1426 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1427 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1428 bool IsMemTerm, unsigned &PartialLimit,
1429 bool IsInitializesAttrMemLoc) {
1430 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1431 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1432 return std::nullopt;
1433 }
1434
1435 MemoryAccess *Current = StartAccess;
1436 Instruction *KillingI = KillingDef->getMemoryInst();
1437 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1438
1439 // Only optimize defining access of KillingDef when directly starting at its
1440 // defining access. The defining access also must only access KillingLoc. At
1441 // the moment we only support instructions with a single write location, so
1442 // it should be sufficient to disable optimizations for instructions that
1443 // also read from memory.
1444 bool CanOptimize = OptimizeMemorySSA &&
1445 KillingDef->getDefiningAccess() == StartAccess &&
1446 !KillingI->mayReadFromMemory();
1447
1448 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1449 std::optional<MemoryLocation> CurrentLoc;
1450 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1451 LLVM_DEBUG({
1452 dbgs() << " visiting " << *Current;
1453 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1454 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1455 << ")";
1456 dbgs() << "\n";
1457 });
1458
1459 // Reached TOP.
1460 if (MSSA.isLiveOnEntryDef(Current)) {
1461 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1462 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1463 // The first clobbering def is... none.
1464 KillingDef->setOptimized(Current);
1465 return std::nullopt;
1466 }
1467
1468 // Cost of a step. Accesses in the same block are more likely to be valid
1469 // candidates for elimination, hence consider them cheaper.
1470 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1471 ? MemorySSASameBBStepCost
1472 : MemorySSAOtherBBStepCost;
1473 if (WalkerStepLimit <= StepCost) {
1474 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1475 return std::nullopt;
1476 }
1477 WalkerStepLimit -= StepCost;
1478
1479 // Return for MemoryPhis. They cannot be eliminated directly and the
1480 // caller is responsible for traversing them.
1481 if (isa<MemoryPhi>(Current)) {
1482 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1483 return Current;
1484 }
1485
1486 // Below, check if CurrentDef is a valid candidate to be eliminated by
1487 // KillingDef. If it is not, check the next candidate.
1488 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1489 Instruction *CurrentI = CurrentDef->getMemoryInst();
1490
1491 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1492 CanOptimize = false;
1493 continue;
1494 }
1495
1496 // Before we try to remove anything, check for any extra throwing
1497 // instructions that block us from DSEing
1498 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1499 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1500 return std::nullopt;
1501 }
1502
1503 // Check for anything that looks like it will be a barrier to further
1504 // removal
1505 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1506 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1507 return std::nullopt;
1508 }
1509
1510 // If Current is known to be on path that reads DefLoc or is a read
1511 // clobber, bail out, as the path is not profitable. We skip this check
1512 // for intrinsic calls, because the code knows how to handle memcpy
1513 // intrinsics.
1514 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1515 return std::nullopt;
1516
1517 // Quick check if there are direct uses that are read-clobbers.
1518 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1519 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1520 return !MSSA.dominates(StartAccess, UseOrDef) &&
1521 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1522 return false;
1523 })) {
1524 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1525 return std::nullopt;
1526 }
1527
1528 // If Current does not have an analyzable write location or is not
1529 // removable, skip it.
1530 CurrentLoc = getLocForWrite(CurrentI);
1531 if (!CurrentLoc || !isRemovable(CurrentI)) {
1532 CanOptimize = false;
1533 continue;
1534 }
1535
1536 // AliasAnalysis does not account for loops. Limit elimination to
1537 // candidates for which we can guarantee they always store to the same
1538 // memory location and are not located in different loops.
1539 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1540 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1541 CanOptimize = false;
1542 continue;
1543 }
1544
1545 if (IsMemTerm) {
1546 // If the killing def is a memory terminator (e.g. lifetime.end), check
1547 // the next candidate if the current Current does not write the same
1548 // underlying object as the terminator.
1549 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1550 CanOptimize = false;
1551 continue;
1552 }
1553 } else {
1554 int64_t KillingOffset = 0;
1555 int64_t DeadOffset = 0;
1556 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1557 KillingOffset, DeadOffset);
1558 if (CanOptimize) {
1559 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1560 // optimized access. Do not optimize if CurrentDef is already the
1561 // defining access of KillingDef.
1562 if (CurrentDef != KillingDef->getDefiningAccess() &&
1563 (OR == OW_Complete || OR == OW_MaybePartial))
1564 KillingDef->setOptimized(CurrentDef);
1565
1566 // Once a may-aliasing def is encountered do not set an optimized
1567 // access.
1568 if (OR != OW_None)
1569 CanOptimize = false;
1570 }
1571
1572 // If Current does not write to the same object as KillingDef, check
1573 // the next candidate.
1574 if (OR == OW_Unknown || OR == OW_None)
1575 continue;
1576 else if (OR == OW_MaybePartial) {
1577 // If KillingDef only partially overwrites Current, check the next
1578 // candidate if the partial step limit is exceeded. This aggressively
1579 // limits the number of candidates for partial store elimination,
1580 // which are less likely to be removable in the end.
1581 if (PartialLimit <= 1) {
1582 WalkerStepLimit -= 1;
1583 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1584 continue;
1585 }
1586 PartialLimit -= 1;
1587 }
1588 }
1589 break;
1590 };
1591
1592 // Accesses to objects accessible after the function returns can only be
1593 // eliminated if the access is dead along all paths to the exit. Collect
1594 // the blocks with killing (=completely overwriting) MemoryDefs and check if
1595 // they cover all paths from MaybeDeadAccess to any function exit.
1596 SmallPtrSet<Instruction *, 16> KillingDefs;
1597 KillingDefs.insert(KillingDef->getMemoryInst());
1598 MemoryAccess *MaybeDeadAccess = Current;
1599 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1600 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1601 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1602 << *MaybeDeadI << ")\n");
1603
1604 SmallVector<MemoryAccess *, 32> WorkList;
1605 SmallPtrSet<MemoryAccess *, 32> Visited;
1606 pushMemUses(MaybeDeadAccess, WorkList, Visited);
1607
1608 // Check if DeadDef may be read.
1609 for (unsigned I = 0; I < WorkList.size(); I++) {
1610 MemoryAccess *UseAccess = WorkList[I];
1611
1612 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1613 // Bail out if the number of accesses to check exceeds the scan limit.
1614 if (ScanLimit < (WorkList.size() - I)) {
1615 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1616 return std::nullopt;
1617 }
1618 --ScanLimit;
1619 NumDomMemDefChecks++;
1620
1621 if (isa<MemoryPhi>(UseAccess)) {
1622 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1623 return DT.properlyDominates(KI->getParent(),
1624 UseAccess->getBlock());
1625 })) {
1626 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1627 continue;
1628 }
1629 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1630 pushMemUses(UseAccess, WorkList, Visited);
1631 continue;
1632 }
1633
1634 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1635 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1636
1637 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1638 return DT.dominates(KI, UseInst);
1639 })) {
1640 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1641 continue;
1642 }
1643
1644 // A memory terminator kills all preceding MemoryDefs and all succeeding
1645 // MemoryAccesses. We do not have to check its users.
1646 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1647 LLVM_DEBUG(
1648 dbgs()
1649 << " ... skipping, memterminator invalidates following accesses\n");
1650 continue;
1651 }
1652
1653 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1654 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1655 pushMemUses(UseAccess, WorkList, Visited);
1656 continue;
1657 }
1658
1659 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1660 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1661 return std::nullopt;
1662 }
1663
1664 // Uses which may read the original MemoryDef mean we cannot eliminate the
1665 // original MD. Stop walk.
1666 // If KillingDef is a CallInst with "initializes" attribute, the reads in
1667 // the callee would be dominated by initializations, so it should be safe.
1668 bool IsKillingDefFromInitAttr = false;
1669 if (IsInitializesAttrMemLoc) {
1670 if (KillingI == UseInst &&
1671 KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
1672 IsKillingDefFromInitAttr = true;
1673 }
1674
1675 if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1676 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1677 return std::nullopt;
1678 }
1679
1680 // If this worklist walks back to the original memory access (and the
1681 // pointer is not guaranteed loop invariant) then we cannot assume that a
1682 // store kills itself.
1683 if (MaybeDeadAccess == UseAccess &&
1684 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1685 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1686 return std::nullopt;
1687 }
1688 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1689 // if it reads the memory location.
1690 // TODO: It would probably be better to check for self-reads before
1691 // calling the function.
1692 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1693 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1694 continue;
1695 }
1696
1697 // Check all uses for MemoryDefs, except for defs completely overwriting
1698 // the original location. Note that we have to check uses of *all*
1699 // MemoryDefs we discover, including non-aliasing ones; otherwise we
1700 // might miss cases like the following
1701 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1702 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1703 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1704 // (The Use points to the *first* Def it may alias)
1705 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1706 // stores [0,1]
1707 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1708 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1709 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1710 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1711 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1712 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1713 LLVM_DEBUG(dbgs()
1714 << " ... found killing def " << *UseInst << "\n");
1715 KillingDefs.insert(UseInst);
1716 }
1717 } else {
1718 LLVM_DEBUG(dbgs()
1719 << " ... found preceding def " << *UseInst << "\n");
1720 return std::nullopt;
1721 }
1722 } else
1723 pushMemUses(UseDef, WorkList, Visited);
1724 }
1725 }
1726
1727 // For accesses to locations visible after the function returns, make sure
1728 // that the location is dead (=overwritten) along all paths from
1729 // MaybeDeadAccess to the exit.
1730 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1731 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1732 for (Instruction *KD : KillingDefs)
1733 KillingBlocks.insert(KD->getParent());
1734 assert(!KillingBlocks.empty() &&
1735 "Expected at least a single killing block");
1736
1737 // Find the common post-dominator of all killing blocks.
1738 BasicBlock *CommonPred = *KillingBlocks.begin();
1739 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1740 if (!CommonPred)
1741 break;
1742 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1743 }
1744
1745 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1746 // there is a path from MaybeDeadAccess to an exit not going through a
1747 // killing block.
1748 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1749 if (!AnyUnreachableExit)
1750 return std::nullopt;
1751
1752 // Fall back to CFG scan starting at all non-unreachable roots if not
1753 // all paths to the exit go through CommonPred.
1754 CommonPred = nullptr;
1755 }
1756
1757 // If CommonPred itself is in the set of killing blocks, we're done.
1758 if (KillingBlocks.count(CommonPred))
1759 return {MaybeDeadAccess};
1760
1761 SetVector<BasicBlock *> WorkList;
1762 // If CommonPred is null, there are multiple exits from the function.
1763 // They all have to be added to the worklist.
1764 if (CommonPred)
1765 WorkList.insert(CommonPred);
1766 else
1767 for (BasicBlock *R : PDT.roots()) {
1768 if (!isa<UnreachableInst>(R->getTerminator()))
1769 WorkList.insert(R);
1770 }
1771
1772 NumCFGTries++;
1773 // Check if all paths starting from an exit node go through one of the
1774 // killing blocks before reaching MaybeDeadAccess.
1775 for (unsigned I = 0; I < WorkList.size(); I++) {
1776 NumCFGChecks++;
1777 BasicBlock *Current = WorkList[I];
1778 if (KillingBlocks.count(Current))
1779 continue;
1780 if (Current == MaybeDeadAccess->getBlock())
1781 return std::nullopt;
1782
1783 // MaybeDeadAccess is reachable from the entry, so we don't have to
1784 // explore unreachable blocks further.
1785 if (!DT.isReachableFromEntry(Current))
1786 continue;
1787
1788 for (BasicBlock *Pred : predecessors(Current))
1789 WorkList.insert(Pred);
1790
1791 if (WorkList.size() >= MemorySSAPathCheckLimit)
1792 return std::nullopt;
1793 }
1794 NumCFGSuccess++;
1795 }
1796
1797 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1798 // potentially dead.
1799 return {MaybeDeadAccess};
1800 }
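  // Illustrative example for the killing-blocks check in getDomMemoryDef above
  // (hypothetical CFG, simplified IR): with a global @g,
  //   bb0: store i32 1, ptr @g      ; MaybeDeadAccess
  //        br i1 %c, label %bb1, label %bb2
  //   bb1: store i32 2, ptr @g      ; killing def
  //        br label %bb3
  //   bb2: br label %bb3            ; no overwrite on this path
  //   bb3: ret void
  // the path bb0 -> bb2 -> bb3 reaches the exit without overwriting @g, so the
  // store in bb0 stays visible after the return and cannot be removed. If bb2
  // also completely overwrote @g, the killing blocks would cover all paths and
  // the store in bb0 would be a candidate for elimination.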
1801
1802 /// Delete dead memory defs and recursively add their operands to ToRemove if
1803 /// they became dead.
1804 void
1805 deleteDeadInstruction(Instruction *SI,
1806 SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) {
1807 MemorySSAUpdater Updater(&MSSA);
1808 SmallVector<Instruction *, 32> NowDeadInsts;
1809 NowDeadInsts.push_back(SI);
1810 --NumFastOther;
1811
1812 while (!NowDeadInsts.empty()) {
1813 Instruction *DeadInst = NowDeadInsts.pop_back_val();
1814 ++NumFastOther;
1815
1816 // Try to preserve debug information attached to the dead instruction.
1817 salvageDebugInfo(*DeadInst);
1818 salvageKnowledge(DeadInst);
1819
1820 // Remove the Instruction from MSSA.
1821 MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1822 bool IsMemDef = MA && isa<MemoryDef>(MA);
1823 if (MA) {
1824 if (IsMemDef) {
1825 auto *MD = cast<MemoryDef>(MA);
1826 SkipStores.insert(MD);
1827 if (Deleted)
1828 Deleted->insert(MD);
1829 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1830 if (SI->getValueOperand()->getType()->isPointerTy()) {
1831 const Value *UO = getUnderlyingObject(SI->getValueOperand());
1832 if (CapturedBeforeReturn.erase(UO))
1833 ShouldIterateEndOfFunctionDSE = true;
1834 InvisibleToCallerAfterRet.erase(UO);
1835 }
1836 }
1837 }
1838
1839 Updater.removeMemoryAccess(MA);
1840 }
1841
1842 auto I = IOLs.find(DeadInst->getParent());
1843 if (I != IOLs.end())
1844 I->second.erase(DeadInst);
1845 // Remove its operands
1846 for (Use &O : DeadInst->operands())
1847 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1848 O.set(PoisonValue::get(O->getType()));
1849 if (isInstructionTriviallyDead(OpI, &TLI))
1850 NowDeadInsts.push_back(OpI);
1851 }
1852
1853 EA.removeInstruction(DeadInst);
1854 // Remove memory defs directly if they don't produce results, but only
1855 // queue other dead instructions for later removal. They may have been
1856 // used as memory locations that have been cached by BatchAA. Removing
1857 // them here may lead to newly created instructions to be allocated at the
1858 // same address, yielding stale cache entries.
1859 if (IsMemDef && DeadInst->getType()->isVoidTy())
1860 DeadInst->eraseFromParent();
1861 else
1862 ToRemove.push_back(DeadInst);
1863 }
1864 }
1865
1866 // Check for any extra throws between \p KillingI and \p DeadI that block
1867 // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1868 // MemoryDefs that may throw are handled during the walk from one def to
1869 // the next.
1870 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1871 const Value *KillingUndObj) {
1872 // First see if we can ignore it by using the fact that KillingUndObj is an
1873 // alloca/alloca-like object that is not visible to the caller during
1874 // execution of the function.
1875 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1876 return false;
1877
1878 if (KillingI->getParent() == DeadI->getParent())
1879 return ThrowingBlocks.count(KillingI->getParent());
1880 return !ThrowingBlocks.empty();
1881 }
1882
1883 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1884 // instructions act as barriers:
1885 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1886 // object.
1887 // * Atomic stores stronger than monotonic.
1888 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1889 // If DeadI may throw it acts as a barrier, unless the accessed object is
1890 // an alloca/alloca-like object that does not escape.
1891 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1892 return true;
1893
1894 // If DeadI is an atomic load/store stronger than monotonic, do not try to
1895 // eliminate/reorder it.
1896 if (DeadI->isAtomic()) {
1897 if (auto *LI = dyn_cast<LoadInst>(DeadI))
1898 return isStrongerThanMonotonic(LI->getOrdering());
1899 if (auto *SI = dyn_cast<StoreInst>(DeadI))
1900 return isStrongerThanMonotonic(SI->getOrdering());
1901 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1902 return isStrongerThanMonotonic(ARMW->getOrdering());
1903 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1904 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1905 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1906 llvm_unreachable("other instructions should be skipped in MemorySSA");
1907 }
1908 return false;
1909 }
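  // Illustrative example for isDSEBarrier above (hypothetical, simplified IR):
  //   store atomic i32 0, ptr %p release, align 4
  // is stronger than monotonic and therefore acts as a barrier; earlier stores
  // are not eliminated or reordered across it by this walk.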
1910
1911 /// Eliminate writes to objects that are not visible in the caller and are not
1912 /// accessed before returning from the function.
1913 bool eliminateDeadWritesAtEndOfFunction() {
1914 bool MadeChange = false;
1915 LLVM_DEBUG(
1916 dbgs()
1917 << "Trying to eliminate MemoryDefs at the end of the function\n");
1918 do {
1919 ShouldIterateEndOfFunctionDSE = false;
1920 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1921 if (SkipStores.contains(Def))
1922 continue;
1923
1924 Instruction *DefI = Def->getMemoryInst();
1925 auto DefLoc = getLocForWrite(DefI);
1926 if (!DefLoc || !isRemovable(DefI)) {
1927 LLVM_DEBUG(dbgs() << " ... could not get location for write or "
1928 "instruction not removable.\n");
1929 continue;
1930 }
1931
1932 // NOTE: Currently eliminating writes at the end of a function is
1933 // limited to MemoryDefs with a single underlying object, to save
1934 // compile-time. In practice it appears the case with multiple
1935 // underlying objects is very uncommon. If it turns out to be important,
1936 // we can use getUnderlyingObjects here instead.
1937 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1938 if (!isInvisibleToCallerAfterRet(UO))
1939 continue;
1940
1941 if (isWriteAtEndOfFunction(Def, *DefLoc)) {
1942 // See through pointer-to-pointer bitcasts
1943 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1944 "of the function\n");
1945 deleteDeadInstruction(DefI);
1946 ++NumFastStores;
1947 MadeChange = true;
1948 }
1949 }
1950 } while (ShouldIterateEndOfFunctionDSE);
1951 return MadeChange;
1952 }
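  // Illustrative example for eliminateDeadWritesAtEndOfFunction above
  // (hypothetical, simplified IR):
  //   define void @f() {
  //     %a = alloca i32
  //     store i32 42, ptr %a
  //     ret void
  //   }
  // %a is not visible to the caller after the return and is never read, so the
  // store can be deleted.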
1953
1954 /// If we have a zero initializing memset following a call to malloc,
1955 /// try folding it into a call to calloc.
1956 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1957 Instruction *DefI = Def->getMemoryInst();
1958 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1959 if (!MemSet)
1960 // TODO: Could handle zero store to small allocation as well.
1961 return false;
1962 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1963 if (!StoredConstant || !StoredConstant->isNullValue())
1964 return false;
1965
1966 if (!isRemovable(DefI))
1967 // The memset might be volatile.
1968 return false;
1969
1970 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1971 F.hasFnAttribute(Attribute::SanitizeAddress) ||
1972 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1973 F.getName() == "calloc")
1974 return false;
1975 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1976 if (!Malloc)
1977 return false;
1978 auto *InnerCallee = Malloc->getCalledFunction();
1979 if (!InnerCallee)
1980 return false;
1981 LibFunc Func;
1982 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1983 Func != LibFunc_malloc)
1984 return false;
1985 // Gracefully handle malloc with unexpected memory attributes.
1986 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
1987 if (!MallocDef)
1988 return false;
1989
1990 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1991 // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
1992 // of the malloc block.
1993 auto *MallocBB = Malloc->getParent(),
1994 *MemsetBB = Memset->getParent();
1995 if (MallocBB == MemsetBB)
1996 return true;
1997 auto *Ptr = Memset->getArgOperand(0);
1998 auto *TI = MallocBB->getTerminator();
1999 BasicBlock *TrueBB, *FalseBB;
2000 if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
2001 m_Zero()),
2002 TrueBB, FalseBB)))
2003 return false;
2004 if (MemsetBB != FalseBB)
2005 return false;
2006 return true;
2007 };
2008
2009 if (Malloc->getOperand(0) != MemSet->getLength())
2010 return false;
2011 if (!shouldCreateCalloc(Malloc, MemSet) ||
2012 !DT.dominates(Malloc, MemSet) ||
2013 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
2014 return false;
2015 IRBuilder<> IRB(Malloc);
2016 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
2017 auto *Calloc =
2018 emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0), IRB,
2019 TLI, Malloc->getType()->getPointerAddressSpace());
2020 if (!Calloc)
2021 return false;
2022
2023 MemorySSAUpdater Updater(&MSSA);
2024 auto *NewAccess =
2025 Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
2026 MallocDef);
2027 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
2028 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
2029 Malloc->replaceAllUsesWith(Calloc);
2030 deleteDeadInstruction(Malloc);
2031 return true;
2032 }
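  // Illustrative example for tryFoldIntoCalloc above (hypothetical, simplified
  // IR): the pair
  //   %p = call ptr @malloc(i64 %n)
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
  // is rewritten to
  //   %p = call ptr @calloc(i64 1, i64 %n)
  // provided the memset length matches the allocation size and the memory is
  // not modified between the two calls.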
2033
2034 // Check if there is a dominating condition that implies that the value
2035 // being stored to a pointer is already present at that location.
2036 bool dominatingConditionImpliesValue(MemoryDef *Def) {
2037 auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
2038 BasicBlock *StoreBB = StoreI->getParent();
2039 Value *StorePtr = StoreI->getPointerOperand();
2040 Value *StoreVal = StoreI->getValueOperand();
2041
2042 DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
2043 if (!IDom)
2044 return false;
2045
2046 auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
2047 if (!BI || !BI->isConditional())
2048 return false;
2049
2050 // In case both blocks are the same, it is not possible to determine
2051 // if optimization is possible. (We would not want to optimize a store
2052 // in the FalseBB if condition is true and vice versa.)
2053 if (BI->getSuccessor(0) == BI->getSuccessor(1))
2054 return false;
2055
2056 Instruction *ICmpL;
2057 CmpPredicate Pred;
2058 if (!match(BI->getCondition(),
2059 m_c_ICmp(Pred,
2060 m_CombineAnd(m_Load(m_Specific(StorePtr)),
2061 m_Instruction(ICmpL)),
2062 m_Specific(StoreVal))) ||
2063 !ICmpInst::isEquality(Pred))
2064 return false;
2065
2066 // In case the else blocks also branches to the if block or the other way
2067 // around it is not possible to determine if the optimization is possible.
2068 if (Pred == ICmpInst::ICMP_EQ &&
2069 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
2070 StoreBB))
2071 return false;
2072
2073 if (Pred == ICmpInst::ICMP_NE &&
2074 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
2075 StoreBB))
2076 return false;
2077
2078 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
2079 MemoryAccess *ClobAcc =
2080 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
2081
2082 return MSSA.dominates(ClobAcc, LoadAcc);
2083 }
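  // Illustrative example for dominatingConditionImpliesValue above
  // (hypothetical C-like source):
  //   if (*p == v)
  //     *p = v;
  // The store is only reached when *p already equals v, so it is a no-op as
  // long as nothing can clobber *p between the load feeding the compare and
  // the store itself.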
2084
2085 /// \returns true if \p Def is a no-op store, either because it
2086 /// directly stores back a loaded value or stores zero to a calloced object.
2087 bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
2088 Instruction *DefI = Def->getMemoryInst();
2089 StoreInst *Store = dyn_cast<StoreInst>(DefI);
2090 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2091 Constant *StoredConstant = nullptr;
2092 if (Store)
2093 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
2094 else if (MemSet)
2095 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2096 else
2097 return false;
2098
2099 if (!isRemovable(DefI))
2100 return false;
2101
2102 if (StoredConstant) {
2103 Constant *InitC =
2104 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
2105 // If the clobbering access is LiveOnEntry, no instructions between them
2106 // can modify the memory location.
2107 if (InitC && InitC == StoredConstant)
2108 return MSSA.isLiveOnEntryDef(
2109 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
2110 }
2111
2112 if (!Store)
2113 return false;
2114
2115 if (dominatingConditionImpliesValue(Def))
2116 return true;
2117
2118 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2119 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2120 // Get the defining access for the load.
2121 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2122 // Fast path: the defining accesses are the same.
2123 if (LoadAccess == Def->getDefiningAccess())
2124 return true;
2125
2126 // Look through phi accesses. Recursively scan all phi accesses by
2127 // adding them to a worklist. Bail when we run into a memory def that
2128 // does not match LoadAccess.
2129 SetVector<MemoryAccess *> ToCheck;
2130 MemoryAccess *Current =
2131 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2132 // We don't want to bail when we run into the store memory def. But,
2133 // the phi access may point to it. So, pretend like we've already
2134 // checked it.
2135 ToCheck.insert(Def);
2136 ToCheck.insert(Current);
2137 // Start at current (1) to simulate already having checked Def.
2138 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2139 Current = ToCheck[I];
2140 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2141 // Check all the operands.
2142 for (auto &Use : PhiAccess->incoming_values())
2143 ToCheck.insert(cast<MemoryAccess>(&Use));
2144 continue;
2145 }
2146
2147 // If we found a memory def, bail. This happens when we have an
2148 // unrelated write in between an otherwise noop store.
2149 assert(isa<MemoryDef>(Current) &&
2150 "Only MemoryDefs should reach here.");
2151 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2152 // We are searching for the definition of the store's destination.
2153 // So, if that is the same definition as the load, then this is a
2154 // noop. Otherwise, fail.
2155 if (LoadAccess != Current)
2156 return false;
2157 }
2158 return true;
2159 }
2160 }
2161
2162 return false;
2163 }
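  // Illustrative example for storeIsNoop above (hypothetical, simplified IR):
  //   %v = load i32, ptr %p
  //   store i32 %v, ptr %p
  // The store writes back the value just loaded; if no other MemoryDef clobbers
  // %p in between, it is removed as a no-op store.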
2164
2165 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2166 bool Changed = false;
2167 for (auto OI : IOL) {
2168 Instruction *DeadI = OI.first;
2169 MemoryLocation Loc = *getLocForWrite(DeadI);
2170 assert(isRemovable(DeadI) && "Expect only removable instruction");
2171
2172 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2173 int64_t DeadStart = 0;
2174 uint64_t DeadSize = Loc.Size.getValue();
2175 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2176 OverlapIntervalsTy &IntervalMap = OI.second;
2177 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2178 if (IntervalMap.empty())
2179 continue;
2180 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2181 }
2182 return Changed;
2183 }
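  // Illustrative example for removePartiallyOverlappedStores above
  // (hypothetical, simplified IR):
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 32, i1 false)
  //   store i64 1, ptr %p
  // The later 8-byte store overwrites the start of the memset, so the memset
  // can be shortened to the remaining 24 bytes starting at offset 8.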
2184
2185 /// Eliminates writes to locations where the value that is being written
2186 /// is already stored at the same location.
2187 bool eliminateRedundantStoresOfExistingValues() {
2188 bool MadeChange = false;
2189 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2190 "already existing value\n");
2191 for (auto *Def : MemDefs) {
2192 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2193 continue;
2194
2195 Instruction *DefInst = Def->getMemoryInst();
2196 auto MaybeDefLoc = getLocForWrite(DefInst);
2197 if (!MaybeDefLoc || !isRemovable(DefInst))
2198 continue;
2199
2200 MemoryDef *UpperDef;
2201 // To conserve compile-time, we avoid walking to the next clobbering def.
2202 // Instead, we just try to get the optimized access, if it exists. DSE
2203 // will try to optimize defs during the earlier traversal.
2204 if (Def->isOptimized())
2205 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2206 else
2207 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2208 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2209 continue;
2210
2211 Instruction *UpperInst = UpperDef->getMemoryInst();
2212 auto IsRedundantStore = [&]() {
2213 if (DefInst->isIdenticalTo(UpperInst))
2214 return true;
2215 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2216 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2217 // MemSetInst must have a write location.
2218 auto UpperLoc = getLocForWrite(UpperInst);
2219 if (!UpperLoc)
2220 return false;
2221 int64_t InstWriteOffset = 0;
2222 int64_t DepWriteOffset = 0;
2223 auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
2224 InstWriteOffset, DepWriteOffset);
2225 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2226 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2227 OR == OW_Complete;
2228 }
2229 }
2230 return false;
2231 };
2232
2233 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2234 continue;
2235 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2236 << '\n');
2237 deleteDeadInstruction(DefInst);
2238 NumRedundantStores++;
2239 MadeChange = true;
2240 }
2241 return MadeChange;
2242 }
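  // Illustrative example for eliminateRedundantStoresOfExistingValues above
  // (hypothetical, simplified IR):
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
  //   store i32 0, ptr %p
  // The store writes bytes the memset already set to the same value and is
  // removed, provided nothing may read or clobber %p in between.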
2243
2244 // Return the locations written by the initializes attribute.
2245 // Note that this function considers:
2246 // 1. Unwind edge: use "initializes" attribute only if the callee has
2247 // "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
2248 // or the argument is invisible to caller on unwind. That is, we don't
2249 // perform incorrect DSE on unwind edges in the current function.
2250 // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
2251 // the intersected range list of their "initializes" attributes.
2252 SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
2253
2254 // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
2255 // killed by `KillingLocWrapper.MemDef`. Return whether
2256 // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
2257 std::pair<bool, bool>
2258 eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
2259
2260 // Try to eliminate dead defs killed by `KillingDefWrapper` and return
2261 // whether any change was made.
2262 bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
2263};
2264
2265SmallVector<MemoryLocation, 1>
2266DSEState::getInitializesArgMemLoc(const Instruction *I) {
2267 const CallBase *CB = dyn_cast<CallBase>(I);
2268 if (!CB)
2269 return {};
2270
2271 // Collect aliasing arguments and their initializes ranges.
2272 MapVector<Value *, SmallVector<ArgumentInitInfo, 2>> Arguments;
2273 for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2274 ConstantRangeList Inits;
2275 Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
2276 if (InitializesAttr.isValid())
2277 Inits = InitializesAttr.getValueAsConstantRangeList();
2278
2279 Value *CurArg = CB->getArgOperand(Idx);
2280 // We don't perform incorrect DSE on unwind edges in the current function,
2281 // and use the "initializes" attribute to kill dead stores if:
2282 // - The call does not throw exceptions, "CB->doesNotThrow()".
2283 // - Or the callee parameter has "dead_on_unwind" attribute.
2284 // - Or the argument is invisible to caller on unwind, and there are no
2285 // unwind edges from this call in the current function (e.g. `CallInst`).
2286 bool IsDeadOrInvisibleOnUnwind =
2287 CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
2288 (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
2289 ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
2290 bool FoundAliasing = false;
2291 for (auto &[Arg, AliasList] : Arguments) {
2292 auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
2293 MemoryLocation::getBeforeOrAfter(CurArg));
2294 if (AAR == AliasResult::NoAlias) {
2295 continue;
2296 } else if (AAR == AliasResult::MustAlias) {
2297 FoundAliasing = true;
2298 AliasList.push_back(InitInfo);
2299 } else {
2300 // For PartialAlias and MayAlias, there is an offset or may be an
2301 // unknown offset between the arguments and we insert an empty init
2302 // range to discard the entire initializes info while intersecting.
2303 FoundAliasing = true;
2304 AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
2305 ConstantRangeList()});
2306 }
2307 }
2308 if (!FoundAliasing)
2309 Arguments[CurArg] = {InitInfo};
2310 }
2311
2312 SmallVector<MemoryLocation, 1> Locations;
2313 for (const auto &[_, Args] : Arguments) {
2314 auto IntersectedRanges =
2315 getIntersectedInitRangeList(Args, CB->doesNotThrow());
2316 if (IntersectedRanges.empty())
2317 continue;
2318
2319 for (const auto &Arg : Args) {
2320 for (const auto &Range : IntersectedRanges) {
2321 int64_t Start = Range.getLower().getSExtValue();
2322 int64_t End = Range.getUpper().getSExtValue();
2323 // For now, we only handle locations starting at offset 0.
2324 if (Start == 0)
2325 Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
2326 LocationSize::precise(End - Start),
2327 CB->getAAMetadata()));
2328 }
2329 }
2330 }
2331 return Locations;
2332}
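// Illustrative example for getInitializesArgMemLoc above (hypothetical
// declaration, simplified IR): for a call such as
//   call void @init(ptr initializes((0, 8)) dead_on_unwind %p)
// this returns a MemoryLocation covering bytes [0, 8) of %p, which the caller
// can treat like a write when looking for earlier dead stores to %p.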
2333
2334std::pair<bool, bool>
2335DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2336 bool Changed = false;
2337 bool DeletedKillingLoc = false;
2338 unsigned ScanLimit = MemorySSAScanLimit;
2339 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2340 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2341 // Worklist of MemoryAccesses that may be killed by
2342 // "KillingLocWrapper.MemDef".
2343 SetVector<MemoryAccess *> ToCheck;
2344 // Track MemoryAccesses that have been deleted in the loop below, so we can
2345 // skip them. Don't use SkipStores for this, which may contain reused
2346 // MemoryAccess addresses.
2347 SmallPtrSet<MemoryAccess *, 8> Deleted;
2348 [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2349 ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());
2350
2351 // Check if MemoryAccesses in the worklist are killed by
2352 // "KillingLocWrapper.MemDef".
2353 for (unsigned I = 0; I < ToCheck.size(); I++) {
2354 MemoryAccess *Current = ToCheck[I];
2355 if (Deleted.contains(Current))
2356 continue;
2357 std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2358 KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
2359 KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2360 isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
2361 KillingLocWrapper.DefByInitializesAttr);
2362
2363 if (!MaybeDeadAccess) {
2364 LLVM_DEBUG(dbgs() << " finished walk\n");
2365 continue;
2366 }
2367 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2368 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2369 if (isa<MemoryPhi>(DeadAccess)) {
2370 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2371 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2372 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2373 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2374 BasicBlock *PhiBlock = DeadAccess->getBlock();
2375
2376 // We only consider incoming MemoryAccesses that come before the
2377 // MemoryPhi. Otherwise we could discover candidates that do not
2378 // strictly dominate our starting def.
2379 if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
2380 ToCheck.insert(IncomingAccess);
2381 }
2382 continue;
2383 }
2384 // We cannot apply the initializes attribute to DeadAccess/DeadDef.
2385 // It would incorrectly consider a call instruction as a redundant store
2386 // and remove the call instruction.
2387 // TODO: this conflates the existence of a MemoryLocation with being able
2388 // to delete the instruction. Fix isRemovable() to consider calls with
2389 // side effects that cannot be removed, e.g. calls with the initializes
2390 // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2391 MemoryDefWrapper DeadDefWrapper(
2392 cast<MemoryDef>(DeadAccess),
2393 getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
2394 /*ConsiderInitializesAttr=*/false));
2395 assert(DeadDefWrapper.DefinedLocations.size() == 1);
2396 MemoryLocationWrapper &DeadLocWrapper =
2397 DeadDefWrapper.DefinedLocations.front();
2398 LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
2399 ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
2400 NumGetDomMemoryDefPassed++;
2401
2402 if (!DebugCounter::shouldExecute(MemorySSACounter))
2403 continue;
2404 if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
2405 if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2406 continue;
2407 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2408 << *DeadLocWrapper.DefInst << "\n KILLER: "
2409 << *KillingLocWrapper.DefInst << '\n');
2410 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2411 ++NumFastStores;
2412 Changed = true;
2413 } else {
2414 // Check if DeadI overwrites KillingI.
2415 int64_t KillingOffset = 0;
2416 int64_t DeadOffset = 0;
2417 OverwriteResult OR =
2418 isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
2419 KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2420 KillingOffset, DeadOffset);
2421 if (OR == OW_MaybePartial) {
2422 auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
2423 OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2424 KillingOffset, DeadOffset,
2425 DeadLocWrapper.DefInst, IOL);
2426 }
2427 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2428 auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
2429 auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
2430 // We are re-using tryToMergePartialOverlappingStores, which requires
2431 // DeadSI to dominate KillingSI.
2432 // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2433 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2434 if (Constant *Merged = tryToMergePartialOverlappingStores(
2435 KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
2436 &DT)) {
2437
2438 // Update stored value of earlier store to merged constant.
2439 DeadSI->setOperand(0, Merged);
2440 ++NumModifiedStores;
2441 Changed = true;
2442 DeletedKillingLoc = true;
2443
2444 // Remove killing store and remove any outstanding overlap
2445 // intervals for the updated store.
2446 deleteDeadInstruction(KillingSI, &Deleted);
2447 auto I = IOLs.find(DeadSI->getParent());
2448 if (I != IOLs.end())
2449 I->second.erase(DeadSI);
2450 break;
2451 }
2452 }
2453 }
2454 if (OR == OW_Complete) {
2455 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2456 << *DeadLocWrapper.DefInst << "\n KILLER: "
2457 << *KillingLocWrapper.DefInst << '\n');
2458 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2459 ++NumFastStores;
2460 Changed = true;
2461 }
2462 }
2463 }
2464
2465 assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2466 "SkipStores and Deleted out of sync?");
2467
2468 return {Changed, DeletedKillingLoc};
2469}
2470
2471bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2472 if (KillingDefWrapper.DefinedLocations.empty()) {
2473 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2474 << *KillingDefWrapper.DefInst << "\n");
2475 return false;
2476 }
2477
2478 bool MadeChange = false;
2479 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2480 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2481 << *KillingLocWrapper.MemDef << " ("
2482 << *KillingLocWrapper.DefInst << ")\n");
2483 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2484 MadeChange |= Changed;
2485
2486 // Check if the store is a no-op.
2487 if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
2488 KillingLocWrapper.UnderlyingObject)) {
2489 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2490 << *KillingLocWrapper.DefInst << '\n');
2491 deleteDeadInstruction(KillingLocWrapper.DefInst);
2492 NumRedundantStores++;
2493 MadeChange = true;
2494 continue;
2495 }
2496 // Can we form a calloc from a memset/malloc pair?
2497 if (!DeletedKillingLoc &&
2498 tryFoldIntoCalloc(KillingLocWrapper.MemDef,
2499 KillingLocWrapper.UnderlyingObject)) {
2500 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2501 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2502 deleteDeadInstruction(KillingLocWrapper.DefInst);
2503 MadeChange = true;
2504 continue;
2505 }
2506 }
2507 return MadeChange;
2508}
2509
2510static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2511 DominatorTree &DT, PostDominatorTree &PDT,
2512 const TargetLibraryInfo &TLI,
2513 const LoopInfo &LI) {
2514 bool MadeChange = false;
2515 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2516 // For each store:
2517 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2518 MemoryDef *KillingDef = State.MemDefs[I];
2519 if (State.SkipStores.count(KillingDef))
2520 continue;
2521
2522 MemoryDefWrapper KillingDefWrapper(
2523 KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
2524 EnableInitializesImprovement));
2525 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2526 }
2527
2528 if (EnablePartialOverwriteTracking)
2529 for (auto &KV : State.IOLs)
2530 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2531
2532 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2533 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2534
2535 while (!State.ToRemove.empty()) {
2536 Instruction *DeadInst = State.ToRemove.pop_back_val();
2537 DeadInst->eraseFromParent();
2538 }
2539
2540 return MadeChange;
2541}
2542} // end anonymous namespace
2543
2544//===----------------------------------------------------------------------===//
2545// DSE Pass
2546//===----------------------------------------------------------------------===//
2547PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2548 AliasAnalysis &AA = AM.getResult<AAManager>(F);
2549 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2550 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2551 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2552 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2553 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2554
2555 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2556
2557#ifdef LLVM_ENABLE_STATS
2558 if (AreStatisticsEnabled())
2559 for (auto &I : instructions(F))
2560 NumRemainingStores += isa<StoreInst>(&I);
2561#endif
2562
2563 if (!Changed)
2564 return PreservedAnalyses::all();
2565
2566 PreservedAnalyses PA;
2567 PA.preserveSet<CFGAnalyses>();
2568 PA.preserve<MemorySSAAnalysis>();
2569 PA.preserve<LoopAnalysis>();
2570 return PA;
2571}
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
static cl::opt< bool > EnableInitializesImprovement("enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE"))
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
std::map< int64_t, int64_t > OverlapIntervalsTy
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void deleteDeadInstruction(Instruction *I)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A manager for alias analyses.
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
The possible results of an alias query.
Definition: AliasAnalysis.h:77
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:95
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< ConstantRange > getValueAsConstantRangeList() const
Return the attribute's value as a ConstantRange array.
Definition: Attributes.cpp:412
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:208
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
bool onlyAccessesInaccessibleMemory() const
Determine if the function may only access memory that is inaccessible from the IR.
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Get the attribute of a given kind from a given arg.
Definition: InstrTypes.h:1629
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:1923
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1294
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getArgOperandWithAttribute(Attribute::AttrKind Kind) const
If one of the arguments has the specified attribute, returns its operand value.
unsigned arg_size() const
Definition: InstrTypes.h:1292
This class represents a function call, abstracting a target machine's calling convention.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This class represents a list of constant ranges.
ConstantRangeList intersectWith(const ConstantRangeList &CRL) const
Return the range list that results from the intersection of this ConstantRangeList with another Const...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition: Constant.h:42
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Assignment ID.
static DIAssignID * getDistinct(LLVMContext &Context)
static std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
iterator_range< root_iterator > roots()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Definition: Instructions.h:980
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
bool isTerminator() const
Definition: Instruction.h:277
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
const_iterator begin() const
Definition: IntervalMap.h:1146
bool empty() const
empty - Return true when no intervals are mapped.
Definition: IntervalMap.h:1101
const_iterator end() const
Definition: IntervalMap.h:1158
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
static LocationSize precise(uint64_t Value)
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
iterator find(const KeyT &Key)
Definition: MapVector.h:167
Value * getLength() const
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
BasicBlock * getBlock() const
Definition: MemorySSA.h:161
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:370
void setOptimized(MemoryAccess *MA)
Definition: MemorySSA.h:391
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory referenced by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underlying object.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying object.
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
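
A hedged sketch of the MemoryLocation helpers above: build a precise location for a store and a conservative "anything at or after this pointer" location. The helper name describeStoreLocation is hypothetical.

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

void describeStoreLocation(const StoreInst *SI) {
  // Precise location: the exact pointer and size of the stored value.
  MemoryLocation Precise = MemoryLocation::get(SI);

  // A precise, non-scalable size can be read back in address units.
  if (Precise.Size.isPrecise() && !Precise.Size.isScalable()) {
    uint64_t Bytes = Precise.Size.getValue().getFixedValue();
    (void)Bytes;
  }

  // Conservative location: may touch anything at or after the pointer,
  // but stays within the underlying object.
  MemoryLocation After = MemoryLocation::getAfter(SI->getPointerOperand());
  (void)After;
}
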
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:928
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1045
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1603
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates MemoryAccess B.
Definition: MemorySSA.cpp:2173
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1590
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:719
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:739
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:259
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:256
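
A hedged sketch tying the MemorySSA entries above together: starting from a store, fetch its MemorySSA access, ask the walker for the nearest dominating clobbering access, and distinguish the live-on-entry case. The function name inspectClobber is hypothetical.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

void inspectClobber(MemorySSA &MSSA, BatchAAResults &BAA, StoreInst *SI) {
  MemoryUseOrDef *Acc = MSSA.getMemoryAccess(SI);
  if (!Acc)
    return; // SI has no MemorySSA access.

  // Walk upwards past non-clobbering definitions to the nearest real clobber.
  MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(SI, BAA);

  if (MSSA.isLiveOnEntryDef(Clobber))
    return; // Nothing in the function writes this location before SI.

  // The clobber may also be a MemoryPhi, hence the dyn_cast.
  if (auto *DefAcc = dyn_cast<MemoryUseOrDef>(Clobber)) {
    Instruction *ClobberInst = DefAcc->getMemoryInst();
    (void)ClobberInst;
  }
}
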
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doing it.
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessors requires PHI translation.
Definition: PHITransAddr.h:62
Value * getAddr() const
Definition: PHITransAddr.h:58
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominator tree.
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
iterator begin() const
Definition: SmallPtrSet.h:472
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
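
An illustrative sketch of the small-container idioms above: a SmallVector worklist plus a SmallPtrSet visited set, the common pattern for walking use chains without revisiting values. The helper name walkUsers is hypothetical.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

using namespace llvm;

void walkUsers(Value *Root) {
  SmallVector<Value *, 8> Worklist;
  SmallPtrSet<Value *, 16> Visited;

  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue; // already handled

    // Queue instruction users we have not seen yet.
    for (User *U : V->users())
      if (isa<Instruction>(U) && !Visited.contains(U))
        Worklist.push_back(U);
  }
}
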
An instruction for storing to memory.
Definition: Instructions.h:292
Value * getValueOperand()
Definition: Instructions.h:378
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
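
A hedged sketch of the TargetLibraryInfo queries above: map a callee's name to a LibFunc and confirm the target actually provides it. LibFunc_free is used only as an example of a recognized library function; the helper name calleeIsKnownFree is hypothetical.

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"

using namespace llvm;

bool calleeIsKnownFree(const Function *Callee, const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc LF;
  // getLibFunc recognizes the name; has() checks availability on this target.
  return TLI.getLibFunc(Callee->getName(), LF) && TLI.has(LF) &&
         LF == LibFunc_free;
}
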
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:288
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:826
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:245
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
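
A hedged sketch of the PatternMatch combinators above: recognize a conditional branch on icmp eq (load %p), 0, capturing the load instruction and both successor blocks. The function name matchBranchOnLoadEqZero is hypothetical.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace PatternMatch;

bool matchBranchOnLoadEqZero(Instruction *Term, Instruction *&Load,
                             BasicBlock *&TrueBB, BasicBlock *&FalseBB) {
  // m_CombineAnd both requires "a load of something" and captures the matched
  // instruction; m_SpecificICmp pins the predicate to eq.
  return match(Term,
               m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ,
                                   m_CombineAnd(m_Load(m_Value()),
                                                m_Instruction(Load)),
                                   m_Zero()),
                    TrueBB, FalseBB));
}
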
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use ID as an operand.
Definition: DebugInfo.cpp:1866
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:240
bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgAssignIntrinsic *DbgAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to (Dest + S...
Definition: DebugInfo.cpp:1961
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1732
Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value, return said value in the requested type.
bool isStrongerThanMonotonic(AtomicOrdering AO)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1656
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
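
A hedged sketch of the pointer-analysis helpers above: decompose a pointer into a base plus a constant byte offset, and separately strip it back to the underlying object. The helper name decomposePointer is hypothetical.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

using namespace llvm;

void decomposePointer(Value *Ptr, const DataLayout &DL) {
  int64_t Offset = 0;
  // Peels constant-offset GEPs and casts; Offset accumulates the byte offset.
  Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL);

  // Strips GEPs and casts without tracking offsets, stopping at the object
  // (alloca, global, call, argument, ...) the pointer is based on.
  const Value *Object = getUnderlyingObject(Ptr);

  (void)Base;
  (void)Object;
}
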
iterator_range< po_iterator< T > > post_order(const T &G)
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:406
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (which is required to exist).
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an address space.
Definition: Function.cpp:1187
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI, unsigned AddrSpace)
Emit a call to the calloc function.
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Align.
Definition: Alignment.h:197
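
A tiny sketch of the alignment helpers above: check a byte count against an alignment and compute the padding needed to reach the next aligned value. The function name paddingTo16 is hypothetical.

#include <cstdint>
#include "llvm/Support/Alignment.h"

using namespace llvm;

uint64_t paddingTo16(uint64_t SizeInBytes) {
  Align Sixteen(16);
  // isAligned: is SizeInBytes already a multiple of 16?
  if (isAligned(Sixteen, SizeInBytes))
    return 0;
  // offsetToAlignment: bytes needed to reach the next multiple, e.g. 13 -> 3.
  return offsetToAlignment(SizeInBytes, Sixteen);
}
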
bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert it before I.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it is.
auto predecessors(const MachineBasicBlock *BB)
bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice, which is based on C++'s definition.
bool isRefSet(const ModRefInfo MRI)
Definition: ModRef.h:51
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Various options to control the behavior of getObjectSize.
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254