doxygen/DeadStoreElimination_8cpp_source.html

//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// The code below implements dead store elimination using MemorySSA. It uses

// the following general approach: given a MemoryDef, walk upwards to find

// clobbering MemoryDefs that may be killed by the starting def. Then check

// that there are no uses that may read the location of the original MemoryDef

// in between both MemoryDefs. A bit more concretely:

//

// For all MemoryDefs StartDef:

// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking

//    upwards.

// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by

//    checking all uses starting at MaybeDeadAccess and walking until we see

//    StartDef.

// 3. For each found CurrentDef, check that:

//   1. There are no barrier instructions between CurrentDef and StartDef (like

//       throws or stores with ordering constraints).

//   2. StartDef is executed whenever CurrentDef is executed.

//   3. StartDef completely overwrites CurrentDef.

// 4. Erase CurrentDef from the function and MemorySSA.

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/Scalar/DeadStoreElimination.h"

#include "llvm/ADT/APInt.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/MapVector.h"

#include "llvm/ADT/PostOrderIterator.h"

#include "llvm/ADT/SetVector.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/ADT/StringRef.h"

#include "llvm/Analysis/AliasAnalysis.h"

#include "llvm/Analysis/CaptureTracking.h"

#include "llvm/Analysis/GlobalsModRef.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/MemoryBuiltins.h"

#include "llvm/Analysis/MemoryLocation.h"

#include "llvm/Analysis/MemorySSA.h"

#include "llvm/Analysis/MemorySSAUpdater.h"

#include "llvm/Analysis/MustExecute.h"

#include "llvm/Analysis/PostDominators.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/IR/Argument.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constant.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/DebugInfo.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstIterator.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/IntrinsicInst.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/PassManager.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/Value.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/DebugCounter.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"

#include "llvm/Transforms/Utils/BuildLibCalls.h"

#include "llvm/Transforms/Utils/Local.h"

#include <algorithm>

#include <cassert>

#include <cstdint>

#include <iterator>

#include <map>

#include <optional>

#include <utility>


using namespace llvm;

using namespace PatternMatch;


#define DEBUG_TYPE "dse"


// Counters declared for this pass through the STATISTIC macro; each pairs a
// counter variable with the description string given next to it.
STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");

STATISTIC(NumRedundantStores, "Number of redundant stores deleted");

STATISTIC(NumFastStores, "Number of stores deleted");

STATISTIC(NumFastOther, "Number of other instrs removed");

// Incremented by isPartialOverwrite when a set of partial overwrites together
// covers a dead store completely.
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");

STATISTIC(NumModifiedStores, "Number of stores modified");

// NOTE(review): the three NumCFG* counters below reuse the description string
// of NumModifiedStores ("Number of stores modified"). This looks like a
// copy-paste leftover; confirm what each counter tracks at its increment site
// and give it a distinct description.
STATISTIC(NumCFGChecks, "Number of stores modified");

STATISTIC(NumCFGTries, "Number of stores modified");

STATISTIC(NumCFGSuccess, "Number of stores modified");

STATISTIC(NumGetDomMemoryDefPassed,

          "Number of times a valid candidate is returned from getDomMemoryDef");

STATISTIC(NumDomMemDefChecks,

          "Number iterations check for reads in getDomMemoryDef");


// Debug counter registered under the name "dse-memoryssa".
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",

              "Controls which MemoryDefs are eliminated.");


// Hidden command-line options controlling DSE. The "default = N" text in each
// description mirrors the cl::init value of the same option.

/// Enables the partial-overwrite interval tracking consulted by
/// isPartialOverwrite.
static cl::opt<bool> EnablePartialOverwriteTracking(
    "enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden,
    cl::desc("Enable partial-overwrite tracking in DSE"));

/// Enables the OW_PartialEarlierWithFullLater classification in
/// isPartialOverwrite (a killing store fully contained in a dead store).
static cl::opt<bool> EnablePartialStoreMerging(
    "enable-dse-partial-store-merging", cl::init(true), cl::Hidden,
    cl::desc("Enable partial store merging in DSE"));

static cl::opt<unsigned>
    MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
                       cl::desc("The number of memory instructions to scan for "
                                "dead store elimination (default = 150)"));

static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
    "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
    cl::desc("The maximum number of steps while walking upwards to find "
             "MemoryDefs that may be killed (default = 90)"));

static cl::opt<unsigned> MemorySSAPartialStoreLimit(
    "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
    cl::desc("The maximum number candidates that only partially overwrite the "
             "killing MemoryDef to consider"
             " (default = 5)"));

static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
    "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
    cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
             "other stores per basic block (default = 5000)"));

// The two step-cost descriptions are built from adjacent string literals; the
// piece before "(default = ...)" must end in a space, otherwise the help text
// reads "killing MemoryDef(default = 1)".
static cl::opt<unsigned> MemorySSASameBBStepCost(
    "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
    cl::desc(
        "The cost of a step in the same basic block as the killing MemoryDef "
        "(default = 1)"));

static cl::opt<unsigned>
    MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
                             cl::Hidden,
                             cl::desc("The cost of a step in a different basic "
                                      "block than the killing MemoryDef "
                                      "(default = 5)"));

static cl::opt<unsigned> MemorySSAPathCheckLimit(
    "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
    cl::desc("The maximum number of blocks to check when trying to prove that "
             "all paths to an exit go through a killing block (default = 50)"));

// This flag allows or disallows DSE to optimize MemorySSA during its
// traversal. Note that DSE optimizing MemorySSA may impact other passes
// downstream of the DSE invocation and can lead to issues not being
// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
// those cases, the flag can be used to check if DSE's MemorySSA optimizations
// impact follow-up passes.
static cl::opt<bool>
    OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
                      cl::desc("Allow DSE to optimize memory accesses."));


//===----------------------------------------------------------------------===//

// Helper functions

//===----------------------------------------------------------------------===//

// Set of byte intervals that overwrite part of a store. As used by
// isPartialOverwrite, each entry maps an interval's end offset (half-open) to
// its start offset, so the map is ordered by interval end.
using OverlapIntervalsTy = std::map<int64_t, int64_t>;

// Overlap intervals collected per instruction; isPartialOverwrite indexes it
// with the (potentially dead) store instruction.
using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;


/// Returns true if the write performed by \p I may have its length reduced
/// from the end.
///
/// Only memset, memcpy and their element-wise unordered-atomic variants
/// qualify. Plain stores and library calls are not shortened for now.
/// FIXME: Add memmove if it's also safe to transform.
static bool isShortenableAtTheEnd(Instruction *I) {
  // A StoreInst is never an IntrinsicInst, so stores are rejected here along
  // with every other non-intrinsic instruction.
  const auto *Intr = dyn_cast<IntrinsicInst>(I);
  if (!Intr)
    return false;

  const Intrinsic::ID ID = Intr->getIntrinsicID();
  return ID == Intrinsic::memset || ID == Intrinsic::memcpy ||
         ID == Intrinsic::memcpy_element_unordered_atomic ||
         ID == Intrinsic::memset_element_unordered_atomic;
}


/// Returns true if the write performed by \p I may be shortened from the
/// front, i.e. its start moved forward and its length reduced.
///
/// FIXME: Only memset-like intrinsics are handled for now. Supporting
/// memcpy/memmove should be easily done by offsetting the source address.
static bool isShortenableAtTheBeginning(Instruction *I) {
  const bool IsAnyMemSet = isa<AnyMemSetInst>(I);
  return IsAnyMemSet;
}


static std::optional<TypeSize> getPointerSize(const Value *V,

                                              const DataLayout &DL,

                                              const TargetLibraryInfo &TLI,

                                              const Function *F) {

  uint64_t Size;

  ObjectSizeOpts Opts;

  Opts.NullIsUnknownSize = NullPointerIsDefined(F);


  if (getObjectSize(V, Size, DL, &TLI, Opts))

    return TypeSize::getFixed(Size);

  return std::nullopt;

}


namespace {


/// Classification of how a killing write overlaps an earlier (potentially
/// dead) write.
enum OverwriteResult {

  /// The killing write overwrites the beginning of the dead write.
  OW_Begin,

  /// The killing write completely overwrites the dead write.
  OW_Complete,

  /// The killing write overwrites the end of the dead write.
  OW_End,

  /// A dead (big) store is partially overwritten by a killing (smaller) store
  /// that does not write outside the big store's memory locations.
  OW_PartialEarlierWithFullLater,

  // NOTE(review): no code in this part of the file produces OW_MaybePartial or
  // OW_None; document them from the function that returns them.
  OW_MaybePartial,

  OW_None,

  /// Nothing can be determined about the overlap.
  OW_Unknown

};


} // end anonymous namespace


/// Check whether two instructions are masked stores such that \p KillingI
/// completely overwrites \p DeadI.
///
/// Returns OW_Complete only when both are llvm.masked.store calls whose stored
/// vectors have the same scalar size and element count, whose pointers are
/// identical (after stripping casts) or must-alias, and whose mask operands
/// are the same value. Every other case yields OW_Unknown.
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
                                              const Instruction *DeadI,
                                              BatchAAResults &AA) {
  const auto *KillingCall = dyn_cast<IntrinsicInst>(KillingI);
  const auto *DeadCall = dyn_cast<IntrinsicInst>(DeadI);
  if (!KillingCall || !DeadCall)
    return OW_Unknown;

  // Both sides must be the same intrinsic, and only masked_store is handled.
  if (KillingCall->getIntrinsicID() != Intrinsic::masked_store ||
      DeadCall->getIntrinsicID() != Intrinsic::masked_store)
    return OW_Unknown;

  // Operand 0 is the stored vector: element width and element count must
  // match.
  VectorType *KillingVecTy =
      cast<VectorType>(KillingCall->getArgOperand(0)->getType());
  VectorType *DeadVecTy =
      cast<VectorType>(DeadCall->getArgOperand(0)->getType());
  const bool SameShape =
      KillingVecTy->getScalarSizeInBits() == DeadVecTy->getScalarSizeInBits() &&
      KillingVecTy->getElementCount() == DeadVecTy->getElementCount();
  if (!SameShape)
    return OW_Unknown;

  // Operand 1 is the destination pointer. Alias analysis is only consulted
  // when the stripped pointers are not trivially identical.
  Value *KillingPtr = KillingCall->getArgOperand(1)->stripPointerCasts();
  Value *DeadPtr = DeadCall->getArgOperand(1)->stripPointerCasts();
  if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
    return OW_Unknown;

  // Operand 3 is the mask.
  // TODO: check that the killing mask is a superset of the dead mask.
  if (KillingCall->getArgOperand(3) != DeadCall->getArgOperand(3))
    return OW_Unknown;

  return OW_Complete;
}


/// Classify how a store to \p KillingLoc overlaps a store to \p DeadLoc.
///
/// Possible results:
///  - OW_Complete: the recorded partial overwrites of \p DeadI, including this
///    one, together cover the whole dead store.
///  - OW_PartialEarlierWithFullLater: the killing store lies entirely inside
///    the (bigger) dead store.
///  - OW_End / OW_Begin: the killing store overwrites the end / the beginning
///    of the dead store. Only reported when partial-overwrite tracking is
///    disabled.
///  - OW_Unknown: nothing can be determined.
///
/// When tracking is enabled and the two ranges touch or overlap, the killing
/// range is merged into the interval map stored in \p IOL for \p DeadI.
///
/// NOTE: This function must only be called if both \p KillingLoc and
/// \p DeadLoc belong to the same underlying object with valid \p KillingOff
/// and \p DeadOff.
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
                                          const MemoryLocation &DeadLoc,
                                          int64_t KillingOff, int64_t DeadOff,
                                          Instruction *DeadI,
                                          InstOverlapIntervalsTy &IOL) {
  const uint64_t KillingSize = KillingLoc.Size.getValue();
  const uint64_t DeadSize = DeadLoc.Size.getValue();

  // Half-open end offsets of both writes.
  const int64_t KillingEnd = int64_t(KillingOff + KillingSize);
  const int64_t DeadEnd = int64_t(DeadOff + DeadSize);

  const bool TrackPartials = EnablePartialOverwriteTracking;

  // The writes may overlap without the overlap being complete. Other
  // incomplete overlaps may exist too, and together they might cover the whole
  // dead store.
  // Note: this relies on the function never being called when there are
  // intervening reads.
  if (TrackPartials && KillingOff < DeadEnd && KillingEnd >= DeadOff) {
    OverlapIntervalsTy &Intervals = IOL[DeadI];
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
                      << DeadEnd << ") KillingLoc [" << KillingOff << ", "
                      << KillingEnd << ")\n");

    // Keep the map free of overlapping or adjacent intervals. Entries are
    // keyed by their (half-open) end offset and hold the start offset.
    int64_t MergedStart = KillingOff;
    int64_t MergedEnd = KillingEnd;

    // First interval ending at or after our start; it overlaps (or touches)
    // us if it also starts no later than our end.
    auto It = Intervals.lower_bound(MergedStart);
    if (It != Intervals.end() && It->second <= MergedEnd) {
      // Absorb it: widen our range and drop the existing entry.
      MergedStart = std::min(MergedStart, It->second);
      MergedEnd = std::max(MergedEnd, It->first);
      It = Intervals.erase(It);

      // Later intervals may be reached by the widened range as well:
      //
      // |--- dead 1 ---|  |--- dead 2 ---|
      //     |------- killing---------|
      //
      while (It != Intervals.end() && It->second <= MergedEnd) {
        assert(It->second > MergedStart && "Unexpected interval");
        MergedEnd = std::max(MergedEnd, It->first);
        It = Intervals.erase(It);
      }
    }

    Intervals[MergedEnd] = MergedStart;

    // If the lowest interval spans the whole dead store, it is fully covered.
    const auto &Lowest = *Intervals.begin();
    if (Lowest.second <= DeadOff && Lowest.first >= DeadEnd) {
      LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
                        << DeadOff << ", " << DeadEnd
                        << ") Composite KillingLoc [" << Lowest.second << ", "
                        << Lowest.first << ")\n");
      ++NumCompletePartials;
      return OW_Complete;
    }
  }

  // A dead store that writes every location the killing store writes to.
  if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
      DeadEnd > KillingOff &&
      uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
                      << ", " << DeadEnd << ") by a killing store ["
                      << KillingOff << ", " << KillingEnd << ")\n");
    // TODO: Maybe come up with a better name?
    return OW_PartialEarlierWithFullLater;
  }

  if (!TrackPartials) {
    // The killing store overwrites the end of the dead store:
    //
    //      |--dead--|
    //                |--   killing   --|
    //
    // The dead store could be trimmed so it no longer writes addresses that
    // the killing store definitely overwrites.
    if (KillingOff > DeadOff && KillingOff < DeadEnd && KillingEnd >= DeadEnd)
      return OW_End;

    // The killing store overwrites the beginning of the dead store:
    //
    //                |--dead--|
    //      |--  killing  --|
    //
    // The dead store's destination could be moved forward and its size
    // trimmed accordingly.
    if (KillingOff <= DeadOff && KillingEnd > DeadOff) {
      assert(KillingEnd < DeadEnd && "Expect to be handled as OW_Complete");
      return OW_Begin;
    }
  }

  // Otherwise, they don't completely overlap.
  return OW_Unknown;
}


/// Returns true if the memory accessed by \p SecondI is not modified between
/// \p FirstI and \p SecondI.
///
/// The CFG is walked backwards from \p SecondI towards \p FirstI and every
/// instruction that may write memory is checked against the location accessed
/// by \p SecondI. The address is PHI-translated when stepping into a
/// predecessor. The function conservatively returns false if translation
/// fails or a block is reached with two different addresses.
///
/// Precondition: \p SecondI must be dominated by \p FirstI.
static bool
memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
                           BatchAAResults &AA, const DataLayout &DL,
                           DominatorTree *DT) {
  // A block to scan, paired with the address to check inside that block.
  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;

  BasicBlock *FirstBB = FirstI->getParent();
  BasicBlock *SecondBB = SecondI->getParent();

  // Scanning in FirstBB starts right after FirstI; the initial scan of
  // SecondBB stops right before SecondI.
  BasicBlock::iterator AfterFirst(FirstI);
  ++AfterFirst;
  BasicBlock::iterator AtSecond(SecondI);

  // The location SecondI accesses (the destination, for a memset).
  auto *MemSet = dyn_cast<MemSetInst>(SecondI);
  MemoryLocation MemLoc = MemSet ? MemoryLocation::getForDest(MemSet)
                                 : MemoryLocation::get(SecondI);
  auto *StartPtr = const_cast<Value *>(MemLoc.Ptr);

  SmallVector<BlockAddressPair, 16> Pending;
  // Address each block was first reached with. Bail out if a block is reached
  // again with a different address.
  DenseMap<BasicBlock *, Value *> SeenAddr;

  Pending.emplace_back(SecondBB, PHITransAddr(StartPtr, DL, nullptr));
  bool OnInitialVisit = true;

  while (!Pending.empty()) {
    BlockAddressPair Item = Pending.pop_back_val();
    BasicBlock *BB = Item.first;
    PHITransAddr &Addr = Item.second;
    Value *CurPtr = Addr.getAddr();

    // In FirstBB, instructions up to and including FirstI are ignored.
    BasicBlock::iterator ScanIt = BB == FirstBB ? AfterFirst : BB->begin();

    // Scan to the end of the block by default. That also covers a second visit
    // of SecondBB through a loop, where the instructions after SecondI matter.
    BasicBlock::iterator ScanEnd = BB->end();
    if (OnInitialVisit) {
      // Very first item: this is SecondBB, and only what precedes SecondI is
      // relevant.
      assert(BB == SecondBB && "first block is not the store block");
      ScanEnd = AtSecond;
      OnInitialVisit = false;
    }

    for (; ScanIt != ScanEnd; ++ScanIt) {
      Instruction *Cur = &*ScanIt;
      if (Cur == SecondI || !Cur->mayWriteToMemory())
        continue;
      if (isModSet(AA.getModRefInfo(Cur, MemLoc.getWithNewPtr(CurPtr))))
        return false;
    }

    // FirstBB terminates the backwards walk along this path.
    if (BB == FirstBB)
      continue;

    assert(BB != &FirstBB->getParent()->getEntryBlock() &&
        "Should not hit the entry block because SI must be dominated by LI");

    for (BasicBlock *Pred : predecessors(BB)) {
      PHITransAddr PredAddr = Addr;
      if (PredAddr.needsPHITranslationFromBlock(BB) &&
          (!PredAddr.isPotentiallyPHITranslatable() ||
           !PredAddr.translateValue(BB, Pred, DT, false)))
        return false;

      Value *PredPtr = PredAddr.getAddr();
      auto [Slot, IsNew] = SeenAddr.insert({Pred, PredPtr});
      if (IsNew) {
        Pending.emplace_back(Pred, PredAddr);
        continue;
      }
      // Already visited: fine if it was with the same address, otherwise give
      // up.
      if (Slot->second != PredPtr)
        return false;
    }
  }
  return true;
}


/// Update the assignment markers linked to \p Inst after the store has been
/// shortened from \p OldSizeInBits to \p NewSizeInBits.
///
/// The slice that is no longer written starts at \p OldOffsetInBits when the
/// beginning was cut off, or at \p OldOffsetInBits + \p NewSizeInBits when the
/// end was cut off (\p IsOverwriteEnd). For every linked marker (both
/// DbgAssignIntrinsic and DbgVariableRecord) whose fragment intersects that
/// slice, a cloned, unlinked marker describing the dead part is inserted after
/// it. If the intersection cannot be computed, the marker itself is unlinked
/// from the store instead.
static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
                              uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
                              uint64_t NewSizeInBits, bool IsOverwriteEnd) {
  const DataLayout &DL = Inst->getModule()->getDataLayout();

  // Bit range of the slice that is no longer written.
  const uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
  uint64_t DeadSliceOffsetInBits = OldOffsetInBits;
  if (IsOverwriteEnd)
    DeadSliceOffsetInBits += NewSizeInBits;

  // Narrow a marker's expression to the dead fragment. If no fragment
  // expression can be built on top of the existing one, fall back to a fresh
  // fragment expression and turn the marker into a kill location.
  auto RewriteAsDeadFragment = [](auto *Marker,
                                  DIExpression::FragmentInfo Dead) {
    // createFragmentExpression expects an offset relative to the existing
    // fragment offset if there is one.
    const DIExpression::FragmentInfo Existing =
        Marker->getExpression()->getFragmentInfo().value_or(
            DIExpression::FragmentInfo(0, 0));
    const uint64_t RelativeOffset = Dead.OffsetInBits - Existing.OffsetInBits;

    auto Narrowed = DIExpression::createFragmentExpression(
        Marker->getExpression(), RelativeOffset, Dead.SizeInBits);
    if (Narrowed) {
      Marker->setExpression(*Narrowed);
      return;
    }

    auto *KillExpr = *DIExpression::createFragmentExpression(
        DIExpression::get(Marker->getContext(), std::nullopt),
        Dead.OffsetInBits, Dead.SizeInBits);
    Marker->setExpression(KillExpr);
    Marker->setKillLocation();
  };

  // A distinct DIAssignID that links to no instruction. It is created lazily,
  // at most once, and shared by all markers unlinked below.
  DIAssignID *UnlinkedID = nullptr;
  LLVMContext &Ctx = Inst->getContext();
  auto GetDeadLink = [&]() -> DIAssignID * {
    if (UnlinkedID == nullptr)
      UnlinkedID = DIAssignID::getDistinct(Ctx);
    return UnlinkedID;
  };

  // Inserting markers invalidates the iterators returned by
  // getAssignmentMarkers, so snapshot the linked markers before modifying
  // anything.
  auto LinkedRange = at::getAssignmentMarkers(Inst);
  SmallVector<DbgVariableRecord *> RecordMarkers =
      at::getDVRAssignmentMarkers(Inst);
  SmallVector<DbgAssignIntrinsic *> IntrinsicMarkers(LinkedRange.begin(),
                                                     LinkedRange.end());

  auto HandleMarker = [&](auto *Marker) {
    std::optional<DIExpression::FragmentInfo> Overlap;
    const bool Computed = at::calculateFragmentIntersect(
        DL, OriginalDest, DeadSliceOffsetInBits, DeadSliceSizeInBits, Marker,
        Overlap);
    if (!Computed || !Overlap) {
      // The intersecting fragment could not be calculated. Be cautious and
      // unlink the whole assignment from the store.
      Marker->setKillAddress();
      Marker->setAssignId(GetDeadLink());
      return;
    }

    // Empty intersection: this marker is unaffected.
    if (Overlap->SizeInBits == 0)
      return;

    // The fragments overlap: describe the dead part with a new, unlinked
    // marker placed right after the existing one.
    auto *DeadCopy = static_cast<decltype(Marker)>(Marker->clone());
    DeadCopy->insertAfter(Marker);
    DeadCopy->setAssignId(GetDeadLink());
    RewriteAsDeadFragment(DeadCopy, *Overlap);
    DeadCopy->setKillAddress();
  };

  for (DbgAssignIntrinsic *Marker : IntrinsicMarkers)
    HandleMarker(Marker);
  for (DbgVariableRecord *Marker : RecordMarkers)
    HandleMarker(Marker);
}


/// Try to trim the memory intrinsic \p DeadI, which writes
/// [\p DeadStart, \p DeadStart + \p DeadSize), so that it no longer writes
/// bytes covered by the killing range starting at \p KillingStart with size
/// \p KillingSize.
///
/// With \p IsOverwriteEnd set the tail of the dead write is removed; otherwise
/// its head is removed and the destination pointer is advanced by a new GEP.
/// The removed region is adjusted so that the remaining write keeps the
/// destination alignment. For atomic intrinsics the new length must stay a
/// multiple of the element size.
///
/// On success the intrinsic and its assignment markers are updated,
/// \p DeadStart and \p DeadSize are set to the new range and true is returned.
/// Otherwise nothing is changed and false is returned.
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                         uint64_t &DeadSize, int64_t KillingStart,
                         uint64_t KillingSize, bool IsOverwriteEnd) {
  auto *MemIntr = cast<AnyMemIntrinsic>(DeadI);
  const Align PrefAlign = MemIntr->getDestAlign().valueOrOne();

  // We assume that memset/memcpy operates in chunks of the "largest" native
  // type size, aligned on the same value. The best start and size for a
  // memset/memcpy is therefore a multiple of that type's preferred alignment:
  // reducing the store further is pointless, since any "extra" stores come for
  // free anyway. On the other hand, the maximum alignment we can achieve is
  // limited by the alignment of the initial store.

  // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
  // "largest" native type.
  // Note: What is the proper way to get that value?
  // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
  // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);

  // Start and size of the region to remove, chosen so that 'PrefAlign' is
  // maintained on the remaining store.
  int64_t ToRemoveStart = 0;
  uint64_t ToRemoveSize = 0;
  if (IsOverwriteEnd) {
    // Push the cut point forward so the remaining size stays a multiple of
    // 'PrefAlign'.
    const uint64_t Padding =
        offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
    ToRemoveStart = KillingStart + Padding;
    const uint64_t KeptSize = uint64_t(ToRemoveStart - DeadStart);
    if (DeadSize <= KeptSize)
      return false;
    ToRemoveSize = DeadSize - KeptSize;
  } else {
    ToRemoveStart = DeadStart;
    const uint64_t Lead = uint64_t(DeadStart - KillingStart);
    assert(KillingSize >= Lead && "Not overlapping accesses?");
    ToRemoveSize = KillingSize - Lead;
    // Round the removed size down so the start of the remaining store stays
    // aligned on 'PrefAlign'.
    const uint64_t Padding = offsetToAlignment(ToRemoveSize, PrefAlign);
    if (Padding != 0) {
      const uint64_t Excess = PrefAlign.value() - Padding;
      if (ToRemoveSize <= Excess)
        return false;
      ToRemoveSize -= Excess;
    }
    assert(isAligned(PrefAlign, ToRemoveSize) &&
           "Should preserve selected alignment");
  }

  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
  assert(DeadSize > ToRemoveSize && "Can't remove more than original size");

  const uint64_t NewSize = DeadSize - ToRemoveSize;
  if (auto *Atomic = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
    // When shortening an atomic memory intrinsic, the newly shortened length
    // must remain an integer multiple of the element size.
    const uint32_t ElementSize = Atomic->getElementSizeInBytes();
    if (NewSize % ElementSize != 0)
      return false;
  }

  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
                    << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
                    << "\n  KILLER [" << ToRemoveStart << ", "
                    << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");

  // Rewrite the intrinsic's length, reusing the type of the old length value.
  Value *OldLength = MemIntr->getLength();
  MemIntr->setLength(ConstantInt::get(OldLength->getType(), NewSize));
  MemIntr->setDestAlignment(PrefAlign);

  Value *OrigDest = MemIntr->getRawDest();
  if (!IsOverwriteEnd) {
    // The head was removed: advance the destination by the removed byte count.
    Value *Indices[1] = {
        ConstantInt::get(OldLength->getType(), ToRemoveSize)};
    Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
        Type::getInt8Ty(MemIntr->getContext()), OrigDest, Indices, "",
        DeadI->getIterator());
    NewDestGEP->setDebugLoc(MemIntr->getDebugLoc());
    MemIntr->setDest(NewDestGEP);
  }

  // Update the attached assignment markers. Assume 8-bit byte.
  shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
                    IsOverwriteEnd);

  // Finally, publish the new range of the dead access to the caller.
  if (!IsOverwriteEnd)
    DeadStart += ToRemoveSize;
  DeadSize = NewSize;

  return true;
}


/// Try to trim the end of \p DeadI using the last interval in \p IntervalMap
/// (the one with the greatest end offset).
///
/// The trim is attempted only if that interval starts strictly inside the
/// dead range [\p DeadStart, \p DeadStart + \p DeadSize) and reaches at least
/// to its end. On success the interval is erased from the map, \p DeadStart
/// and \p DeadSize are updated by tryToShorten, and true is returned.
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
                            int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
    return false;

  // Map entries are (end offset -> start offset).
  const auto Last = std::prev(IntervalMap.end());
  const int64_t KillingStart = Last->second;
  const int64_t KillingEnd = Last->first;
  assert(KillingEnd - KillingStart >= 0 && "Size expected to be positive");
  const uint64_t KillingSize = KillingEnd - KillingStart;

  // The killing interval has to begin after the start of the dead write ...
  if (KillingStart <= DeadStart)
    return false;

  // ... which makes "KillingStart - DeadStart" positive. It must also begin
  // before the end of the dead write and cover everything from there to the
  // end ("DeadSize - Skipped" is positive once Skipped < DeadSize is known).
  const uint64_t Skipped = uint64_t(KillingStart - DeadStart);
  if (Skipped >= DeadSize || KillingSize < DeadSize - Skipped)
    return false;

  if (!tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                    true))
    return false;

  IntervalMap.erase(Last);
  return true;
}


/// Try to trim the beginning of \p DeadI using the first interval in
/// \p IntervalMap (the one with the smallest end offset).
///
/// The trim is attempted only if that interval starts at or before
/// \p DeadStart and extends past it. On success the interval is erased from
/// the map, \p DeadStart and \p DeadSize are updated by tryToShorten, and true
/// is returned.
static bool tryToShortenBegin(Instruction *DeadI,
                              OverlapIntervalsTy &IntervalMap,
                              int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
    return false;

  // Map entries are (end offset -> start offset).
  const auto First = IntervalMap.begin();
  const int64_t KillingStart = First->second;
  const int64_t KillingEnd = First->first;
  assert(KillingEnd - KillingStart >= 0 && "Size expected to be positive");
  const uint64_t KillingSize = KillingEnd - KillingStart;

  // The killing interval must not start after the dead write ...
  if (KillingStart > DeadStart)
    return false;

  // ... so "DeadStart - KillingStart" is non-negative. The interval also has
  // to reach past the first byte of the dead write.
  const uint64_t Lead = uint64_t(DeadStart - KillingStart);
  if (KillingSize <= Lead)
    return false;

  // "KillingSize - Lead" is positive here; covering the whole dead write
  // would have been a complete overwrite.
  assert(KillingSize - Lead < DeadSize &&
         "Should have been handled as OW_Complete");

  if (!tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                    false))
    return false;

  IntervalMap.erase(First);
  return true;
}


/// Try to fold the constant written by \p KillingI into the constant written
/// by \p DeadI.
///
/// Applies when both stores write a ConstantInt whose type needs no padding
/// (type size equals store size) and the memory is not modified between the
/// two stores. The caller is expected to have established that the killing
/// store is fully contained in the dead one; \p KillingOffset and
/// \p DeadOffset are their byte offsets.
///
/// \returns the merged constant, of the dead store's value type, or nullptr
/// if the stores cannot be merged.
///
/// TODO: Deal with other constant types (vectors, etc), and probably some mem
/// intrinsics (if needed).
static Constant *
tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
                                   int64_t KillingOffset, int64_t DeadOffset,
                                   const DataLayout &DL, BatchAAResults &AA,
                                   DominatorTree *DT) {
  // A store qualifies if it exists, writes an integer constant and needs no
  // padding.
  auto StoresUnpaddedConstInt = [&DL](StoreInst *SI) {
    if (!SI)
      return false;
    Value *Stored = SI->getValueOperand();
    return isa<ConstantInt>(Stored) &&
           DL.typeSizeEqualsStoreSize(Stored->getType());
  };

  // The memory scan is the expensive check, so it comes last.
  if (!StoresUnpaddedConstInt(DeadI) || !StoresUnpaddedConstInt(KillingI) ||
      !memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT))
    return nullptr;

  const APInt DeadValue =
      cast<ConstantInt>(DeadI->getValueOperand())->getValue();
  APInt KillingValue =
      cast<ConstantInt>(KillingI->getValueOperand())->getValue();
  const unsigned DeadBits = DeadValue.getBitWidth();
  const unsigned KillingBits = KillingValue.getBitWidth();
  assert(DeadBits > KillingBits);
  KillingValue = KillingValue.zext(DeadBits);

  // Bit position of the smaller store inside the larger one. On big-endian
  // targets the position is counted from the other end of the value.
  const unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
  const unsigned LShiftAmount =
      DL.isBigEndian() ? DeadBits - BitOffsetDiff - KillingBits
                       : BitOffsetDiff;
  const APInt Mask =
      APInt::getBitsSet(DeadBits, LShiftAmount, LShiftAmount + KillingBits);

  // Clear the bits being replaced, then OR in the smaller value, shifted into
  // place.
  const APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
  LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n  Dead: " << *DeadI
                    << "\n  Killing: " << *KillingI
                    << "\n  Merged Value: " << Merged << '\n');
  return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
}


namespace {

// Returns true if \p I is a call to one of the intrinsics that DSE treats as
// neither reading nor writing memory: the lifetime markers, invariant.end,
// launder.invariant.group and assume. Debug intrinsics are not expected to be
// passed in at all and trigger llvm_unreachable.
bool isNoopIntrinsic(Instruction *I) {
  const auto *Intr = dyn_cast<IntrinsicInst>(I);
  if (!Intr)
    return false;

  switch (Intr->getIntrinsicID()) {
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::assume:
    return true;
  case Intrinsic::dbg_declare:
  case Intrinsic::dbg_label:
  case Intrinsic::dbg_value:
    llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
  default:
    break;
  }
  // Every other intrinsic is treated as a potential memory access.
  return false;
}


// Returns true if the MemoryDef \p D can be ignored for DSE.
// \p DefVisibleToCaller tells whether the candidate store is visible to the
// caller; it only matters when \p D's instruction may throw.
bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
  Instruction *MemInst = D->getMemoryInst();

  // A call restricted to inaccessible memory can neither read nor write any
  // of the locations considered for elimination.
  const auto *Call = dyn_cast<CallBase>(MemInst);
  if (Call && Call->onlyAccessesInaccessibleMemory())
    return true;

  // A throwing instruction is harmless when the stored-to location is not
  // visible to the caller.
  const bool HarmlessThrow = MemInst->mayThrow() && !DefVisibleToCaller;

  // Fences, whatever their ordering (release/acquire/seq_cst), only constrain
  // the ordering of stores that are already visible; they do not make a store
  // visible to other threads. Stepping over one therefore cannot turn a dead
  // store into a live one.
  const bool IsFence = isa<FenceInst>(MemInst);

  // The last category is intrinsics that do not really read or modify memory.
  return HarmlessThrow || IsFence || isNoopIntrinsic(MemInst);
}


struct DSEState {

  /// The function whose blocks and arguments the constructor scans.
  Function &F;
  /// Alias analysis that backs the batched BatchAA wrapper.
  AliasAnalysis &AA;
  /// Escape information built from the dominator tree and loop info; BatchAA
  /// is handed a pointer to this member.
  EarliestEscapeInfo EI;

  /// The single BatchAA instance that is used to cache AA queries. It will
  /// not be invalidated over the whole run. This is safe, because:
  /// 1. Only memory writes are removed, so the alias cache for memory
  ///    locations remains valid.
  /// 2. No new instructions are added (only instructions removed), so cached
  ///    information for a deleted value cannot be accessed by a re-used new
  ///    value pointer.
  BatchAAResults BatchAA;

  // Analyses and target information supplied by the caller of the
  // constructor. DL is taken from F's parent module.
  MemorySSA &MSSA;
  DominatorTree &DT;
  PostDominatorTree &PDT;
  const TargetLibraryInfo &TLI;
  const DataLayout &DL;
  const LoopInfo &LI;

  // Whether the function contains any irreducible control flow, useful for
  // being accurately able to detect loops.
  bool ContainsIrreducibleLoops;

  // All MemoryDefs that potentially could kill other MemDefs.
  SmallVector<MemoryDef *, 64> MemDefs;
  // Any that should be skipped as they are already deleted
  SmallPtrSet<MemoryAccess *, 4> SkipStores;
  // Keep track whether a given object is captured before return or not.
  DenseMap<const Value *, bool> CapturedBeforeReturn;
  // Keep track of all of the objects that are invisible to the caller after
  // the function returns.
  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
  // Post-order numbers for each basic block. Used to figure out if memory
  // accesses are executed before another access.
  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;

  /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
  /// basic block.
  MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
  // Check if there are root nodes that are terminated by UnreachableInst.
  // Those roots pessimize post-dominance queries. If there are such roots,
  // fall back to CFG scan starting from all non-unreachable roots.
  bool AnyUnreachableExit;

  // Whether or not we should iterate on removing dead stores at the end of the
  // function due to removing a store causing a previously captured pointer to
  // no longer be captured.
  // NOTE(review): the constructor does not assign this flag; confirm that it
  // is always written before its first read.
  bool ShouldIterateEndOfFunctionDSE;

  /// Dead instructions to be removed at the end of DSE.
  SmallVector<Instruction *> ToRemove;

  // Class contains self-reference, make sure it's not copied/moved.
  // (BatchAA is constructed with the address of the EI member, so a copied or
  // moved object would keep pointing into the original.)
  DSEState(const DSEState &) = delete;
  DSEState &operator=(const DSEState &) = delete;


  /// Binds the analyses and precomputes per-function state: post-order block
  /// numbers, blocks holding throwing instructions that have no MemorySSA
  /// access, the list of candidate MemoryDefs, by-value pointee arguments,
  /// and the irreducible-control-flow / unreachable-exit flags.
  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
           PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
           const LoopInfo &LI)
      : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
    unsigned NextPostOrderNumber = 0;
    for (BasicBlock *Block : post_order(&F)) {
      PostOrderNumbers[Block] = NextPostOrderNumber++;

      for (Instruction &Inst : *Block) {
        MemoryAccess *Access = MSSA.getMemoryAccess(&Inst);
        if (!Access) {
          // Throwing instructions without a MemorySSA access are invisible to
          // the MemorySSA walks, so remember the block they live in.
          if (Inst.mayThrow())
            ThrowingBlocks.insert(Block);
          continue;
        }

        // Only MemoryDefs are candidates, and only while the collection
        // limit has not been reached.
        auto *Def = dyn_cast<MemoryDef>(Access);
        if (!Def || MemDefs.size() >= MemorySSADefsPerBlockLimit)
          continue;

        // Keep defs that either have an analyzable write location or
        // terminate memory (lifetime.end / free-like calls).
        if (getLocForWrite(&Inst) || isMemTerminatorInst(&Inst))
          MemDefs.push_back(Def);
      }
    }

    // Arguments whose pointee is passed by value (e.g. byval or inalloca)
    // behave like allocas: stores to them are dead once the function returns.
    for (Argument &Arg : F.args()) {
      if (Arg.hasPassPointeeByValueCopyAttr())
        InvisibleToCallerAfterRet.insert({&Arg, true});
    }

    // Record whether the function may contain irreducible control flow.
    ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);

    // Record whether any post-dominator tree root ends in an UnreachableInst.
    auto EndsInUnreachable = [](const BasicBlock *Root) {
      return isa<UnreachableInst>(Root->getTerminator());
    };
    AnyUnreachableExit = any_of(PDT.roots(), EndsInUnreachable);
  }


  /// Returns a precise location size for \p I when it is a call to the
  /// memset_chk or memcpy_chk library function whose third argument is a
  /// constant integer; otherwise returns \p Size unchanged.
  LocationSize strengthenLocationSize(const Instruction *I,
                                      LocationSize Size) const {
    const auto *Call = dyn_cast<CallBase>(I);
    if (!Call)
      return Size;

    LibFunc Func;
    if (!TLI.getLibFunc(*Call, Func) || !TLI.has(Func))
      return Size;
    if (Func != LibFunc_memset_chk && Func != LibFunc_memcpy_chk)
      return Size;

    // A *_chk call either writes exactly the number of bytes given as its
    // 3rd argument or aborts and exits the program immediately, so that
    // length can serve as the precise size when deciding whether KillingI
    // overwrites DeadLoc.
    // NOTE: AA may determine NoAlias if it can prove that the access size is
    // larger than the allocation size, because that would be UB. To avoid
    // acting on potentially invalid NoAlias results, the precise size is
    // meant to be used by isOverwrite only.
    const auto *Len = dyn_cast<ConstantInt>(Call->getArgOperand(2));
    return Len ? LocationSize::precise(Len->getZExtValue()) : Size;
  }


  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
  /// location (by \p DeadI instruction).
  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
  /// \p DeadI, but they both write to the same underlying object. In that
  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
  ///
  /// \p KillingOff and \p DeadOff are output parameters. They are only
  /// assigned once both pointers are decomposed into "base + constant offset"
  /// form near the end of the function; every earlier return leaves them
  /// untouched.
  OverwriteResult isOverwrite(const Instruction *KillingI,
                              const Instruction *DeadI,
                              const MemoryLocation &KillingLoc,
                              const MemoryLocation &DeadLoc,
                              int64_t &KillingOff, int64_t &DeadOff) {
    // AliasAnalysis does not always account for loops. Limit overwrite checks
    // to dependencies for which we can guarantee they are independent of any
    // loops they are in.
    if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
      return OW_Unknown;

    // The killing size may be made precise for *_chk library calls; the dead
    // size is always taken from DeadLoc as-is.
    LocationSize KillingLocSize =
        strengthenLocationSize(KillingI, KillingLoc.Size);
    const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
    const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
    const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
    const Value *KillingUndObj = getUnderlyingObject(KillingPtr);

    // Check whether the killing store overwrites the whole object, in which
    // case the size/offset of the dead store does not matter.
    if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
        isIdentifiedObject(KillingUndObj)) {
      std::optional<TypeSize> KillingUndObjSize =
          getPointerSize(KillingUndObj, DL, TLI, &F);
      if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
        return OW_Complete;
    }

    // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
    // get imprecise values here, though (except for unknown sizes).
    if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
      // In case no constant size is known, try to find IR values for the
      // number of bytes written and check if they match.
      const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
      const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
      if (KillingMemI && DeadMemI) {
        const Value *KillingV = KillingMemI->getLength();
        const Value *DeadV = DeadMemI->getLength();
        // Same length value and must-aliasing start pointers: the killing
        // intrinsic covers exactly what the dead one wrote.
        if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
          return OW_Complete;
      }

      // Masked stores have imprecise locations, but we can reason about them
      // to some extent.
      return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
    }

    // From here on both sizes are precise.
    const TypeSize KillingSize = KillingLocSize.getValue();
    const TypeSize DeadSize = DeadLoc.Size.getValue();
    // Bail on doing Size comparison which depends on AA for now
    // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
    const bool AnyScalable =
        DeadSize.isScalable() || KillingLocSize.isScalable();

    if (AnyScalable)
      return OW_Unknown;
    // Query the alias information
    AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);

    // If the start pointers are the same, we just have to compare sizes to see if
    // the killing store was larger than the dead store.
    if (AAR == AliasResult::MustAlias) {
      // Make sure that the KillingSize size is >= the DeadSize size.
      if (KillingSize >= DeadSize)
        return OW_Complete;
    }

    // If we hit a partial alias we may have a full overwrite
    if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
      int32_t Off = AAR.getOffset();
      // Only a non-negative offset whose end (Off + DeadSize) still lies
      // within KillingSize counts as a complete overwrite.
      if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
        return OW_Complete;
    }

    // If we can't resolve the same pointers to the same object, then we can't
    // analyze them at all.
    if (DeadUndObj != KillingUndObj) {
      // Non aliasing stores to different objects don't overlap. Note that
      // if the killing store is known to overwrite whole object (out of
      // bounds access overwrites whole object as well) then it is assumed to
      // completely overwrite any store to the same object even if they don't
      // actually alias (see next check).
      if (AAR == AliasResult::NoAlias)
        return OW_None;
      return OW_Unknown;
    }

    // Okay, we have stores to two completely different pointers.  Try to
    // decompose the pointer into a "base + constant_offset" form.  If the base
    // pointers are equal, then we can reason about the two stores.
    DeadOff = 0;
    KillingOff = 0;
    const Value *DeadBasePtr =
        GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
    const Value *KillingBasePtr =
        GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);

    // If the base pointers still differ, we have two completely different
    // stores.
    if (DeadBasePtr != KillingBasePtr)
      return OW_Unknown;

    // The killing access completely overlaps the dead store if and only if
    // both start and end of the dead one is "inside" the killing one:
    //    |<->|--dead--|<->|
    //    |-----killing------|
    // Accesses may overlap if and only if start of one of them is "inside"
    // another one:
    //    |<->|--dead--|<-------->|
    //    |-------killing--------|
    //           OR
    //    |-------dead-------|
    //    |<->|---killing---|<----->|
    //
    // We have to be careful here as *Off is signed while *.Size is unsigned.

    // Check if the dead access starts "not before" the killing one.
    if (DeadOff >= KillingOff) {
      // If the dead access ends "not after" the killing access then the
      // dead one is completely overwritten by the killing one.
      if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
        return OW_Complete;
      // If start of the dead access is "before" end of the killing access
      // then accesses overlap.
      else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
        return OW_MaybePartial;
    }
    // If start of the killing access is "before" end of the dead access then
    // accesses overlap.
    else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
      return OW_MaybePartial;
    }

    // Can reach here only if accesses are known not to overlap.
    return OW_None;
  }


  /// Returns true if \p V is considered invisible to the caller once the
  /// function has returned. Allocas always qualify. For any other value the
  /// answer is looked up in, and on first query stored into,
  /// InvisibleToCallerAfterRet.
  bool isInvisibleToCallerAfterRet(const Value *V) {
    if (isa<AllocaInst>(V))
      return true;

    // Start from a pessimistic entry; an entry that already exists (for
    // example one seeded by the constructor) is left as it is.
    auto [Entry, IsNewEntry] = InvisibleToCallerAfterRet.insert({V, false});

    // A newly seen value is upgraded only if it is invisible on unwind and is
    // a noalias call; the result then depends on the capture query.
    if (IsNewEntry && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
      Entry->second = !PointerMayBeCaptured(V, true, false);

    return Entry->second;
  }


  /// Returns true if \p V is considered invisible to the caller when the
  /// function unwinds. Capture-query results are cached per value in
  /// CapturedBeforeReturn.
  bool isInvisibleToCallerOnUnwind(const Value *V) {
    bool RequiresNoCaptureBeforeUnwind;
    if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
      return false;

    if (RequiresNoCaptureBeforeUnwind) {
      auto [Entry, IsNewEntry] = CapturedBeforeReturn.insert({V, true});
      // NOTE: This could be made more precise by PointerMayBeCapturedBefore
      // with the killing MemoryDef. We refrain from doing so for now to limit
      // compile-time; it does not change the number of stores removed on a
      // large test set in practice.
      if (IsNewEntry)
        Entry->second = PointerMayBeCaptured(V, false, true);
      return !Entry->second;
    }

    // No capture check is required for this value.
    return true;
  }


  /// Returns the memory location written by \p I, or std::nullopt if \p I
  /// does not write memory or no location can be determined for it.
  std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
    if (!I->mayWriteToMemory())
      return std::nullopt;

    // Calls are resolved through their destination argument (using TLI);
    // every other instruction goes through the generic query.
    auto *Call = dyn_cast<CallBase>(I);
    return Call ? MemoryLocation::getForDest(Call, TLI)
                : MemoryLocation::getOrNone(I);
  }


  /// Given that \p I performs a dead, analyzable write, returns true if the
  /// instruction itself may be deleted.
  bool isRemovable(Instruction *I) {
    assert(getLocForWrite(I) && "Must have analyzable write");

    // Only unordered stores may go; volatile/atomic stores must stay.
    if (auto *Store = dyn_cast<StoreInst>(I))
      return Store->isUnordered();

    auto *Call = dyn_cast<CallBase>(I);
    if (!Call)
      return false;

    // Memory intrinsics are removable unless they are volatile.
    if (auto *MemIntr = dyn_cast<MemIntrinsic>(Call))
      return !MemIntr->isVolatile();

    // Dead lifetime intrinsics are never removed, e.g. because they are
    // followed by a free.
    if (Call->isLifetimeStartOrEnd())
      return false;

    // Any other call must have an unused result, be known to return, not
    // throw and not be a terminator.
    return Call->use_empty() && Call->willReturn() && Call->doesNotThrow() &&
           !Call->isTerminator();
  }


  /// Returns true if \p UseInst completely overwrites \p DefLoc

  /// (stored by \p DefInst).

  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,

                           Instruction *UseInst) {

    // UseInst has a MemoryDef associated in MemorySSA. It's possible for a

    // MemoryDef to not write to memory, e.g. a volatile load is modeled as a

    // MemoryDef.

    if (!UseInst->mayWriteToMemory())

      return false;


    if (auto *CB = dyn_cast<CallBase>(UseInst))

      if (CB->onlyAccessesInaccessibleMemory())

        return false;


    int64_t InstWriteOffset, DepWriteOffset;

    if (auto CC = getLocForWrite(UseInst))

      return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,

                         DepWriteOffset) == OW_Complete;

    return false;

  }


  /// Returns true if the location written by \p Def is not read by any
  /// MemorySSA access transitively reachable through the users of \p Def.
  /// Returns false when no write location is available, when the exploration
  /// limit is hit, or when a MemoryPhi is met while the written pointer is
  /// not guaranteed to be loop invariant.
  bool isWriteAtEndOfFunction(MemoryDef *Def) {
    LLVM_DEBUG(dbgs() << "  Check if def " << *Def << " ("
                      << *Def->getMemoryInst()
                      << ") is at the end the function \n");

    std::optional<MemoryLocation> WrittenLoc =
        getLocForWrite(Def->getMemoryInst());
    if (!WrittenLoc) {
      LLVM_DEBUG(dbgs() << "  ... could not get location for write.\n");
      return false;
    }

    // Pending only ever grows; Expanded records the accesses whose users have
    // already been queued, so each access is expanded at most once.
    SmallVector<MemoryAccess *, 4> Pending;
    SmallPtrSet<MemoryAccess *, 8> Expanded;
    auto QueueUsersOf = [&Pending, &Expanded](MemoryAccess *Acc) {
      if (Expanded.insert(Acc).second)
        for (Use &U : Acc->uses())
          Pending.push_back(cast<MemoryAccess>(U.getUser()));
    };
    QueueUsersOf(Def);

    for (unsigned Idx = 0; Idx < Pending.size(); ++Idx) {
      if (Pending.size() >= MemorySSAScanLimit) {
        LLVM_DEBUG(dbgs() << "  ... hit exploration limit.\n");
        return false;
      }

      MemoryAccess *User = Pending[Idx];
      if (auto *Phi = dyn_cast<MemoryPhi>(User)) {
        // AliasAnalysis does not account for loops, so only keep going when
        // the store is guaranteed to always hit the same memory location.
        if (!isGuaranteedLoopInvariant(WrittenLoc->Ptr))
          return false;

        QueueUsersOf(Phi);
        continue;
      }

      // TODO: Checking for aliasing is expensive. Consider reducing the
      // number of times this is called and/or caching it.
      Instruction *UserInst = cast<MemoryUseOrDef>(User)->getMemoryInst();
      if (isReadClobber(*WrittenLoc, UserInst)) {
        LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UserInst << ".\n");
        return false;
      }

      // Only defs have users of their own to follow.
      if (auto *UserDef = dyn_cast<MemoryDef>(User))
        QueueUsersOf(UserDef);
    }
    return true;
  }


  /// If \p I is a memory terminator like llvm.lifetime.end or free, returns
  /// the MemoryLocation it terminates paired with a flag that is true for
  /// free-like calls and false for lifetime.end. Returns std::nullopt for
  /// any other instruction.
  std::optional<std::pair<MemoryLocation, bool>>
  getLocForTerminator(Instruction *I) const {
    using TerminatedLoc = std::pair<MemoryLocation, bool>;

    // llvm.lifetime.end with a constant size: the location is exactly the
    // given pointer and size.
    uint64_t Size;
    Value *Object;
    if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Size),
                                                      m_Value(Object))))
      return TerminatedLoc(MemoryLocation(Object, Size), false);

    auto *Call = dyn_cast<CallBase>(I);
    if (!Call)
      return std::nullopt;

    // Free-like call: the location is everything after the freed pointer.
    if (Value *Freed = getFreedOperand(Call, &TLI))
      return TerminatedLoc(MemoryLocation::getAfter(Freed), true);

    return std::nullopt;
  }


  /// Returns true if \p I is a call that terminates memory: either the
  /// llvm.lifetime.end intrinsic or a call with a freed operand.
  bool isMemTerminatorInst(Instruction *I) const {
    auto *Call = dyn_cast<CallBase>(I);
    if (!Call)
      return false;
    if (Call->getIntrinsicID() == Intrinsic::lifetime_end)
      return true;
    return getFreedOperand(Call, &TLI) != nullptr;
  }


  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from

  /// instruction \p AccessI.

  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,

                       Instruction *MaybeTerm) {

    std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =

        getLocForTerminator(MaybeTerm);


    if (!MaybeTermLoc)

      return false;


    // If the terminator is a free-like call, all accesses to the underlying

    // object can be considered terminated.

    if (getUnderlyingObject(Loc.Ptr) !=

        getUnderlyingObject(MaybeTermLoc->first.Ptr))

      return false;


    auto TermLoc = MaybeTermLoc->first;

    if (MaybeTermLoc->second) {

      const Value *LocUO = getUnderlyingObject(Loc.Ptr);

      return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);

    }

    int64_t InstWriteOffset = 0;

    int64_t DepWriteOffset = 0;

    return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,

                       DepWriteOffset) == OW_Complete;

  }


  // Returns true if \p UseInst may read from \p DefLoc.
  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
    if (isNoopIntrinsic(UseInst))
      return false;

    // Stores are classified by ordering alone: monotonic or weaker atomic
    // stores can be re-ordered and are not read clobbers, anything stronger
    // is.
    if (auto *Store = dyn_cast<StoreInst>(UseInst))
      return isStrongerThan(Store->getOrdering(), AtomicOrdering::Monotonic);

    if (!UseInst->mayReadFromMemory())
      return false;

    // Calls limited to inaccessible memory cannot read DefLoc.
    auto *Call = dyn_cast<CallBase>(UseInst);
    if (Call && Call->onlyAccessesInaccessibleMemory())
      return false;

    // Otherwise ask alias analysis whether the instruction may reference
    // the location.
    return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
  }


  /// Returns true if a dependency between \p Current and \p KillingDef is

  /// guaranteed to be loop invariant for the loops that they are in. Either

  /// because they are known to be in the same block, in the same loop level or

  /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation

  /// during execution of the containing function.

  bool isGuaranteedLoopIndependent(const Instruction *Current,

                                   const Instruction *KillingDef,

                                   const MemoryLocation &CurrentLoc) {

    // If the dependency is within the same block or loop level (being careful

    // of irreducible loops), we know that AA will return a valid result for the

    // memory dependency. (Both at the function level, outside of any loop,

    // would also be valid but we currently disable that to limit compile time).

    if (Current->getParent() == KillingDef->getParent())

      return true;

    const Loop *CurrentLI = LI.getLoopFor(Current->getParent());

    if (!ContainsIrreducibleLoops && CurrentLI &&

        CurrentLI == LI.getLoopFor(KillingDef->getParent()))

      return true;

    // Otherwise check the memory location is invariant to any loops.

    return isGuaranteedLoopInvariant(CurrentLoc.Ptr);

  }


  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible

  /// loop. In particular, this guarantees that it only references a single

  /// MemoryLocation during execution of the containing function.

  bool isGuaranteedLoopInvariant(const Value *Ptr) {

    Ptr = Ptr->stripPointerCasts();

    if (auto *GEP = dyn_cast<GEPOperator>(Ptr))

      if (GEP->hasAllConstantIndices())

        Ptr = GEP->getPointerOperand()->stripPointerCasts();


    if (auto *I = dyn_cast<Instruction>(Ptr)) {

      return I->getParent()->isEntryBlock() ||

             (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));

    }

    return true;

  }


  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,

  // with no read access between them or on any other path to a function exit

  // block if \p KillingLoc is not accessible after the function returns. If

  // there is no such MemoryDef, return std::nullopt. The returned value may not

  // (completely) overwrite \p KillingLoc. Currently we bail out when we

  // encounter an aliasing MemoryUse (read).

  std::optional<MemoryAccess *>

  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,

                  const MemoryLocation &KillingLoc, const Value *KillingUndObj,

                  unsigned &ScanLimit, unsigned &WalkerStepLimit,

                  bool IsMemTerm, unsigned &PartialLimit) {

    if (ScanLimit == 0 || WalkerStepLimit == 0) {

      LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");

      return std::nullopt;

    }


    MemoryAccess *Current = StartAccess;

    Instruction *KillingI = KillingDef->getMemoryInst();

    LLVM_DEBUG(dbgs() << "  trying to get dominating access\n");


    // Only optimize defining access of KillingDef when directly starting at its

    // defining access. The defining access also must only access KillingLoc. At

    // the moment we only support instructions with a single write location, so

    // it should be sufficient to disable optimizations for instructions that

    // also read from memory.

    bool CanOptimize = OptimizeMemorySSA &&

                       KillingDef->getDefiningAccess() == StartAccess &&

                       !KillingI->mayReadFromMemory();


    // Find the next clobbering Mod access for DefLoc, starting at StartAccess.

    std::optional<MemoryLocation> CurrentLoc;

    for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {

      LLVM_DEBUG({

        dbgs() << "   visiting " << *Current;

        if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))

          dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()

                 << ")";

        dbgs() << "\n";

      });


      // Reached TOP.

      if (MSSA.isLiveOnEntryDef(Current)) {

        LLVM_DEBUG(dbgs() << "   ...  found LiveOnEntryDef\n");

        if (CanOptimize && Current != KillingDef->getDefiningAccess())

          // The first clobbering def is... none.

          KillingDef->setOptimized(Current);

        return std::nullopt;

      }


      // Cost of a step. Accesses in the same block are more likely to be valid

      // candidates for elimination, hence consider them cheaper.

      unsigned StepCost = KillingDef->getBlock() == Current->getBlock()

                              ? MemorySSASameBBStepCost

                              : MemorySSAOtherBBStepCost;

      if (WalkerStepLimit <= StepCost) {

        LLVM_DEBUG(dbgs() << "   ...  hit walker step limit\n");

        return std::nullopt;

      }

      WalkerStepLimit -= StepCost;


      // Return for MemoryPhis. They cannot be eliminated directly and the

      // caller is responsible for traversing them.

      if (isa<MemoryPhi>(Current)) {

        LLVM_DEBUG(dbgs() << "   ...  found MemoryPhi\n");

        return Current;

      }


      // Below, check if CurrentDef is a valid candidate to be eliminated by

      // KillingDef. If it is not, check the next candidate.

      MemoryDef *CurrentDef = cast<MemoryDef>(Current);

      Instruction *CurrentI = CurrentDef->getMemoryInst();


      if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {

        CanOptimize = false;

        continue;

      }


      // Before we try to remove anything, check for any extra throwing

      // instructions that block us from DSEing

      if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {

        LLVM_DEBUG(dbgs() << "  ... skip, may throw!\n");

        return std::nullopt;

      }


      // Check for anything that looks like it will be a barrier to further

      // removal

      if (isDSEBarrier(KillingUndObj, CurrentI)) {

        LLVM_DEBUG(dbgs() << "  ... skip, barrier\n");

        return std::nullopt;

      }


      // If Current is known to be on path that reads DefLoc or is a read

      // clobber, bail out, as the path is not profitable. We skip this check

      // for intrinsic calls, because the code knows how to handle memcpy

      // intrinsics.

      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))

        return std::nullopt;


      // Quick check if there are direct uses that are read-clobbers.

      if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {

            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))

              return !MSSA.dominates(StartAccess, UseOrDef) &&

                     isReadClobber(KillingLoc, UseOrDef->getMemoryInst());

            return false;

          })) {

        LLVM_DEBUG(dbgs() << "   ...  found a read clobber\n");

        return std::nullopt;

      }


      // If Current does not have an analyzable write location or is not

      // removable, skip it.

      CurrentLoc = getLocForWrite(CurrentI);

      if (!CurrentLoc || !isRemovable(CurrentI)) {

        CanOptimize = false;

        continue;

      }


      // AliasAnalysis does not account for loops. Limit elimination to

      // candidates for which we can guarantee they always store to the same

      // memory location and not located in different loops.

      if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {

        LLVM_DEBUG(dbgs() << "  ... not guaranteed loop independent\n");

        CanOptimize = false;

        continue;

      }


      if (IsMemTerm) {

        // If the killing def is a memory terminator (e.g. lifetime.end), check

        // the next candidate if the current Current does not write the same

        // underlying object as the terminator.

        if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {

          CanOptimize = false;

          continue;

        }

      } else {

        int64_t KillingOffset = 0;

        int64_t DeadOffset = 0;

        auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,

                              KillingOffset, DeadOffset);

        if (CanOptimize) {

          // CurrentDef is the earliest write clobber of KillingDef. Use it as

          // optimized access. Do not optimize if CurrentDef is already the

          // defining access of KillingDef.

          if (CurrentDef != KillingDef->getDefiningAccess() &&

              (OR == OW_Complete || OR == OW_MaybePartial))

            KillingDef->setOptimized(CurrentDef);


          // Once a may-aliasing def is encountered do not set an optimized

          // access.

          if (OR != OW_None)

            CanOptimize = false;

        }


        // If Current does not write to the same object as KillingDef, check

        // the next candidate.

        if (OR == OW_Unknown || OR == OW_None)

          continue;

        else if (OR == OW_MaybePartial) {

          // If KillingDef only partially overwrites Current, check the next

          // candidate if the partial step limit is exceeded. This aggressively

          // limits the number of candidates for partial store elimination,

          // which are less likely to be removable in the end.

          if (PartialLimit <= 1) {

            WalkerStepLimit -= 1;

            LLVM_DEBUG(dbgs() << "   ... reached partial limit ... continue with next access\n");

            continue;

          }

          PartialLimit -= 1;

        }

      }

      break;

    };


    // Accesses to objects accessible after the function returns can only be

    // eliminated if the access is dead along all paths to the exit. Collect

    // the blocks with killing (=completely overwriting MemoryDefs) and check if

    // they cover all paths from MaybeDeadAccess to any function exit.

    SmallPtrSet<Instruction *, 16> KillingDefs;

    KillingDefs.insert(KillingDef->getMemoryInst());

    MemoryAccess *MaybeDeadAccess = Current;

    MemoryLocation MaybeDeadLoc = *CurrentLoc;

    Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();

    LLVM_DEBUG(dbgs() << "  Checking for reads of " << *MaybeDeadAccess << " ("

                      << *MaybeDeadI << ")\n");


    SmallSetVector<MemoryAccess *, 32> WorkList;

    auto PushMemUses = [&WorkList](MemoryAccess *Acc) {

      for (Use &U : Acc->uses())

        WorkList.insert(cast<MemoryAccess>(U.getUser()));

    };

    PushMemUses(MaybeDeadAccess);


    // Check if DeadDef may be read.

    for (unsigned I = 0; I < WorkList.size(); I++) {

      MemoryAccess *UseAccess = WorkList[I];


      LLVM_DEBUG(dbgs() << "   " << *UseAccess);

      // Bail out if the number of accesses to check exceeds the scan limit.

      if (ScanLimit < (WorkList.size() - I)) {

        LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");

        return std::nullopt;

      }

      --ScanLimit;

      NumDomMemDefChecks++;


      if (isa<MemoryPhi>(UseAccess)) {

        if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {

              return DT.properlyDominates(KI->getParent(),

                                          UseAccess->getBlock());

            })) {

          LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");

          continue;

        }

        LLVM_DEBUG(dbgs() << "\n    ... adding PHI uses\n");

        PushMemUses(UseAccess);

        continue;

      }


      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();

      LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");


      if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {

            return DT.dominates(KI, UseInst);

          })) {

        LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");

        continue;

      }


      // A memory terminator kills all preceding MemoryDefs and all succeeding

      // MemoryAccesses. We do not have to check its users.

      if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {

        LLVM_DEBUG(

            dbgs()

            << " ... skipping, memterminator invalidates following accesses\n");

        continue;

      }


      if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {

        LLVM_DEBUG(dbgs() << "    ... adding uses of intrinsic\n");

        PushMemUses(UseAccess);

        continue;

      }


      if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {

        LLVM_DEBUG(dbgs() << "  ... found throwing instruction\n");

        return std::nullopt;

      }


      // Uses which may read the original MemoryDef mean we cannot eliminate the

      // original MD. Stop walk.

      if (isReadClobber(MaybeDeadLoc, UseInst)) {

        LLVM_DEBUG(dbgs() << "    ... found read clobber\n");

        return std::nullopt;

      }


      // If this worklist walks back to the original memory access (and the

      // pointer is not guaranteed loop invariant) then we cannot assume that a

      // store kills itself.

      if (MaybeDeadAccess == UseAccess &&

          !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {

        LLVM_DEBUG(dbgs() << "    ... found not loop invariant self access\n");

        return std::nullopt;

      }

      // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check

      // if it reads the memory location.

      // TODO: It would probably be better to check for self-reads before

      // calling the function.

      if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {

        LLVM_DEBUG(dbgs() << "    ... skipping killing def/dom access\n");

        continue;

      }


      // Check all uses for MemoryDefs, except for defs completely overwriting

      // the original location. Otherwise we have to check uses of *all*

      // MemoryDefs we discover, including non-aliasing ones. Otherwise we might

      // miss cases like the following

      //   1 = Def(LoE) ; <----- DeadDef stores [0,1]

      //   2 = Def(1)   ; (2, 1) = NoAlias,   stores [2,3]

      //   Use(2)       ; MayAlias 2 *and* 1, loads [0, 3].

      //                  (The Use points to the *first* Def it may alias)

      //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,

      //                  stores [0,1]

      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {

        if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {

          BasicBlock *MaybeKillingBlock = UseInst->getParent();

          if (PostOrderNumbers.find(MaybeKillingBlock)->second <

              PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {

            if (!isInvisibleToCallerAfterRet(KillingUndObj)) {

              LLVM_DEBUG(dbgs()

                         << "    ... found killing def " << *UseInst << "\n");

              KillingDefs.insert(UseInst);

            }

          } else {

            LLVM_DEBUG(dbgs()

                       << "    ... found preceeding def " << *UseInst << "\n");

            return std::nullopt;

          }

        } else

          PushMemUses(UseDef);

      }

    }


    // For accesses to locations visible after the function returns, make sure

    // that the location is dead (=overwritten) along all paths from

    // MaybeDeadAccess to the exit.

    if (!isInvisibleToCallerAfterRet(KillingUndObj)) {

      SmallPtrSet<BasicBlock *, 16> KillingBlocks;

      for (Instruction *KD : KillingDefs)

        KillingBlocks.insert(KD->getParent());

      assert(!KillingBlocks.empty() &&

             "Expected at least a single killing block");


      // Find the common post-dominator of all killing blocks.

      BasicBlock *CommonPred = *KillingBlocks.begin();

      for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {

        if (!CommonPred)

          break;

        CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);

      }


      // If the common post-dominator does not post-dominate MaybeDeadAccess,

      // there is a path from MaybeDeadAccess to an exit not going through a

      // killing block.

      if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {

        if (!AnyUnreachableExit)

          return std::nullopt;


        // Fall back to CFG scan starting at all non-unreachable roots if not

        // all paths to the exit go through CommonPred.

        CommonPred = nullptr;

      }


      // If CommonPred itself is in the set of killing blocks, we're done.

      if (KillingBlocks.count(CommonPred))

        return {MaybeDeadAccess};


      SetVector<BasicBlock *> WorkList;

      // If CommonPred is null, there are multiple exits from the function.

      // They all have to be added to the worklist.

      if (CommonPred)

        WorkList.insert(CommonPred);

      else

        for (BasicBlock *R : PDT.roots()) {

          if (!isa<UnreachableInst>(R->getTerminator()))

            WorkList.insert(R);

        }


      NumCFGTries++;

      // Check if all paths starting from an exit node go through one of the

      // killing blocks before reaching MaybeDeadAccess.

      for (unsigned I = 0; I < WorkList.size(); I++) {

        NumCFGChecks++;

        BasicBlock *Current = WorkList[I];

        if (KillingBlocks.count(Current))

          continue;

        if (Current == MaybeDeadAccess->getBlock())

          return std::nullopt;


        // MaybeDeadAccess is reachable from the entry, so we don't have to

        // explore unreachable blocks further.

        if (!DT.isReachableFromEntry(Current))

          continue;


        for (BasicBlock *Pred : predecessors(Current))

          WorkList.insert(Pred);


        if (WorkList.size() >= MemorySSAPathCheckLimit)

          return std::nullopt;

      }

      NumCFGSuccess++;

    }


    // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is

    // potentially dead.

    return {MaybeDeadAccess};

  }


  /// Delete dead memory defs and recursively add their operands to ToRemove if

  /// they became dead.

  void
  deleteDeadInstruction(Instruction *SI,
                        SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) {
    MemorySSAUpdater Updater(&MSSA);

    // Worklist of instructions known to be dead, seeded with SI. Operands that
    // turn out to be trivially dead along the way are appended to it.
    SmallVector<Instruction *, 32> Pending;
    Pending.push_back(SI);

    // The loop below bumps NumFastOther once per processed instruction;
    // compensate up front so that SI itself is not counted there.
    --NumFastOther;

    while (!Pending.empty()) {
      Instruction *Victim = Pending.pop_back_val();
      ++NumFastOther;

      // Salvage what can be preserved from the instruction before it goes
      // away.
      salvageDebugInfo(*Victim);
      salvageKnowledge(Victim);

      // Detach the instruction from MemorySSA, if it has an access there.
      bool VictimIsMemDef = false;
      if (MemoryAccess *Access = MSSA.getMemoryAccess(Victim)) {
        if (isa<MemoryDef>(Access)) {
          VictimIsMemDef = true;
          auto *Def = cast<MemoryDef>(Access);

          // Remember the def as removed, both globally and (when requested)
          // in the caller-provided set.
          SkipStores.insert(Def);
          if (Deleted)
            Deleted->insert(Def);

          // A store of a pointer value is going away: drop the cached
          // capture/visibility results for the stored pointer's underlying
          // object. If a CapturedBeforeReturn entry was actually dropped,
          // request another end-of-function DSE round.
          auto *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
          if (Store && Store->getValueOperand()->getType()->isPointerTy()) {
            const Value *StoredObj =
                getUnderlyingObject(Store->getValueOperand());
            if (CapturedBeforeReturn.erase(StoredObj))
              ShouldIterateEndOfFunctionDSE = true;
            InvisibleToCallerAfterRet.erase(StoredObj);
          }
        }

        Updater.removeMemoryAccess(Access);
      }

      // Forget any partial-overlap intervals recorded for this instruction.
      auto IOLIt = IOLs.find(Victim->getParent());
      if (IOLIt != IOLs.end())
        IOLIt->second.erase(Victim);

      // Replace instruction operands with poison and queue those operand
      // instructions that became trivially dead as a result.
      for (Use &Op : Victim->operands()) {
        Instruction *OpInst = dyn_cast<Instruction>(Op);
        if (!OpInst)
          continue;
        Op.set(PoisonValue::get(Op->getType()));
        if (isInstructionTriviallyDead(OpInst, &TLI))
          Pending.push_back(OpInst);
      }

      EI.removeInstruction(Victim);

      // Only memory defs that produce no result are erased right away. All
      // other instructions are merely queued in ToRemove: they may have been
      // used as memory locations cached by BatchAA, and freeing them now
      // could let a newly created instruction reuse the same address, which
      // would leave stale cache entries behind.
      if (!VictimIsMemDef || !Victim->getType()->isVoidTy())
        ToRemove.push_back(Victim);
      else
        Victim->eraseFromParent();
    }
  }


  // Check for any extra throws between \p KillingI and \p DeadI that block

  // DSE.  This only checks extra maythrows (those that aren't MemoryDef's).

  // MemoryDef that may throw are handled during the walk from one def to the

  // next.

  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
                       const Value *KillingUndObj) {
    // Throws do not matter when the killing store targets an object that the
    // caller cannot observe if the function unwinds.
    const bool HiddenOnUnwind =
        KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj);
    if (HiddenOnUnwind)
      return false;

    auto *KillingBB = KillingI->getParent();

    // Different blocks: conservatively report a possible throw as soon as any
    // block is recorded in ThrowingBlocks.
    if (KillingBB != DeadI->getParent())
      return !ThrowingBlocks.empty();

    // Same block: only that block is relevant.
    return ThrowingBlocks.count(KillingBB);
  }


  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following

  // instructions act as barriers:

  //  * A memory instruction that may throw and \p KillingI accesses a non-stack

  //  object.

  //  * Atomic operations (load, store, atomicrmw, cmpxchg) stronger than

  //  monotonic.

  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
    // A potentially throwing instruction is a barrier unless the killing
    // store writes to an object the caller cannot see on unwind.
    if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
      return true;

    // Everything that is not atomic is harmless from here on.
    if (!DeadI->isAtomic())
      return false;

    // Atomic accesses are barriers exactly when their ordering is stronger
    // than monotonic; such accesses are neither eliminated nor reordered.
    if (auto *Load = dyn_cast<LoadInst>(DeadI))
      return isStrongerThanMonotonic(Load->getOrdering());
    if (auto *Store = dyn_cast<StoreInst>(DeadI))
      return isStrongerThanMonotonic(Store->getOrdering());
    if (auto *RMW = dyn_cast<AtomicRMWInst>(DeadI))
      return isStrongerThanMonotonic(RMW->getOrdering());
    if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI)) {
      // Either of the two orderings of a cmpxchg is enough.
      if (isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()))
        return true;
      return isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
    }

    llvm_unreachable("other instructions should be skipped in MemorySSA");
  }


  /// Eliminate writes to objects that are not visible in the caller and are not

  /// accessed before returning from the function.

  bool eliminateDeadWritesAtEndOfFunction() {

    bool MadeChange = false;

    LLVM_DEBUG(

        dbgs()

        << "Trying to eliminate MemoryDefs at the end of the function\n");

    do {

      ShouldIterateEndOfFunctionDSE = false;

      for (MemoryDef *Def : llvm::reverse(MemDefs)) {

        if (SkipStores.contains(Def))

          continue;


        Instruction *DefI = Def->getMemoryInst();

        auto DefLoc = getLocForWrite(DefI);

        if (!DefLoc || !isRemovable(DefI))

          continue;


        // NOTE: Currently eliminating writes at the end of a function is

        // limited to MemoryDefs with a single underlying object, to save

        // compile-time. In practice it appears the case with multiple

        // underlying objects is very uncommon. If it turns out to be important,

        // we can use getUnderlyingObjects here instead.

        const Value *UO = getUnderlyingObject(DefLoc->Ptr);

        if (!isInvisibleToCallerAfterRet(UO))

          continue;


        if (isWriteAtEndOfFunction(Def)) {

          // See through pointer-to-pointer bitcasts

          LLVM_DEBUG(dbgs() << "   ... MemoryDef is not accessed until the end "

                               "of the function\n");

          deleteDeadInstruction(DefI);

          ++NumFastStores;

          MadeChange = true;

        }

      }

    } while (ShouldIterateEndOfFunctionDSE);

    return MadeChange;

  }


  /// If we have a zero initializing memset following a call to malloc,

  /// try folding it into a call to calloc.

  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
    Instruction *DefI = Def->getMemoryInst();

    // Only a memset that writes constant zero is a candidate.
    auto *MemSet = dyn_cast<MemSetInst>(DefI);
    if (!MemSet)
      // TODO: Could handle zero store to small allocation as well.
      return false;
    auto *ZeroCandidate = dyn_cast<Constant>(MemSet->getValue());
    if (!(ZeroCandidate && ZeroCandidate->isNullValue()))
      return false;

    // The memset might be volatile, in which case it has to stay.
    if (!isRemovable(DefI))
      return false;

    // Leave functions carrying one of these sanitizer attributes alone, and
    // never rewrite the body of a function that is itself named calloc.
    const bool ExcludedFunction =
        F.hasFnAttribute(Attribute::SanitizeMemory) ||
        F.hasFnAttribute(Attribute::SanitizeAddress) ||
        F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
        F.getName() == "calloc";
    if (ExcludedFunction)
      return false;

    // The underlying object of the memset has to be a direct call to a malloc
    // that the target library info knows and provides.
    auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
    if (!Malloc)
      return false;
    auto *InnerCallee = Malloc->getCalledFunction();
    if (!InnerCallee)
      return false;
    LibFunc Func;
    const bool IsAvailableMalloc = TLI.getLibFunc(*InnerCallee, Func) &&
                                   TLI.has(Func) && Func == LibFunc_malloc;
    if (!IsAvailableMalloc)
      return false;

    // Gracefully handle malloc with unexpected memory attributes: without a
    // MemoryDef for the call there is nothing to anchor the new access to.
    auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
    if (!MallocDef)
      return false;

    // Accept the pair when both calls live in the same block, or when the
    // malloc block ends in br(icmp eq ptr, null), truebb, falsebb and the
    // memset sits in falsebb.
    auto PlacementAllowsCalloc = [](CallInst *MallocCall,
                                    CallInst *MemsetCall) {
      auto *MallocBB = MallocCall->getParent();
      auto *MemsetBB = MemsetCall->getParent();
      if (MallocBB == MemsetBB)
        return true;

      auto *Ptr = MemsetCall->getArgOperand(0);
      auto *TI = MallocBB->getTerminator();
      ICmpInst::Predicate Pred;
      BasicBlock *TrueBB, *FalseBB;
      if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
                          FalseBB)))
        return false;
      return Pred == ICmpInst::ICMP_EQ && MemsetBB == FalseBB;
    };

    // The memset has to cover exactly the allocated size.
    if (Malloc->getOperand(0) != MemSet->getLength())
      return false;

    // The malloc must dominate the memset and the memory must stay untouched
    // in between.
    const bool FoldIsSafe =
        PlacementAllowsCalloc(Malloc, MemSet) &&
        DT.dominates(Malloc, MemSet) &&
        memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT);
    if (!FoldIsSafe)
      return false;

    // Emit calloc(1, size) right before the malloc call.
    IRBuilder<> IRB(Malloc);
    Type *SizeTTy = Malloc->getArgOperand(0)->getType();
    auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
                              Malloc->getArgOperand(0), IRB, TLI);
    if (!Calloc)
      return false;

    // Give the new call its own MemoryDef, placed after the malloc's def, then
    // redirect all users of the malloc and delete the malloc call.
    MemorySSAUpdater Updater(&MSSA);
    auto *NewAccess = Updater.createMemoryAccessAfter(
        cast<Instruction>(Calloc), nullptr, MallocDef);
    auto *NewAccessMD = cast<MemoryDef>(NewAccess);
    Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
    Malloc->replaceAllUsesWith(Calloc);
    deleteDeadInstruction(Malloc);
    return true;
  }


  // Check if there is a dominating condition, that implies that the value

  // being stored in a ptr is already present in the ptr.

  bool dominatingConditionImpliesValue(MemoryDef *Def) {
    auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
    BasicBlock *StoreBB = StoreI->getParent();

    // The candidate condition is the terminator of the block that
    // immediately dominates the store's block.
    DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
    if (!IDom)
      return false;
    auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
    if (!(BI && BI->isConditional()))
      return false;

    // With both successors being the same block there is no way to tell
    // which outcome of the condition leads to the store.
    if (BI->getSuccessor(0) == BI->getSuccessor(1))
      return false;

    // The branch has to compare a load from the stored-to pointer against the
    // value being stored, using an equality predicate. ICmpL is bound to the
    // load on a successful match.
    Value *StorePtr = StoreI->getPointerOperand();
    Value *StoreVal = StoreI->getValueOperand();
    Instruction *ICmpL;
    ICmpInst::Predicate Pred;
    const bool ComparesLoadWithStoredValue =
        match(BI->getCondition(),
              m_c_ICmp(Pred,
                       m_CombineAnd(m_Load(m_Specific(StorePtr)),
                                    m_Instruction(ICmpL)),
                       m_Specific(StoreVal)));
    if (!ComparesLoadWithStoredValue || !ICmpInst::isEquality(Pred))
      return false;

    // Pick the outgoing edge on which "loaded value == stored value" holds:
    // successor 0 for eq, successor 1 for ne. That edge has to dominate the
    // store's block; if the other successor can also reach the store, nothing
    // can be concluded.
    const unsigned EqualSucc = Pred == ICmpInst::ICMP_EQ ? 0 : 1;
    if (!DT.dominates(
            BasicBlockEdge(BI->getParent(), BI->getSuccessor(EqualSucc)),
            StoreBB))
      return false;

    // Finally, the access clobbering the store (ignoring the store itself)
    // must dominate the load, per MemorySSA.
    MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
    MemoryAccess *ClobAcc =
        MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
    return MSSA.dominates(ClobAcc, LoadAcc);
  }


  /// \returns true if \p Def is a no-op store, either because it

  /// directly stores back a loaded value or stores zero to a calloced object.

  bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
    Instruction *DefI = Def->getMemoryInst();
    StoreInst *Store = dyn_cast<StoreInst>(DefI);

    // Work out which constant (if any) is written. Only plain stores and
    // memsets are handled.
    Constant *StoredConstant = nullptr;
    if (Store) {
      StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
    } else {
      MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
      if (!MemSet)
        return false;
      StoredConstant = dyn_cast<Constant>(MemSet->getValue());
    }

    if (!isRemovable(DefI))
      return false;

    // Case 1: the written constant equals the initial value of the
    // allocation. The write is then a no-op when its clobbering access is
    // LiveOnEntry, because in that case no instruction in between can have
    // modified the location.
    if (StoredConstant) {
      Constant *InitC =
          getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
      if (InitC && InitC == StoredConstant)
        return MSSA.isLiveOnEntryDef(
            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
    }

    // The remaining cases only apply to stores.
    if (!Store)
      return false;

    // Case 2: a dominating branch condition already guarantees that the
    // location holds the stored value.
    if (dominatingConditionImpliesValue(Def))
      return true;

    // Case 3: the store writes back a value loaded from the very same
    // pointer.
    auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0));
    if (!LoadI || LoadI->getPointerOperand() != Store->getOperand(1))
      return false;

    // Fast path: load and store share their defining access.
    auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
    if (LoadAccess == Def->getDefiningAccess())
      return true;

    // Slow path: look through MemoryPhis starting at the store's clobbering
    // access. Every MemoryDef reached this way has to be the load's defining
    // access.
    MemoryAccess *Clobber =
        MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
    SetVector<MemoryAccess *> ToCheck;
    // A phi may point back at the store itself, and that must not make us
    // bail. Put Def in slot 0 and start scanning at slot 1, i.e. pretend it
    // has been checked already.
    ToCheck.insert(Def);
    ToCheck.insert(Clobber);
    for (unsigned Idx = 1; Idx < ToCheck.size(); ++Idx) {
      MemoryAccess *Visited = ToCheck[Idx];

      // Phis contribute all of their incoming accesses to the worklist.
      if (auto PhiAccess = dyn_cast<MemoryPhi>(Visited)) {
        for (auto &Incoming : PhiAccess->incoming_values())
          ToCheck.insert(cast<MemoryAccess>(&Incoming));
        continue;
      }

      assert(isa<MemoryDef>(Visited) &&
             "Only MemoryDefs should reach here.");
      // TODO: Skip no alias MemoryDefs that have no aliasing reads.
      // A def other than the load's defining access is an unrelated write
      // sitting between the load and the store, so the store is not a no-op.
      if (LoadAccess != Visited)
        return false;
    }
    return true;
  }


  /// Tries to shorten every instruction recorded in \p IOL, first at its end
  /// and then at its beginning, based on the overlap intervals collected for
  /// it.
  ///
  /// \param IOL maps an instruction to the intervals recorded for it. Entries
  ///        are visited by reference, so any interval the shortening helpers
  ///        remove is removed from \p IOL itself.
  /// \returns true if any instruction was changed.
  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
    bool Changed = false;
    // Iterate by reference: each entry owns a whole interval map, and copying
    // it once per instruction is pure overhead.
    for (auto &OI : IOL) {
      Instruction *DeadI = OI.first;
      // NOTE(review): the optional is dereferenced unchecked; this assumes
      // every recorded instruction has an analyzable write location.
      MemoryLocation Loc = *getLocForWrite(DeadI);
      assert(isRemovable(DeadI) && "Expect only removable instruction");

      // Compute the constant start offset of the write from its pointer.
      const Value *Ptr = Loc.Ptr->stripPointerCasts();
      int64_t DeadStart = 0;
      uint64_t DeadSize = Loc.Size.getValue();
      GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);

      OverlapIntervalsTy &IntervalMap = OI.second;
      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
      // Nothing left to base a shortening of the beginning on.
      if (IntervalMap.empty())
        continue;
      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
    }
    return Changed;
  }


  /// Eliminates writes to locations where the value that is being written

  /// is already stored at the same location.

  bool eliminateRedundantStoresOfExistingValues() {
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
                         "already existing value\n");

    bool MadeChange = false;
    for (auto *Def : MemDefs) {
      if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
        continue;

      Instruction *DefInst = Def->getMemoryInst();
      auto MaybeDefLoc = getLocForWrite(DefInst);
      if (!MaybeDefLoc || !isRemovable(DefInst))
        continue;

      // To conserve compile-time, no walk to the next clobbering def is done
      // here. The optimized access is used when one has been set (DSE tries
      // to optimize defs during the earlier traversal); otherwise the plain
      // defining access is taken.
      auto *UpperAccess =
          Def->isOptimized() ? Def->getOptimized() : Def->getDefiningAccess();
      MemoryDef *UpperDef = dyn_cast<MemoryDef>(UpperAccess);
      if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
        continue;

      Instruction *UpperInst = UpperDef->getMemoryInst();

      // The lower def is redundant if it is identical to the upper one, or if
      // it is a store of a bytewise value into memory that an upper memset of
      // that same byte value completely covers.
      auto IsRedundantStore = [&]() {
        if (DefInst->isIdenticalTo(UpperInst))
          return true;

        auto *MemSetI = dyn_cast<MemSetInst>(UpperInst);
        auto *SI = dyn_cast<StoreInst>(DefInst);
        if (!MemSetI || !SI)
          return false;

        // MemSetInst must have a write location.
        MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
        int64_t InstWriteOffset = 0;
        int64_t DepWriteOffset = 0;
        auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
                              InstWriteOffset, DepWriteOffset);
        Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
        return StoredByte && StoredByte == MemSetI->getOperand(1) &&
               OR == OW_Complete;
      };

      if (!IsRedundantStore())
        continue;
      // Keep the def if the instruction may also read the location it writes.
      if (isReadClobber(*MaybeDefLoc, DefInst))
        continue;

      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: " << *DefInst
                        << '\n');
      deleteDeadInstruction(DefInst);
      NumRedundantStores++;
      MadeChange = true;
    }
    return MadeChange;
  }

};


// Driver of the transformation. Treats every MemoryDef collected in
// State.MemDefs as a potential killing def and uses State.getDomMemoryDef() to
// find earlier defs it overwrites; those are deleted, or merged into when only
// partially overwritten. Afterwards the killing def itself is checked for
// being a no-op store or a memset that can be folded into calloc. Once all
// defs are processed, partially overlapped stores are shortened, stores of
// already existing values and dead writes at the end of the function are
// removed, and finally all instructions queued in State.ToRemove are erased.
//
// Returns true if the function was changed.
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,

                                DominatorTree &DT, PostDominatorTree &PDT,

                                const TargetLibraryInfo &TLI,

                                const LoopInfo &LI) {

  bool MadeChange = false;


  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);

  // For each store:

  for (unsigned I = 0; I < State.MemDefs.size(); I++) {

    MemoryDef *KillingDef = State.MemDefs[I];

    // Defs deleted by an earlier iteration must not be used as killers.
    if (State.SkipStores.count(KillingDef))

      continue;

    Instruction *KillingI = KillingDef->getMemoryInst();


    // Determine the location KillingI kills: the terminated location for
    // memory terminators, the written location otherwise.
    std::optional<MemoryLocation> MaybeKillingLoc;

    if (State.isMemTerminatorInst(KillingI)) {

      if (auto KillingLoc = State.getLocForTerminator(KillingI))

        MaybeKillingLoc = KillingLoc->first;

    } else {

      MaybeKillingLoc = State.getLocForWrite(KillingI);

    }


    if (!MaybeKillingLoc) {

      LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "

                        << *KillingI << "\n");

      continue;

    }

    MemoryLocation KillingLoc = *MaybeKillingLoc;

    assert(KillingLoc.Ptr && "KillingLoc should not be null");

    const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);

    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "

                      << *KillingDef << " (" << *KillingI << ")\n");


    // Budgets for this killing def. They are handed to getDomMemoryDef() for
    // every candidate, so they are shared across the whole worklist below.
    unsigned ScanLimit = MemorySSAScanLimit;

    unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;

    unsigned PartialLimit = MemorySSAPartialStoreLimit;

    // Worklist of MemoryAccesses that may be killed by KillingDef.

    SmallSetVector<MemoryAccess *, 8> ToCheck;

    // Track MemoryAccesses that have been deleted in the loop below, so we can

    // skip them. Don't use SkipStores for this, which may contain reused

    // MemoryAccess addresses.

    SmallPtrSet<MemoryAccess *, 8> Deleted;

    [[maybe_unused]] unsigned OrigNumSkipStores = State.SkipStores.size();

    ToCheck.insert(KillingDef->getDefiningAccess());


    // Set once KillingI has been merged into an earlier store and deleted; the
    // no-op and calloc checks after the loop are skipped in that case.
    bool Shortend = false;

    bool IsMemTerm = State.isMemTerminatorInst(KillingI);

    // Check if MemoryAccesses in the worklist are killed by KillingDef.

    // Note: this index intentionally shadows the outer loop's I.
    for (unsigned I = 0; I < ToCheck.size(); I++) {

      MemoryAccess *Current = ToCheck[I];

      if (Deleted.contains(Current))

        continue;


      std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(

          KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,

          WalkerStepLimit, IsMemTerm, PartialLimit);


      if (!MaybeDeadAccess) {

        LLVM_DEBUG(dbgs() << "  finished walk\n");

        continue;

      }


      MemoryAccess *DeadAccess = *MaybeDeadAccess;

      LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);

      // A MemoryPhi is not a store itself; continue the search through its
      // incoming accesses instead.
      if (isa<MemoryPhi>(DeadAccess)) {

        LLVM_DEBUG(dbgs() << "\n  ... adding incoming values to worklist\n");

        for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {

          MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);

          BasicBlock *IncomingBlock = IncomingAccess->getBlock();

          BasicBlock *PhiBlock = DeadAccess->getBlock();


          // We only consider incoming MemoryAccesses that come before the

          // MemoryPhi. Otherwise we could discover candidates that do not

          // strictly dominate our starting def.

          if (State.PostOrderNumbers[IncomingBlock] >

              State.PostOrderNumbers[PhiBlock])

            ToCheck.insert(IncomingAccess);

        }

        continue;

      }

      auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);

      Instruction *DeadI = DeadDefAccess->getMemoryInst();

      LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");

      // Keep walking upwards from this candidate regardless of whether it
      // ends up being removed.
      ToCheck.insert(DeadDefAccess->getDefiningAccess());

      NumGetDomMemoryDefPassed++;


      // Debug counter hook: allows skipping individual transformations.
      if (!DebugCounter::shouldExecute(MemorySSACounter))

        continue;


      MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);


      if (IsMemTerm) {

        // A memory terminator only kills stores to the very same underlying
        // object.
        const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);

        if (KillingUndObj != DeadUndObj)

          continue;

        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *DeadI

                          << "\n  KILLER: " << *KillingI << '\n');

        State.deleteDeadInstruction(DeadI, &Deleted);

        ++NumFastStores;

        MadeChange = true;

      } else {

        // Check if KillingI overwrites DeadI.

        int64_t KillingOffset = 0;

        int64_t DeadOffset = 0;

        OverwriteResult OR = State.isOverwrite(

            KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);

        // A possible partial overwrite is refined using the overlap intervals
        // recorded per block; the entry for DeadI's block is created on
        // demand.
        if (OR == OW_MaybePartial) {

          auto Iter = State.IOLs.insert(

              std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(

                  DeadI->getParent(), InstOverlapIntervalsTy()));

          auto &IOL = Iter.first->second;

          OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,

                                  DeadOffset, DeadI, IOL);

        }


        if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {

          auto *DeadSI = dyn_cast<StoreInst>(DeadI);

          auto *KillingSI = dyn_cast<StoreInst>(KillingI);

          // We are re-using tryToMergePartialOverlappingStores, which requires

          // DeadSI to dominate KillingSI.

          // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.

          if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {

            if (Constant *Merged = tryToMergePartialOverlappingStores(

                    KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,

                    State.BatchAA, &DT)) {


              // Update stored value of earlier store to merged constant.

              DeadSI->setOperand(0, Merged);

              ++NumModifiedStores;

              MadeChange = true;


              Shortend = true;

              // Remove killing store and remove any outstanding overlap

              // intervals for the updated store.

              State.deleteDeadInstruction(KillingSI, &Deleted);

              auto I = State.IOLs.find(DeadSI->getParent());

              if (I != State.IOLs.end())

                I->second.erase(DeadSI);

              // KillingI is gone, so no further candidate can be checked
              // against it.
              break;

            }

          }

        }


        if (OR == OW_Complete) {

          LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *DeadI

                            << "\n  KILLER: " << *KillingI << '\n');

          State.deleteDeadInstruction(DeadI, &Deleted);

          ++NumFastStores;

          MadeChange = true;

        }

      }

    }


    // Every def added to SkipStores during the loop above must also have been
    // recorded in Deleted.
    assert(State.SkipStores.size() - OrigNumSkipStores == Deleted.size() &&

           "SkipStores and Deleted out of sync?");


    // Check if the store is a no-op.

    if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {

      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: " << *KillingI

                        << '\n');

      State.deleteDeadInstruction(KillingI);

      NumRedundantStores++;

      MadeChange = true;

      continue;

    }


    // Can we form a calloc from a memset/malloc pair?

    if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {

      LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"

                        << "  DEAD: " << *KillingI << '\n');

      State.deleteDeadInstruction(KillingI);

      MadeChange = true;

      continue;

    }

  }


  // Shorten stores whose beginning or end is overwritten, using the overlap
  // intervals collected per block during the main loop.
  if (EnablePartialOverwriteTracking)

    for (auto &KV : State.IOLs)

      MadeChange |= State.removePartiallyOverlappedStores(KV.second);


  MadeChange |= State.eliminateRedundantStoresOfExistingValues();

  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();


  // Erase the instructions whose removal was deferred by
  // deleteDeadInstruction().
  while (!State.ToRemove.empty()) {

    Instruction *DeadInst = State.ToRemove.pop_back_val();

    DeadInst->eraseFromParent();

  }


  return MadeChange;

}

} // end anonymous namespace


//===----------------------------------------------------------------------===//

// DSE Pass

//===----------------------------------------------------------------------===//

// New-pass-manager entry point for dead store elimination on a single
// function. Fetches the analyses that eliminateDeadStores takes as inputs,
// runs it, and reports which analyses remain valid afterwards.
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  // Request each required analysis result from the manager.
  AliasAnalysis &AliasInfo = AM.getResult<AAManager>(F);
  const TargetLibraryInfo &LibInfo = AM.getResult<TargetLibraryAnalysis>(F);
  DominatorTree &DomTree = AM.getResult<DominatorTreeAnalysis>(F);
  MemorySSA &MemSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  PostDominatorTree &PostDomTree = AM.getResult<PostDominatorTreeAnalysis>(F);
  LoopInfo &Loops = AM.getResult<LoopAnalysis>(F);

  const bool Changed = eliminateDeadStores(F, AliasInfo, MemSSA, DomTree,
                                           PostDomTree, LibInfo, Loops);

  // Count the StoreInsts still present in F after the transformation, but
  // only when statistics collection is switched on.
  // NOTE(review): the guard is an #ifdef, so this code is compiled whenever
  // LLVM_ENABLE_STATS is defined, whatever its value -- confirm that is the
  // intent.
#ifdef LLVM_ENABLE_STATS
  if (AreStatisticsEnabled()) {
    for (auto &Inst : instructions(F)) {
      NumRemainingStores += isa<StoreInst>(&Inst);
    }
  }
#endif

  if (Changed) {
    // A true result is treated as "the function was modified": keep only the
    // CFG analyses set, MemorySSA and LoopInfo marked as preserved.
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    Preserved.preserve<MemorySSAAnalysis>();
    Preserved.preserve<LoopAnalysis>();
    return Preserved;
  }

  // A false result is treated as "nothing was modified": everything stays
  // preserved.
  return PreservedAnalyses::all();
}

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:74

APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...

ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:541

AliasAnalysis.h

Argument.h

AssumeBundleBuilder.h

instructions
Expand Atomic instructions
Definition: AtomicExpandPass.cpp:158

getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:849

BasicBlock.h

BuildLibCalls.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

CaptureTracking.h

Casting.h

CommandLine.h

Constant.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

DataLayout.h

shortenAssignment
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
Definition: DeadStoreElimination.cpp:484

isShortenableAtTheEnd
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:175

EnablePartialStoreMerging
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))

tryToShortenBegin
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:683

OverlapIntervalsTy
std::map< int64_t, int64_t > OverlapIntervalsTy
Definition: DeadStoreElimination.cpp:170

isShortenableAtTheBeginning
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:200

MemorySSADefsPerBlockLimit
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))

tryToMergePartialOverlappingStores
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
Definition: DeadStoreElimination.cpp:713

memoryIsNotModifiedBetween
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
Definition: DeadStoreElimination.cpp:397

isMaskedStoreOverwrite
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instructions are masked stores that completely overwrite one another.
Definition: DeadStoreElimination.cpp:236

MemorySSAOtherBBStepCost
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))

tryToShorten
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
Definition: DeadStoreElimination.cpp:561

MemorySSAScanLimit
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))

MemorySSASameBBStepCost
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))

EnablePartialOverwriteTracking
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))

isPartialOverwrite
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
Definition: DeadStoreElimination.cpp:279

MemorySSAPartialStoreLimit
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))

getPointerSize
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
Definition: DeadStoreElimination.cpp:206

tryToShortenEnd
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:656

MemorySSAUpwardsStepLimit
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))

OptimizeMemorySSA
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))

MemorySSAPathCheckLimit
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))

DeadStoreElimination.h

DebugCounter.h
This file provides an implementation of debug counters.

DEBUG_COUNTER
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101

DenseMap.h
This file defines the DenseMap class.

Dominators.h

Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79

Size
uint64_t Size
Definition: ELFObjHandler.cpp:81

Function.h

GlobalsModRef.h
This is the interface for a simple mod/ref and alias analysis over globals.

GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171

IRBuilder.h

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:113

Instruction.h

InstIterator.h

InstrTypes.h

Instructions.h

IntrinsicInst.h

LoopDeletionResult::Deleted
@ Deleted

deleteDeadInstruction
static void deleteDeadInstruction(Instruction *I)
Definition: LoopIdiomRecognize.cpp:274

LoopInfo.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

MapVector.h
This file implements a map that provides insertion order iteration.

MallocFamily::Malloc
@ Malloc

MemoryBuiltins.h

MemoryLocation.h
This file provides utility analysis objects describing memory locations.

MemorySSAUpdater.h

MemorySSA.h
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

Module.h
This file contains the declarations for the Module class.

MustExecute.h
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...

PassManager.h
This header defines various interfaces for pass management in LLVM.

PatternMatch.h

PostDominators.h

PostOrderIterator.h
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.

CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

SetVector.h
This file implements a set that has insertion order iteration characteristics.

SmallPtrSet.h
This file defines the SmallPtrSet class.

SmallVector.h
This file defines the SmallVector class.

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167

StringRef.h

Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:76

TargetLibraryInfo.h

Local.h

ValueTracking.h

Value.h

NewExpr
Definition: ItaniumDemangle.h:2075

VectorType
Definition: ItaniumDemangle.h:1149

llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:912

llvm::AAResults
Definition: AliasAnalysis.h:307

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:76

llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981

llvm::APInt::getBitsSet
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236

llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439

llvm::AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:81

llvm::AliasResult::NoAlias
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:99

llvm::AliasResult::PartialAlias
@ PartialAlias
The two locations alias, but only due to a partial overlap.
Definition: AliasAnalysis.h:104

llvm::AliasResult::MustAlias
@ MustAlias
The two locations precisely alias each other.
Definition: AliasAnalysis.h:106

llvm::AliasResult::getOffset
constexpr int32_t getOffset() const
Definition: AliasAnalysis.h:127

llvm::AliasResult::hasOffset
constexpr bool hasOffset() const
Definition: AliasAnalysis.h:126

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473

llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31

llvm::BasicBlockEdge
Definition: Dominators.h:94

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:60

llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221

llvm::BatchAAResults
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Definition: AliasAnalysis.h:623

llvm::BatchAAResults::alias
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:632

llvm::BatchAAResults::isMustAlias
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:655

llvm::BatchAAResults::getModRefInfo
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Definition: AliasAnalysis.h:642

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1565

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993

llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41

llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90

llvm::DIAssignID
Assignment ID.
Definition: DebugInfoMetadata.h:308

llvm::DIAssignID::getDistinct
static DIAssignID * getDistinct(LLVMContext &Context)
Definition: DebugInfoMetadata.h:330

llvm::DIExpression::createFragmentExpression
static std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
Definition: DebugInfoMetadata.cpp:1924

llvm::DSEPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition: DeadStoreElimination.cpp:2332

llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110

llvm::DebugCounter::shouldExecute
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72

llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155

llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:329

llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220

llvm::DenseMap
Definition: DenseMap.h:742

llvm::DomTreeNodeBase< BasicBlock >

llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:90

llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:89

llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279

llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:484

llvm::DominatorTreeBase::roots
iterator_range< root_iterator > roots()
Definition: GenericDomTree.h:320

llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:367

llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition: GenericDomTree.h:408

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162

llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321

llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122

llvm::EarliestEscapeInfo
Context-sensitive CaptureInfo provider, which computes and caches the earliest common dominator closu...
Definition: AliasAnalysis.h:177

llvm::EarliestEscapeInfo::removeInstruction
void removeInstruction(Instruction *I)
Definition: BasicAliasAnalysis.cpp:251

llvm::Function
Definition: Function.h:62

llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:783

llvm::GetElementPtrInst::CreateInBounds
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Create an "inbounds" getelementptr.
Definition: Instructions.h:1035

llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1359

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666

llvm::Instruction
Definition: Instruction.h:49

llvm::Instruction::mayThrow
bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
Definition: Instruction.cpp:1105

llvm::Instruction::mayWriteToMemory
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
Definition: Instruction.cpp:958

llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:82

llvm::Instruction::isAtomic
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:978

llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:152

llvm::Instruction::eraseFromParent
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:104

llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
Definition: Instruction.cpp:938

llvm::Instruction::isIdenticalTo
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
Definition: Instruction.cpp:864

llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451

llvm::IntervalMap
Definition: IntervalMap.h:936

llvm::IntervalMap::begin
const_iterator begin() const
Definition: IntervalMap.h:1146

llvm::IntervalMap::empty
bool empty() const
empty - Return true when no intervals are mapped.
Definition: IntervalMap.h:1101

llvm::IntervalMap::end
const_iterator end() const
Definition: IntervalMap.h:1158

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67

llvm::LocationSize
Definition: MemoryLocation.h:69

llvm::LocationSize::precise
static LocationSize precise(uint64_t Value)
Definition: MemoryLocation.h:109

llvm::LocationSize::isScalable
bool isScalable() const
Definition: MemoryLocation.h:169

llvm::LocationSize::getValue
TypeSize getValue() const
Definition: MemoryLocation.h:171

llvm::LocationSize::isPrecise
bool isPrecise() const
Definition: MemoryLocation.h:180

llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566

llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: GenericLoopInfo.h:602

llvm::LoopInfo
Definition: LoopInfo.h:407

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44

llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36

llvm::MapVector::end
iterator end()
Definition: MapVector.h:71

llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:167

llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:929

llvm::MemSetBase::getValue
Value * getValue() const
Definition: IntrinsicInst.h:1045

llvm::MemSetInst
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Definition: IntrinsicInst.h:1185

llvm::MemoryAccess
Definition: MemorySSA.h:145

llvm::MemoryAccess::getBlock
BasicBlock * getBlock() const
Definition: MemorySSA.h:164

llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:372

llvm::MemoryDef::setOptimized
void setOptimized(MemoryAccess *MA)
Definition: MemorySSA.h:392

llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:228

llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:36

llvm::MemoryLocation::Size
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
Definition: MemoryLocation.h:245

llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:288

llvm::MemoryLocation::getWithNewPtr
MemoryLocation getWithNewPtr(const Value *NewPtr) const
Definition: MemoryLocation.h:306

llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:236

llvm::MemoryLocation::getForDest
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Definition: MemoryLocation.cpp:108

llvm::MemoryLocation::getOrNone
static std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:78

llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:923

llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54

llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1040

llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700

llvm::MemorySSA::getSkipSelfWalker
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1562

llvm::MemorySSA::dominates
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2113

llvm::MemorySSA::getWalker
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1549

llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717

llvm::MemorySSA::isLiveOnEntryDef
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:737

llvm::MemoryUseOrDef::getDefiningAccess
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:262

llvm::MemoryUseOrDef::getMemoryInst
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:259

llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293

llvm::PHITransAddr
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35

llvm::PHITransAddr::translateValue
Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
Definition: PHITransAddr.cpp:298

llvm::PHITransAddr::isPotentiallyPHITranslatable
bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
Definition: PHITransAddr.cpp:103

llvm::PHITransAddr::needsPHITranslationFromBlock
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Definition: PHITransAddr.h:62

llvm::PHITransAddr::getAddr
Value * getAddr() const
Definition: PHITransAddr.h:58

llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827

llvm::PostDominatorTreeAnalysis
Analysis pass which computes a PostDominatorTree.
Definition: PostDominators.h:48

llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28

llvm::PostDominatorTree::dominates
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
Definition: PostDominators.cpp:54

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115

llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144

llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129

llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:57

llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162

llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition: SmallPtrSet.h:93

llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321

llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342

llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:380

llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427

llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370

llvm::SmallVectorBase::empty
bool empty() const
Definition: SmallVector.h:94

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:91

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:686

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:426

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209

llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:317

llvm::StoreInst::getValueOperand
Value * getValueOperand()
Definition: Instructions.h:414

llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:599

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:281

llvm::TargetLibraryInfo::has
bool has(LibFunc F) const
Tests whether a library function is available.
Definition: TargetLibraryInfo.h:393

llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:353

llvm::TypeSize
Definition: TypeSize.h:319

llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)

llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43

llvm::User::operands
op_range operands()
Definition: User.h:242

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255

llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693

llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074

llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376

llvm::cl::opt
Definition: CommandLine.h:1430

llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:109

uint32_t

uint64_t

DebugInfo.h

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off

llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:681

llvm::M68k::MemAddrModeKind::U
@ U

llvm::M68k::MemAddrModeKind::V
@ V

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49

llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:771

llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:830

llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168

llvm::PatternMatch::m_CombineAnd
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:245

llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1540

llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition: PatternMatch.h:1754

llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:2024

llvm::PatternMatch::m_c_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate, true > m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
Definition: PatternMatch.h:2527

llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92

llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:567

llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:314

llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:313

llvm::SIEncodingFamily::SI
@ SI
Definition: SIDefines.h:36

llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33

llvm::at::getAssignmentMarkers
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use ID as an operand.
Definition: DebugInfo.cpp:1898

llvm::at::getDVRAssignmentMarkers
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:238

llvm::at::calculateFragmentIntersect
bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgAssignIntrinsic *DbgAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to (Dest + S...
Definition: DebugInfo.cpp:2121

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450

llvm::omp::RTLDependInfoFields::Len
@ Len

llvm::rdf::Def
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384

llvm::rdf::Func
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329

llvm::for_each
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1715

llvm::getInitialValueOfAllocation
Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
Definition: MemoryBuiltins.cpp:440

llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition: AtomicOrdering.h:125

llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145

llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:69

llvm::salvageDebugInfo
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1650

llvm::emitCalloc
Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI)
Emit a call to the calloc function.
Definition: BuildLibCalls.cpp:1923

llvm::GetPointerBaseWithConstantOffset
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
Definition: ValueTracking.h:607

llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Definition: ValueTracking.cpp:6293

llvm::post_order
iterator_range< po_iterator< T > > post_order(const T &G)
Definition: PostOrderIterator.h:193

llvm::isNoAliasCall
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
Definition: AliasAnalysis.cpp:839

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729

llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399

llvm::getObjectSize
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed to by Ptr.
Definition: MemoryBuiltins.cpp:592

llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419

llvm::isModSet
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48

llvm::PointerMayBeCaptured
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
Definition: CaptureTracking.cpp:204

llvm::NullPointerIsDefined
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2043

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::AreStatisticsEnabled
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139

llvm::isNotVisibleOnUnwind
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
Definition: AliasAnalysis.cpp:894

llvm::offsetToAlignment
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition: Alignment.h:197

llvm::salvageKnowledge
bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and, if the resulting llvm.assume is valid, insert it before I.
Definition: AssumeBundleBuilder.cpp:292

llvm::getFreedOperand
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
Definition: MemoryBuiltins.cpp:558

llvm::isBytewiseValue
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Definition: ValueTracking.cpp:5714

llvm::predecessors
auto predecessors(const MachineBasicBlock *BB)
Definition: MachineBasicBlock.h:1307

llvm::mayContainIrreducibleControl
bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
Definition: MustExecute.cpp:384

llvm::isIdentifiedObject
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Definition: AliasAnalysis.cpp:851

llvm::isStrongerThan
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
Definition: AtomicOrdering.h:91

llvm::isRefSet
bool isRefSet(const ModRefInfo MRI)
Definition: ModRef.h:51

raw_ostream.h

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39

llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85

llvm::DIExpression::FragmentInfo
Holds the characteristics of one fragment of a larger variable.
Definition: DebugInfoMetadata.h:2883

llvm::ObjectSizeOpts
Various options to control the behavior of getObjectSize.
Definition: MemoryBuiltins.h:139

llvm::ObjectSizeOpts::NullIsUnknownSize
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.
Definition: MemoryBuiltins.h:163

llvm::cl::desc
Definition: CommandLine.h:416