LLVM 23.0.0git
DeadStoreElimination.cpp
Go to the documentation of this file.
1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
36#include "llvm/ADT/SetVector.h"
39#include "llvm/ADT/Statistic.h"
40#include "llvm/ADT/StringRef.h"
46#include "llvm/Analysis/Loads.h"
55#include "llvm/IR/Argument.h"
57#include "llvm/IR/BasicBlock.h"
58#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugInfo.h"
63#include "llvm/IR/Dominators.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/InstrTypes.h"
68#include "llvm/IR/Instruction.h"
71#include "llvm/IR/Module.h"
72#include "llvm/IR/PassManager.h"
74#include "llvm/IR/Value.h"
78#include "llvm/Support/Debug.h"
86#include <algorithm>
87#include <cassert>
88#include <cstdint>
89#include <map>
90#include <optional>
91#include <utility>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "dse"
97
98STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
99STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
100STATISTIC(NumFastStores, "Number of stores deleted");
101STATISTIC(NumFastOther, "Number of other instrs removed");
102STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
103STATISTIC(NumModifiedStores, "Number of stores modified");
104STATISTIC(NumCFGChecks, "Number of stores modified");
105STATISTIC(NumCFGTries, "Number of stores modified");
106STATISTIC(NumCFGSuccess, "Number of stores modified");
107STATISTIC(NumGetDomMemoryDefPassed,
108 "Number of times a valid candidate is returned from getDomMemoryDef");
109STATISTIC(NumDomMemDefChecks,
110 "Number iterations check for reads in getDomMemoryDef");
111
112DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
113 "Controls which MemoryDefs are eliminated.");
114
115static cl::opt<bool>
116EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
117 cl::init(true), cl::Hidden,
118 cl::desc("Enable partial-overwrite tracking in DSE"));
119
120static cl::opt<bool>
121EnablePartialStoreMerging("enable-dse-partial-store-merging",
122 cl::init(true), cl::Hidden,
123 cl::desc("Enable partial store merging in DSE"));
124
126 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
127 cl::desc("The number of memory instructions to scan for "
128 "dead store elimination (default = 150)"));
130 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
131 cl::desc("The maximum number of steps while walking upwards to find "
132 "MemoryDefs that may be killed (default = 90)"));
133
135 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
136 cl::desc("The maximum number candidates that only partially overwrite the "
137 "killing MemoryDef to consider"
138 " (default = 5)"));
139
141 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
142 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
143 "other stores per basic block (default = 5000)"));
144
146 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
147 cl::desc(
148 "The cost of a step in the same basic block as the killing MemoryDef"
149 "(default = 1)"));
150
152 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
154 cl::desc("The cost of a step in a different basic "
155 "block than the killing MemoryDef"
156 "(default = 5)"));
157
159 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
160 cl::desc("The maximum number of blocks to check when trying to prove that "
161 "all paths to an exit go through a killing block (default = 50)"));
162
// This flag allows or disallows DSE to optimize MemorySSA during its
// traversal. Note that DSE optimizing MemorySSA may impact other passes
// downstream of the DSE invocation and can lead to issues not being
// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
// those cases, the flag can be used to check if DSE's MemorySSA optimizations
// impact follow-up passes.
static cl::opt<bool>
    OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
                      cl::desc("Allow DSE to optimize memory accesses."));
172
173// TODO: remove this flag.
175 "enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden,
176 cl::desc("Enable the initializes attr improvement in DSE"));
177
179 "dse-max-dom-cond-depth", cl::init(1024), cl::Hidden,
180 cl::desc("Max dominator tree recursion depth for eliminating redundant "
181 "stores via dominating conditions"));
182
183//===----------------------------------------------------------------------===//
184// Helper functions
185//===----------------------------------------------------------------------===//
186using OverlapIntervalsTy = std::map<int64_t, int64_t>;
188
189/// Returns true if the end of this instruction can be safely shortened in
190/// length.
192 // Don't shorten stores for now
193 if (isa<StoreInst>(I))
194 return false;
195
197 switch (II->getIntrinsicID()) {
198 default: return false;
199 case Intrinsic::memset:
200 case Intrinsic::memcpy:
201 case Intrinsic::memcpy_element_unordered_atomic:
202 case Intrinsic::memset_element_unordered_atomic:
203 // Do shorten memory intrinsics.
204 // FIXME: Add memmove if it's also safe to transform.
205 return true;
206 }
207 }
208
209 // Don't shorten libcalls calls for now.
210
211 return false;
212}
213
214/// Returns true if the beginning of this instruction can be safely shortened
215/// in length.
217 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
218 // easily done by offsetting the source address.
219 return isa<AnyMemSetInst>(I);
220}
221
222static std::optional<TypeSize> getPointerSize(const Value *V,
223 const DataLayout &DL,
224 const TargetLibraryInfo &TLI,
225 const Function *F) {
227 ObjectSizeOpts Opts;
229
230 if (getObjectSize(V, Size, DL, &TLI, Opts))
231 return TypeSize::getFixed(Size);
232 return std::nullopt;
233}
234
namespace {

// How a killing store overlaps a dead store (see isPartialOverwrite for the
// detailed contract of most values).
enum OverwriteResult {
  OW_Begin,    // Killing store overwrites the beginning of the dead store.
  OW_Complete, // Killing store completely overwrites the dead store.
  OW_End,      // Killing store overwrites the end of the dead store.
  OW_PartialEarlierWithFullLater, // Dead (bigger) store overwritten by a
                                  // killing (smaller) store that does not
                                  // write outside the dead store's bounds.
  OW_MaybePartial, // Overlap may be partial; needs further analysis.
                   // NOTE(review): semantics inferred from name — confirm
                   // against the users of this value.
  OW_None,         // NOTE(review): presumably no overwrite relation — confirm.
  OW_Unknown       // Nothing could be determined.
};

} // end anonymous namespace
248
249/// Check if two instruction are masked stores that completely
250/// overwrite one another. More specifically, \p KillingI has to
251/// overwrite \p DeadI.
252static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
253 const Instruction *DeadI,
255 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
256 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
257 if (KillingII == nullptr || DeadII == nullptr)
258 return OW_Unknown;
259 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
260 return OW_Unknown;
261
262 switch (KillingII->getIntrinsicID()) {
263 case Intrinsic::masked_store:
264 case Intrinsic::vp_store: {
265 const DataLayout &DL = KillingII->getDataLayout();
266 auto *KillingTy = KillingII->getArgOperand(0)->getType();
267 auto *DeadTy = DeadII->getArgOperand(0)->getType();
268 if (DL.getTypeSizeInBits(KillingTy) != DL.getTypeSizeInBits(DeadTy))
269 return OW_Unknown;
270 // Element count.
271 if (cast<VectorType>(KillingTy)->getElementCount() !=
272 cast<VectorType>(DeadTy)->getElementCount())
273 return OW_Unknown;
274 // Pointers.
275 Value *KillingPtr = KillingII->getArgOperand(1);
276 Value *DeadPtr = DeadII->getArgOperand(1);
277 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
278 return OW_Unknown;
279 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
280 // Masks.
281 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
282 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
283 return OW_Unknown;
284 } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
285 // Masks.
286 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
287 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
288 return OW_Unknown;
289 // Lengths.
290 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
291 return OW_Unknown;
292 }
293 return OW_Complete;
294 }
295 default:
296 return OW_Unknown;
297 }
298}
299
300/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
301/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
302/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
303/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
304/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
305/// overwritten by a killing (smaller) store which doesn't write outside the big
306/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
307/// NOTE: This function must only be called if both \p KillingLoc and \p
308/// DeadLoc belong to the same underlying object with valid \p KillingOff and
309/// \p DeadOff.
310static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
311 const MemoryLocation &DeadLoc,
312 int64_t KillingOff, int64_t DeadOff,
313 Instruction *DeadI,
315 const uint64_t KillingSize = KillingLoc.Size.getValue();
316 const uint64_t DeadSize = DeadLoc.Size.getValue();
317 // We may now overlap, although the overlap is not complete. There might also
318 // be other incomplete overlaps, and together, they might cover the complete
319 // dead store.
320 // Note: The correctness of this logic depends on the fact that this function
321 // is not even called providing DepWrite when there are any intervening reads.
323 KillingOff < int64_t(DeadOff + DeadSize) &&
324 int64_t(KillingOff + KillingSize) >= DeadOff) {
325
326 // Insert our part of the overlap into the map.
327 auto &IM = IOL[DeadI];
328 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
329 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
330 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
331 << ")\n");
332
333 // Make sure that we only insert non-overlapping intervals and combine
334 // adjacent intervals. The intervals are stored in the map with the ending
335 // offset as the key (in the half-open sense) and the starting offset as
336 // the value.
337 int64_t KillingIntStart = KillingOff;
338 int64_t KillingIntEnd = KillingOff + KillingSize;
339
340 // Find any intervals ending at, or after, KillingIntStart which start
341 // before KillingIntEnd.
342 auto ILI = IM.lower_bound(KillingIntStart);
343 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
344 // This existing interval is overlapped with the current store somewhere
345 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
346 // intervals and adjusting our start and end.
347 KillingIntStart = std::min(KillingIntStart, ILI->second);
348 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
349 ILI = IM.erase(ILI);
350
351 // Continue erasing and adjusting our end in case other previous
352 // intervals are also overlapped with the current store.
353 //
354 // |--- dead 1 ---| |--- dead 2 ---|
355 // |------- killing---------|
356 //
357 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
358 assert(ILI->second > KillingIntStart && "Unexpected interval");
359 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
360 ILI = IM.erase(ILI);
361 }
362 }
363
364 IM[KillingIntEnd] = KillingIntStart;
365
366 ILI = IM.begin();
367 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
368 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
369 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
370 << ") Composite KillingLoc [" << ILI->second << ", "
371 << ILI->first << ")\n");
372 ++NumCompletePartials;
373 return OW_Complete;
374 }
375 }
376
377 // Check for a dead store which writes to all the memory locations that
378 // the killing store writes to.
379 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
380 int64_t(DeadOff + DeadSize) > KillingOff &&
381 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
382 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
383 << ", " << int64_t(DeadOff + DeadSize)
384 << ") by a killing store [" << KillingOff << ", "
385 << int64_t(KillingOff + KillingSize) << ")\n");
386 // TODO: Maybe come up with a better name?
387 return OW_PartialEarlierWithFullLater;
388 }
389
390 // Another interesting case is if the killing store overwrites the end of the
391 // dead store.
392 //
393 // |--dead--|
394 // |-- killing --|
395 //
396 // In this case we may want to trim the size of dead store to avoid
397 // generating stores to addresses which will definitely be overwritten killing
398 // store.
400 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
401 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
402 return OW_End;
403
404 // Finally, we also need to check if the killing store overwrites the
405 // beginning of the dead store.
406 //
407 // |--dead--|
408 // |-- killing --|
409 //
410 // In this case we may want to move the destination address and trim the size
411 // of dead store to avoid generating stores to addresses which will definitely
412 // be overwritten killing store.
414 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
415 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
416 "Expect to be handled as OW_Complete");
417 return OW_Begin;
418 }
419 // Otherwise, they don't completely overlap.
420 return OW_Unknown;
421}
422
423/// Returns true if the memory which is accessed by the second instruction is not
424/// modified between the first and the second instruction.
425/// Precondition: Second instruction must be dominated by the first
426/// instruction.
427static bool
430 DominatorTree *DT) {
431 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
432 // instructions which can modify the memory location accessed by SecondI.
433 //
434 // While doing the walk keep track of the address to check. It might be
435 // different in different basic blocks due to PHI translation.
436 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
438 // Keep track of the address we visited each block with. Bail out if we
439 // visit a block with different addresses.
441
442 BasicBlock::iterator FirstBBI(FirstI);
443 ++FirstBBI;
444 BasicBlock::iterator SecondBBI(SecondI);
445 BasicBlock *FirstBB = FirstI->getParent();
446 BasicBlock *SecondBB = SecondI->getParent();
447 MemoryLocation MemLoc;
448 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
449 MemLoc = MemoryLocation::getForDest(MemSet);
450 else
451 MemLoc = MemoryLocation::get(SecondI);
452
453 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
454
455 // Start checking the SecondBB.
456 WorkList.push_back(
457 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
458 bool isFirstBlock = true;
459
460 // Check all blocks going backward until we reach the FirstBB.
461 while (!WorkList.empty()) {
462 BlockAddressPair Current = WorkList.pop_back_val();
463 BasicBlock *B = Current.first;
464 PHITransAddr &Addr = Current.second;
465 Value *Ptr = Addr.getAddr();
466
467 // Ignore instructions before FirstI if this is the FirstBB.
468 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
469
471 if (isFirstBlock) {
472 // Ignore instructions after SecondI if this is the first visit of SecondBB.
473 assert(B == SecondBB && "first block is not the store block");
474 EI = SecondBBI;
475 isFirstBlock = false;
476 } else {
477 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
478 // In this case we also have to look at instructions after SecondI.
479 EI = B->end();
480 }
481 for (; BI != EI; ++BI) {
482 Instruction *I = &*BI;
483 if (I->mayWriteToMemory() && I != SecondI)
484 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
485 return false;
486 }
487 if (B != FirstBB) {
488 assert(B != &FirstBB->getParent()->getEntryBlock() &&
489 "Should not hit the entry block because SI must be dominated by LI");
490 for (BasicBlock *Pred : predecessors(B)) {
491 PHITransAddr PredAddr = Addr;
492 if (PredAddr.needsPHITranslationFromBlock(B)) {
493 if (!PredAddr.isPotentiallyPHITranslatable())
494 return false;
495 if (!PredAddr.translateValue(B, Pred, DT, false))
496 return false;
497 }
498 Value *TranslatedPtr = PredAddr.getAddr();
499 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
500 if (!Inserted.second) {
501 // We already visited this block before. If it was with a different
502 // address - bail out!
503 if (TranslatedPtr != Inserted.first->second)
504 return false;
505 // ... otherwise just skip it.
506 continue;
507 }
508 WorkList.push_back(std::make_pair(Pred, PredAddr));
509 }
510 }
511 }
512 return true;
513}
514
515static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
516 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
517 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
518 const DataLayout &DL = Inst->getDataLayout();
519 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
520 uint64_t DeadSliceOffsetInBits =
521 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
522 auto SetDeadFragExpr = [](auto *Assign,
523 DIExpression::FragmentInfo DeadFragment) {
524 // createFragmentExpression expects an offset relative to the existing
525 // fragment offset if there is one.
526 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
527 Assign->getExpression()
528 ->getFragmentInfo()
529 .value_or(DIExpression::FragmentInfo(0, 0))
530 .OffsetInBits;
532 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
533 Assign->setExpression(*NewExpr);
534 return;
535 }
536 // Failed to create a fragment expression for this so discard the value,
537 // making this a kill location.
539 DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
540 DeadFragment.SizeInBits);
541 Assign->setExpression(Expr);
542 Assign->setKillLocation();
543 };
544
545 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
546 // link to any instructions. Created in the loop below (once).
547 DIAssignID *LinkToNothing = nullptr;
548 LLVMContext &Ctx = Inst->getContext();
549 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
550 if (!LinkToNothing)
551 LinkToNothing = DIAssignID::getDistinct(Ctx);
552 return LinkToNothing;
553 };
554
555 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
556 // overlapping dbg.assign intrinsic.
557 for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
558 std::optional<DIExpression::FragmentInfo> NewFragment;
559 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
560 DeadSliceSizeInBits, Assign,
561 NewFragment) ||
562 !NewFragment) {
563 // We couldn't calculate the intersecting fragment for some reason. Be
564 // cautious and unlink the whole assignment from the store.
565 Assign->setKillAddress();
566 Assign->setAssignId(GetDeadLink());
567 continue;
568 }
569 // No intersect.
570 if (NewFragment->SizeInBits == 0)
571 continue;
572
573 // Fragments overlap: insert a new dbg.assign for this dead part.
574 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
575 NewAssign->insertAfter(Assign->getIterator());
576 NewAssign->setAssignId(GetDeadLink());
577 if (NewFragment)
578 SetDeadFragExpr(NewAssign, *NewFragment);
579 NewAssign->setKillAddress();
580 }
581}
582
583/// Update the attributes given that a memory access is updated (the
584/// dereferenced pointer could be moved forward when shortening a
585/// mem intrinsic).
586static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
587 uint64_t PtrOffset) {
588 // Remember old attributes.
589 AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
590
591 // Find attributes that should be kept, and remove the rest.
592 AttributeMask AttrsToRemove;
593 for (auto &Attr : OldAttrs) {
594 if (Attr.hasKindAsEnum()) {
595 switch (Attr.getKindAsEnum()) {
596 default:
597 break;
598 case Attribute::Alignment:
599 // Only keep alignment if PtrOffset satisfy the alignment.
600 if (isAligned(Attr.getAlignment().valueOrOne(), PtrOffset))
601 continue;
602 break;
603 case Attribute::Dereferenceable:
604 case Attribute::DereferenceableOrNull:
605 // We could reduce the size of these attributes according to
606 // PtrOffset. But we simply drop these for now.
607 break;
608 case Attribute::NonNull:
609 case Attribute::NoUndef:
610 continue;
611 }
612 }
613 AttrsToRemove.addAttribute(Attr);
614 }
615
616 // Remove the attributes that should be dropped.
617 Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
618}
619
620static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
621 uint64_t &DeadSize, int64_t KillingStart,
622 uint64_t KillingSize, bool IsOverwriteEnd) {
623 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
624 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
625
626 // We assume that memet/memcpy operates in chunks of the "largest" native
627 // type size and aligned on the same value. That means optimal start and size
628 // of memset/memcpy should be modulo of preferred alignment of that type. That
629 // is it there is no any sense in trying to reduce store size any further
630 // since any "extra" stores comes for free anyway.
631 // On the other hand, maximum alignment we can achieve is limited by alignment
632 // of initial store.
633
634 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
635 // "largest" native type.
636 // Note: What is the proper way to get that value?
637 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
638 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
639
640 int64_t ToRemoveStart = 0;
641 uint64_t ToRemoveSize = 0;
642 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
643 // maintained on the remaining store.
644 if (IsOverwriteEnd) {
645 // Calculate required adjustment for 'KillingStart' in order to keep
646 // remaining store size aligned on 'PerfAlign'.
647 uint64_t Off =
648 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
649 ToRemoveStart = KillingStart + Off;
650 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
651 return false;
652 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
653 } else {
654 ToRemoveStart = DeadStart;
655 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
656 "Not overlapping accesses?");
657 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
658 // Calculate required adjustment for 'ToRemoveSize'in order to keep
659 // start of the remaining store aligned on 'PerfAlign'.
660 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
661 if (Off != 0) {
662 if (ToRemoveSize <= (PrefAlign.value() - Off))
663 return false;
664 ToRemoveSize -= PrefAlign.value() - Off;
665 }
666 assert(isAligned(PrefAlign, ToRemoveSize) &&
667 "Should preserve selected alignment");
668 }
669
670 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
671 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
672
673 uint64_t NewSize = DeadSize - ToRemoveSize;
674 if (DeadIntrinsic->isAtomic()) {
675 // When shortening an atomic memory intrinsic, the newly shortened
676 // length must remain an integer multiple of the element size.
677 const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
678 if (0 != NewSize % ElementSize)
679 return false;
680 }
681
682 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
683 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
684 << "\n KILLER [" << ToRemoveStart << ", "
685 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
686
687 DeadIntrinsic->setLength(NewSize);
688 DeadIntrinsic->setDestAlignment(PrefAlign);
689
690 Value *OrigDest = DeadIntrinsic->getRawDest();
691 if (!IsOverwriteEnd) {
692 Value *Indices[1] = {
693 ConstantInt::get(DeadIntrinsic->getLength()->getType(), ToRemoveSize)};
695 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
696 DeadI->getIterator());
697 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
698 DeadIntrinsic->setDest(NewDestGEP);
699 adjustArgAttributes(DeadIntrinsic, 0, ToRemoveSize);
700 }
701
702 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
703 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
704 IsOverwriteEnd);
705
706 // Finally update start and size of dead access.
707 if (!IsOverwriteEnd)
708 DeadStart += ToRemoveSize;
709 DeadSize = NewSize;
710
711 return true;
712}
713
715 int64_t &DeadStart, uint64_t &DeadSize) {
716 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
717 return false;
718
719 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
720 int64_t KillingStart = OII->second;
721 uint64_t KillingSize = OII->first - KillingStart;
722
723 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
724
725 if (KillingStart > DeadStart &&
726 // Note: "KillingStart - KillingStart" is known to be positive due to
727 // preceding check.
728 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
729 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
730 // be non negative due to preceding checks.
731 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
732 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
733 true)) {
734 IntervalMap.erase(OII);
735 return true;
736 }
737 }
738 return false;
739}
740
743 int64_t &DeadStart, uint64_t &DeadSize) {
745 return false;
746
747 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
748 int64_t KillingStart = OII->second;
749 uint64_t KillingSize = OII->first - KillingStart;
750
751 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
752
753 if (KillingStart <= DeadStart &&
754 // Note: "DeadStart - KillingStart" is known to be non negative due to
755 // preceding check.
756 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
757 // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
758 // be positive due to preceding checks.
759 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
760 "Should have been handled as OW_Complete");
761 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
762 false)) {
763 IntervalMap.erase(OII);
764 return true;
765 }
766 }
767 return false;
768}
769
770static Constant *
772 int64_t KillingOffset, int64_t DeadOffset,
774 DominatorTree *DT) {
775
776 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
777 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
778 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
779 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
780 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
781 // If the store we find is:
782 // a) partially overwritten by the store to 'Loc'
783 // b) the killing store is fully contained in the dead one and
784 // c) they both have a constant value
785 // d) none of the two stores need padding
786 // Merge the two stores, replacing the dead store's value with a
787 // merge of both values.
788 // TODO: Deal with other constant types (vectors, etc), and probably
789 // some mem intrinsics (if needed)
790
791 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
792 APInt KillingValue =
793 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
794 unsigned KillingBits = KillingValue.getBitWidth();
795 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
796 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
797
798 // Offset of the smaller store inside the larger store
799 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
800 unsigned LShiftAmount =
801 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
802 : BitOffsetDiff;
803 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
804 LShiftAmount + KillingBits);
805 // Clear the bits we'll be replacing, then OR with the smaller
806 // store, shifted appropriately.
807 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
808 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
809 << "\n Killing: " << *KillingI
810 << "\n Merged Value: " << Merged << '\n');
811 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
812 }
813 return nullptr;
814}
815
816// Returns true if \p I is an intrinsic that does not read or write memory.
819 switch (II->getIntrinsicID()) {
820 case Intrinsic::lifetime_start:
821 case Intrinsic::lifetime_end:
822 case Intrinsic::invariant_end:
823 case Intrinsic::launder_invariant_group:
824 case Intrinsic::assume:
825 return true;
826 case Intrinsic::dbg_declare:
827 case Intrinsic::dbg_label:
828 case Intrinsic::dbg_value:
829 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
830 default:
831 return false;
832 }
833 }
834 return false;
835}
836
837// Check if we can ignore \p D for DSE.
838static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
839 Instruction *DI = D->getMemoryInst();
840 // Calls that only access inaccessible memory cannot read or write any memory
841 // locations we consider for elimination.
842 if (auto *CB = dyn_cast<CallBase>(DI))
843 if (CB->onlyAccessesInaccessibleMemory())
844 return true;
845
846 // We can eliminate stores to locations not visible to the caller across
847 // throwing instructions.
848 if (DI->mayThrow() && !DefVisibleToCaller)
849 return true;
850
851 // We can remove the dead stores, irrespective of the fence and its ordering
852 // (release/acquire/seq_cst). Fences only constraints the ordering of
853 // already visible stores, it does not make a store visible to other
854 // threads. So, skipping over a fence does not change a store from being
855 // dead.
856 if (isa<FenceInst>(DI))
857 return true;
858
859 // Skip intrinsics that do not really read or modify memory.
860 if (isNoopIntrinsic(DI))
861 return true;
862
863 return false;
864}
865
866namespace {
867
// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
// defined by `MemDef`, caching its underlying object and defining
// instruction.
struct MemoryLocationWrapper {
  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
                        bool DefByInitializesAttr)
      : MemLoc(MemLoc), MemDef(MemDef),
        DefByInitializesAttr(DefByInitializesAttr) {
    assert(MemLoc.Ptr && "MemLoc should be not null");
    UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
    DefInst = MemDef->getMemoryInst();
  }

  // The location written by DefInst.
  MemoryLocation MemLoc;
  // Cached underlying object of MemLoc.Ptr.
  const Value *UnderlyingObject;
  // The MemorySSA def that writes MemLoc.
  MemoryDef *MemDef;
  // Cached memory instruction of MemDef.
  Instruction *DefInst;
  // True if this location is defined via a call argument's "initializes"
  // attribute rather than by an ordinary store.
  bool DefByInitializesAttr = false;
};
886
887// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
888// defined by this MemoryDef.
889struct MemoryDefWrapper {
890 MemoryDefWrapper(MemoryDef *MemDef,
891 ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
892 DefInst = MemDef->getMemoryInst();
893 for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
894 DefinedLocations.push_back(
895 MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
896 }
897 Instruction *DefInst;
899};
900
// Per-argument info used to intersect the "initializes" attributes of call
// arguments that alias each other.
struct ArgumentInitInfo {
  // Index of the argument in the call.
  unsigned Idx;
  // True if the argument's memory is dead or not visible to the caller on
  // unwind, so an unwind edge cannot observe partially-initialized state.
  bool IsDeadOrInvisibleOnUnwind;
  // Byte ranges from the argument's "initializes" attribute.
  ConstantRangeList Inits;
};
906} // namespace
907
910 return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
911}
912
913// Return the intersected range list of the initializes attributes of "Args".
914// "Args" are call arguments that alias to each other.
915// If any argument in "Args" doesn't have dead_on_unwind attr and
916// "CallHasNoUnwindAttr" is false, return empty.
919 bool CallHasNoUnwindAttr) {
920 if (Args.empty())
921 return {};
922
923 // To address unwind, the function should have nounwind attribute or the
924 // arguments have dead or invisible on unwind. Otherwise, return empty.
925 for (const auto &Arg : Args) {
926 if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
927 return {};
928 if (Arg.Inits.empty())
929 return {};
930 }
931
932 ConstantRangeList IntersectedIntervals = Args.front().Inits;
933 for (auto &Arg : Args.drop_front())
934 IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);
935
936 return IntersectedIntervals;
937}
938
939namespace {
940
941struct DSEState {
942 Function &F;
943 AliasAnalysis &AA;
944 EarliestEscapeAnalysis EA;
945
946 /// The single BatchAA instance that is used to cache AA queries. It will
947 /// not be invalidated over the whole run. This is safe, because:
948 /// 1. Only memory writes are removed, so the alias cache for memory
949 /// locations remains valid.
950 /// 2. No new instructions are added (only instructions removed), so cached
951 /// information for a deleted value cannot be accessed by a re-used new
952 /// value pointer.
953 BatchAAResults BatchAA;
954
955 MemorySSA &MSSA;
956 DominatorTree &DT;
957 PostDominatorTree &PDT;
958 const TargetLibraryInfo &TLI;
959 const DataLayout &DL;
960 const CycleInfo &CI;
961
962 // All MemoryDefs that potentially could kill other MemDefs.
964 // Any that should be skipped as they are already deleted
965 SmallPtrSet<MemoryAccess *, 4> SkipStores;
966 // Keep track whether a given object is captured before return or not.
967 DenseMap<const Value *, bool> CapturedBeforeReturn;
968 // Keep track of all of the objects that are invisible to the caller after
969 // the function returns.
970 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
971 DenseMap<const Value *, uint64_t> InvisibleToCallerAfterRetBounded;
972 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
973 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
974 // Post-order numbers for each basic block. Used to figure out if memory
975 // accesses are executed before another access.
976 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
977
978 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
979 /// basic block.
980 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
981 // Check if there are root nodes that are terminated by UnreachableInst.
982 // Those roots pessimize post-dominance queries. If there are such roots,
983 // fall back to CFG scan starting from all non-unreachable roots.
984 bool AnyUnreachableExit;
985
986 // Whether or not we should iterate on removing dead stores at the end of the
987 // function due to removing a store causing a previously captured pointer to
988 // no longer be captured.
989 bool ShouldIterateEndOfFunctionDSE;
990
991 /// Dead instructions to be removed at the end of DSE.
993
994 // Class contains self-reference, make sure it's not copied/moved.
995 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
996 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
997 const CycleInfo &CI);
998 DSEState(const DSEState &) = delete;
999 DSEState &operator=(const DSEState &) = delete;
1000
1001 LocationSize strengthenLocationSize(const Instruction *I,
1002 LocationSize Size) const;
1003
1004 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
1005 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1006 /// location (by \p DeadI instruction).
1007 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
1008 /// \p DeadI, but they both write to the same underlying object. In that
1009 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
1010 /// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
1011 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1012 OverwriteResult isOverwrite(const Instruction *KillingI,
1013 const Instruction *DeadI,
1014 const MemoryLocation &KillingLoc,
1015 const MemoryLocation &DeadLoc,
1016 int64_t &KillingOff, int64_t &DeadOff);
1017
1018 bool isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1019 const LocationSize StoreSize);
1020
1021 bool isInvisibleToCallerOnUnwind(const Value *V);
1022
1023 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const;
1024
1025 // Returns a list of <MemoryLocation, bool> pairs written by I.
1026 // The bool means whether the write is from Initializes attr.
1028 getLocForInst(Instruction *I, bool ConsiderInitializesAttr);
1029
1030 /// Assuming this instruction has a dead analyzable write, can we delete
1031 /// this instruction?
1032 bool isRemovable(Instruction *I);
1033
1034 /// Returns true if \p UseInst completely overwrites \p DefLoc
1035 /// (stored by \p DefInst).
1036 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1037 Instruction *UseInst);
1038
1039 /// Returns true if \p Def is not read before returning from the function.
1040 bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc);
1041
1042 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1043 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1044 /// indicating whether \p I is a free-like call.
1045 std::optional<std::pair<MemoryLocation, bool>>
1046 getLocForTerminator(Instruction *I) const;
1047
1048 /// Returns true if \p I is a memory terminator instruction like
1049 /// llvm.lifetime.end or free.
1050 bool isMemTerminatorInst(Instruction *I) const;
1051
1052 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1053 /// instruction \p AccessI.
1054 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1055 Instruction *MaybeTerm);
1056
1057 // Returns true if \p Use may read from \p DefLoc.
1058 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst);
1059
1060 /// Returns true if a dependency between \p Current and \p KillingDef is
1061 /// guaranteed to be loop invariant for the loops that they are in. Either
1062 /// because they are known to be in the same block, in the same loop level or
1063 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1064 /// during execution of the containing function.
1065 bool isGuaranteedLoopIndependent(const Instruction *Current,
1066 const Instruction *KillingDef,
1067 const MemoryLocation &CurrentLoc);
1068
1069 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1070 /// loop. In particular, this guarantees that it only references a single
1071 /// MemoryLocation during execution of the containing function.
1072 bool isGuaranteedLoopInvariant(const Value *Ptr);
1073
1074 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1075 // with no read access between them or on any other path to a function exit
1076 // block if \p KillingLoc is not accessible after the function returns. If
1077 // there is no such MemoryDef, return std::nullopt. The returned value may not
1078 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1079 // encounter an aliasing MemoryUse (read).
1080 std::optional<MemoryAccess *>
1081 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1082 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1083 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1084 bool IsMemTerm, unsigned &PartialLimit,
1085 bool IsInitializesAttrMemLoc);
1086
1087 /// Delete dead memory defs and recursively add their operands to ToRemove if
1088 /// they became dead.
1089 void
1090 deleteDeadInstruction(Instruction *SI,
1091 SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr);
1092
1093 // Check for any extra throws between \p KillingI and \p DeadI that block
1094 // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1095 // MemoryDef that may throw are handled during the walk from one def to the
1096 // next.
1097 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1098 const Value *KillingUndObj);
1099
1100 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1101 // instructions act as barriers:
1102 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1103 // object.
1104 // * Atomic stores stronger that monotonic.
1105 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI);
1106
1107 /// Eliminate writes to objects that are not visible in the caller and are not
1108 /// accessed before returning from the function.
1109 bool eliminateDeadWritesAtEndOfFunction();
1110
1111 /// If we have a zero initializing memset following a call to malloc,
1112 /// try folding it into a call to calloc.
1113 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO);
1114
1115 /// \returns true if \p Def is a no-op store, either because it
1116 /// directly stores back a loaded value or stores zero to a calloced object.
1117 bool storeIsNoop(MemoryDef *Def, const Value *DefUO);
1118
1119 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL);
1120
1121 /// Eliminates writes to locations where the value that is being written
1122 /// is already stored at the same location.
1123 bool eliminateRedundantStoresOfExistingValues();
1124
1125 /// If there is a dominating condition that implies the value being stored in
1126 /// a pointer, and such a condition appears in a node that dominates the
1127 /// store, then the store may be redundant if no write occurs in between.
1128 bool eliminateRedundantStoresViaDominatingConditions();
1129
1130 // Return the locations written by the initializes attribute.
1131 // Note that this function considers:
1132 // 1. Unwind edge: use "initializes" attribute only if the callee has
1133 // "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
1134 // or the argument is invisible to caller on unwind. That is, we don't
1135 // perform incorrect DSE on unwind edges in the current function.
1136 // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
1137 // the intersected range list of their "initializes" attributes.
1138 SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
1139
1140 // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
1141 // killed by `KillingLocWrapper.MemDef`. Return whether
1142 // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
1143 std::pair<bool, bool>
1144 eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
1145
1146 // Try to eliminate dead defs killed by `KillingDefWrapper` and return the
1147 // change state: whether make any change.
1148 bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
1149};
1150
1151} // end anonymous namespace
1152
1153static void pushMemUses(MemoryAccess *Acc,
1156 for (Use &U : Acc->uses()) {
1157 auto *MA = cast<MemoryAccess>(U.getUser());
1158 if (Visited.insert(MA).second)
1159 WorkList.push_back(MA);
1160 }
1161}
1162
1163// Return true if "Arg" is function local and isn't captured before "CB".
1164static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
1166 const Value *UnderlyingObj = getUnderlyingObject(Arg);
1167 return isIdentifiedFunctionLocal(UnderlyingObj) &&
1169 EA.getCapturesBefore(UnderlyingObj, CB, /*OrAt*/ true));
1170}
1171
1172DSEState::DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1174 const TargetLibraryInfo &TLI, const CycleInfo &CI)
1175 : F(F), AA(AA), EA(DT, nullptr, &CI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
1176 PDT(PDT), TLI(TLI), DL(F.getDataLayout()), CI(CI) {
1177 // Collect blocks with throwing instructions not modeled in MemorySSA and
1178 // alloc-like objects.
1179 unsigned PO = 0;
1180 for (BasicBlock *BB : post_order(&F)) {
1181 PostOrderNumbers[BB] = PO++;
1182 for (Instruction &I : *BB) {
1183 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
1184 if (I.mayThrow() && !MA)
1185 ThrowingBlocks.insert(I.getParent());
1186
1187 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
1188 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
1189 (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
1191 MemDefs.push_back(MD);
1192 }
1193 }
1194
1195 // Treat byval, inalloca or dead on return arguments the same as Allocas,
1196 // stores to them are dead at the end of the function.
1197 for (Argument &AI : F.args()) {
1198 if (AI.hasPassPointeeByValueCopyAttr()) {
1199 InvisibleToCallerAfterRet.insert({&AI, true});
1200 continue;
1201 }
1202
1203 if (!AI.getType()->isPointerTy())
1204 continue;
1205
1206 const DeadOnReturnInfo &Info = AI.getDeadOnReturnInfo();
1207 if (Info.coversAllReachableMemory())
1208 InvisibleToCallerAfterRet.insert({&AI, true});
1209 else if (uint64_t DeadBytes = Info.getNumberOfDeadBytes())
1210 InvisibleToCallerAfterRetBounded.insert({&AI, DeadBytes});
1211 }
1212
1213 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
1214 return isa<UnreachableInst>(E->getTerminator());
1215 });
1216}
1217
1218LocationSize DSEState::strengthenLocationSize(const Instruction *I,
1219 LocationSize Size) const {
1220 if (auto *CB = dyn_cast<CallBase>(I)) {
1221 LibFunc F;
1222 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
1223 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
1224 // Use the precise location size specified by the 3rd argument
1225 // for determining KillingI overwrites DeadLoc if it is a memset_chk
1226 // instruction. memset_chk will write either the amount specified as 3rd
1227 // argument or the function will immediately abort and exit the program.
1228 // NOTE: AA may determine NoAlias if it can prove that the access size
1229 // is larger than the allocation size due to that being UB. To avoid
1230 // returning potentially invalid NoAlias results by AA, limit the use of
1231 // the precise location size to isOverwrite.
1232 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
1233 return LocationSize::precise(Len->getZExtValue());
1234 }
1235 }
1236 return Size;
1237}
1238
// Classify how a write to \p KillingLoc (by \p KillingI) relates to an
// earlier write to \p DeadLoc (by \p DeadI): OW_Complete, OW_MaybePartial
// (with \p KillingOff / \p DeadOff filled in for partial-overwrite
// analysis), OW_None, or OW_Unknown.
OverwriteResult DSEState::isOverwrite(const Instruction *KillingI,
                                      const Instruction *DeadI,
                                      const MemoryLocation &KillingLoc,
                                      const MemoryLocation &DeadLoc,
                                      int64_t &KillingOff, int64_t &DeadOff) {
  // AliasAnalysis does not always account for loops. Limit overwrite checks
  // to dependencies for which we can guarantee they are independent of any
  // loops they are in.
  if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
    return OW_Unknown;

  LocationSize KillingLocSize =
      strengthenLocationSize(KillingI, KillingLoc.Size);
  const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
  const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
  const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
  const Value *KillingUndObj = getUnderlyingObject(KillingPtr);

  // Check whether the killing store overwrites the whole object, in which
  // case the size/offset of the dead store does not matter.
  if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
      isIdentifiedObject(KillingUndObj)) {
    std::optional<TypeSize> KillingUndObjSize =
        getPointerSize(KillingUndObj, DL, TLI, &F);
    if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
      return OW_Complete;
  }

  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
  // get imprecise values here, though (except for unknown sizes).
  if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
    // In case no constant size is known, try to compare the IR values for the
    // number of bytes written and check if they match.
    const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
    const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
    if (KillingMemI && DeadMemI) {
      const Value *KillingV = KillingMemI->getLength();
      const Value *DeadV = DeadMemI->getLength();
      if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
        return OW_Complete;
    }

    // Masked stores have imprecise locations, but we can reason about them
    // to some extent.
    return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
  }

  const TypeSize KillingSize = KillingLocSize.getValue();
  const TypeSize DeadSize = DeadLoc.Size.getValue();
  // Bail on doing Size comparison which depends on AA for now
  // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
  const bool AnyScalable = DeadSize.isScalable() || KillingLocSize.isScalable();

  if (AnyScalable)
    return OW_Unknown;
  // Query the alias information
  AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);

  // If the start pointers are the same, we just have to compare sizes to see if
  // the killing store was larger than the dead store.
  if (AAR == AliasResult::MustAlias) {
    // Make sure that the KillingSize size is >= the DeadSize size.
    if (KillingSize >= DeadSize)
      return OW_Complete;
  }

  // If we hit a partial alias we may have a full overwrite
  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
    int32_t Off = AAR.getOffset();
    // Killing starts at or before Dead and covers it entirely.
    if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
      return OW_Complete;
  }

  // If we can't resolve the same pointers to the same object, then we can't
  // analyze them at all.
  if (DeadUndObj != KillingUndObj) {
    // Non aliasing stores to different objects don't overlap. Note that
    // if the killing store is known to overwrite whole object (out of
    // bounds access overwrites whole object as well) then it is assumed to
    // completely overwrite any store to the same object even if they don't
    // actually alias (see next check).
    if (AAR == AliasResult::NoAlias)
      return OW_None;
    return OW_Unknown;
  }

  // Okay, we have two different pointers into the same underlying object.
  // Try to decompose each pointer into a "base + constant_offset" form. If
  // the base pointers are equal, then we can reason about the two stores.
  DeadOff = 0;
  KillingOff = 0;
  const Value *DeadBasePtr =
      GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
  const Value *KillingBasePtr =
      GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);

  // If the base pointers still differ, we have two completely different
  // stores.
  if (DeadBasePtr != KillingBasePtr)
    return OW_Unknown;

  // The killing access completely overlaps the dead store if and only if
  // both start and end of the dead one is "inside" the killing one:
  //    |<->|--dead--|<->|
  //    |-----killing------|
  // Accesses may overlap if and only if start of one of them is "inside"
  // another one:
  //    |<->|--dead--|<-------->|
  //    |-------killing--------|
  //           OR
  //    |-------dead-------|
  //    |<->|---killing---|<----->|
  //
  // We have to be careful here as *Off is signed while *.Size is unsigned.

  // Check if the dead access starts "not before" the killing one.
  if (DeadOff >= KillingOff) {
    // If the dead access ends "not after" the killing access then the
    // dead one is completely overwritten by the killing one.
    if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
      return OW_Complete;
    // If start of the dead access is "before" end of the killing access
    // then accesses overlap.
    else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
      return OW_MaybePartial;
  }
  // If start of the killing access is "before" end of the dead access then
  // accesses overlap.
  else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
    return OW_MaybePartial;
  }

  // Can reach here only if accesses are known not to overlap.
  return OW_None;
}
1374
// Returns true if a store of \p StoreSize bytes through \p Ptr (whose
// underlying object is \p V) cannot be observed by the caller once the
// function returns.
bool DSEState::isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
                                           const LocationSize StoreSize) {
  // Allocas never outlive the function.
  if (isa<AllocaInst>(V))
    return true;

  // Arguments with a bounded dead_on_return byte range: the store is only
  // invisible if it provably lies within the dead range.
  auto IBounded = InvisibleToCallerAfterRetBounded.find(V);
  if (IBounded != InvisibleToCallerAfterRetBounded.end()) {
    int64_t ValueOffset;
    [[maybe_unused]] const Value *BaseValue =
        GetPointerBaseWithConstantOffset(Ptr, ValueOffset, DL);
    // If we are not able to find a constant offset from the UO, we have to
    // pessimistically assume that the store writes to memory out of the
    // dead_on_return bounds.
    if (BaseValue != V)
      return false;
    // This store is only invisible after return if we are in bounds of the
    // range marked dead.
    if (StoreSize.hasValue() &&
        ValueOffset + StoreSize.getValue() <= IBounded->second &&
        ValueOffset >= 0)
      return true;
  }
  // General case, with the answer cached per object: a no-alias call result
  // that is not captured (counting the return value as a capture) and not
  // visible on unwind is invisible to the caller after return. Insert a
  // pessimistic 'false' first so the computation runs only once per object.
  auto I = InvisibleToCallerAfterRet.insert({V, false});
  if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
    I.first->second = capturesNothing(PointerMayBeCaptured(
        V, /*ReturnCaptures=*/true, CaptureComponents::Provenance));
  return I.first->second;
}
1403
1404bool DSEState::isInvisibleToCallerOnUnwind(const Value *V) {
1405 bool RequiresNoCaptureBeforeUnwind;
1406 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1407 return false;
1408 if (!RequiresNoCaptureBeforeUnwind)
1409 return true;
1410
1411 auto I = CapturedBeforeReturn.insert({V, true});
1412 if (I.second)
1413 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1414 // with the killing MemoryDef. But we refrain from doing so for now to
1415 // limit compile-time and this does not cause any changes to the number
1416 // of stores removed on a large test set in practice.
1417 I.first->second = capturesAnything(PointerMayBeCaptured(
1418 V, /*ReturnCaptures=*/false, CaptureComponents::Provenance));
1419 return !I.first->second;
1420}
1421
1422std::optional<MemoryLocation> DSEState::getLocForWrite(Instruction *I) const {
1423 if (!I->mayWriteToMemory())
1424 return std::nullopt;
1425
1426 if (auto *CB = dyn_cast<CallBase>(I))
1427 return MemoryLocation::getForDest(CB, TLI);
1428
1430}
1431
1433DSEState::getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1435 if (isMemTerminatorInst(I)) {
1436 if (auto Loc = getLocForTerminator(I))
1437 Locations.push_back(std::make_pair(Loc->first, false));
1438 return Locations;
1439 }
1440
1441 if (auto Loc = getLocForWrite(I))
1442 Locations.push_back(std::make_pair(*Loc, false));
1443
1444 if (ConsiderInitializesAttr) {
1445 for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1446 Locations.push_back(std::make_pair(MemLoc, true));
1447 }
1448 }
1449 return Locations;
1450}
1451
1452bool DSEState::isRemovable(Instruction *I) {
1453 assert(getLocForWrite(I) && "Must have analyzable write");
1454
1455 // Don't remove volatile/atomic stores.
1456 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1457 return SI->isUnordered();
1458
1459 if (auto *CB = dyn_cast<CallBase>(I)) {
1460 // Don't remove volatile memory intrinsics.
1461 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1462 return !MI->isVolatile();
1463
1464 // Never remove dead lifetime intrinsics, e.g. because they are followed
1465 // by a free.
1466 if (CB->isLifetimeStartOrEnd())
1467 return false;
1468
1469 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1470 !CB->isTerminator();
1471 }
1472
1473 return false;
1474}
1475
1476bool DSEState::isCompleteOverwrite(const MemoryLocation &DefLoc,
1477 Instruction *DefInst, Instruction *UseInst) {
1478 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1479 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1480 // MemoryDef.
1481 if (!UseInst->mayWriteToMemory())
1482 return false;
1483
1484 if (auto *CB = dyn_cast<CallBase>(UseInst))
1485 if (CB->onlyAccessesInaccessibleMemory())
1486 return false;
1487
1488 int64_t InstWriteOffset, DepWriteOffset;
1489 if (auto CC = getLocForWrite(UseInst))
1490 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1491 DepWriteOffset) == OW_Complete;
1492 return false;
1493}
1494
1495bool DSEState::isWriteAtEndOfFunction(MemoryDef *Def,
1496 const MemoryLocation &DefLoc) {
1497 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1498 << *Def->getMemoryInst()
1499 << ") is at the end the function \n");
1501 SmallPtrSet<MemoryAccess *, 8> Visited;
1502
1503 pushMemUses(Def, WorkList, Visited);
1504 for (unsigned I = 0; I < WorkList.size(); I++) {
1505 if (WorkList.size() >= MemorySSAScanLimit) {
1506 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1507 return false;
1508 }
1509
1510 MemoryAccess *UseAccess = WorkList[I];
1511 if (isa<MemoryPhi>(UseAccess)) {
1512 // AliasAnalysis does not account for loops. Limit elimination to
1513 // candidates for which we can guarantee they always store to the same
1514 // memory location.
1515 if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
1516 return false;
1517
1518 pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
1519 continue;
1520 }
1521 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1522 // of times this is called and/or caching it.
1523 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1524 if (isReadClobber(DefLoc, UseInst)) {
1525 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1526 return false;
1527 }
1528
1529 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1530 pushMemUses(UseDef, WorkList, Visited);
1531 }
1532 return true;
1533}
1534
1535std::optional<std::pair<MemoryLocation, bool>>
1536DSEState::getLocForTerminator(Instruction *I) const {
1537 if (auto *CB = dyn_cast<CallBase>(I)) {
1538 if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1539 return {
1540 std::make_pair(MemoryLocation::getForArgument(CB, 0, &TLI), false)};
1541 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1542 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1543 }
1544
1545 return std::nullopt;
1546}
1547
1548bool DSEState::isMemTerminatorInst(Instruction *I) const {
1549 auto *CB = dyn_cast<CallBase>(I);
1550 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1551 getFreedOperand(CB, &TLI) != nullptr);
1552}
1553
1554bool DSEState::isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1555 Instruction *MaybeTerm) {
1556 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1557 getLocForTerminator(MaybeTerm);
1558
1559 if (!MaybeTermLoc)
1560 return false;
1561
1562 // If the terminator is a free-like call, all accesses to the underlying
1563 // object can be considered terminated.
1564 if (getUnderlyingObject(Loc.Ptr) !=
1565 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1566 return false;
1567
1568 auto TermLoc = MaybeTermLoc->first;
1569 if (MaybeTermLoc->second) {
1570 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1571 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1572 }
1573 int64_t InstWriteOffset = 0;
1574 int64_t DepWriteOffset = 0;
1575 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1576 DepWriteOffset) == OW_Complete;
1577}
1578
// Returns true if \p UseInst may read from \p DefLoc.
bool DSEState::isReadClobber(const MemoryLocation &DefLoc,
                             Instruction *UseInst) {
  // Intrinsics that do not really touch memory never read.
  if (isNoopIntrinsic(UseInst))
    return false;

  // Monotonic or weaker atomic stores can be re-ordered and do not need to be
  // treated as read clobber. Note: this check must come first — a stronger
  // atomic store acts as a read clobber even though it is a store.
  if (auto SI = dyn_cast<StoreInst>(UseInst))
    return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);

  if (!UseInst->mayReadFromMemory())
    return false;

  // Calls restricted to inaccessible memory cannot read any location DSE
  // reasons about.
  if (auto *CB = dyn_cast<CallBase>(UseInst))
    if (CB->onlyAccessesInaccessibleMemory())
      return false;

  // Fall back to an alias query against the killed location.
  return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
1598
1599bool DSEState::isGuaranteedLoopIndependent(const Instruction *Current,
1600 const Instruction *KillingDef,
1601 const MemoryLocation &CurrentLoc) {
1602 // If the dependency is within the same block or loop level (being careful
1603 // of irreducible loops), we know that AA will return a valid result for the
1604 // memory dependency. (Both at the function level, outside of any loop,
1605 // would also be valid but we currently disable that to limit compile time).
1606 if (Current->getParent() == KillingDef->getParent())
1607 return true;
1608 const Cycle *CurrentC = CI.getCycle(Current->getParent());
1609 if (CurrentC && CurrentC == CI.getCycle(KillingDef->getParent()))
1610 return true;
1611 // Otherwise check the memory location is invariant to any loops.
1612 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1613}
1614
1615bool DSEState::isGuaranteedLoopInvariant(const Value *Ptr) {
1616 Ptr = Ptr->stripPointerCasts();
1617 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1618 if (GEP->hasAllConstantIndices())
1619 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1620
1621 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1622 return I->getParent()->isEntryBlock() || !CI.getCycle(I->getParent());
1623 }
1624 return true;
1625}
1626
/// Walk upwards through MemorySSA from \p StartAccess looking for a
/// MemoryDef that may be killed by \p KillingDef: either a complete
/// overwrite of \p KillingLoc, or (for memory terminators) a write to the
/// same underlying object. Returns std::nullopt when no candidate exists or
/// a limit/barrier is hit, a MemoryPhi the caller must traverse itself, or
/// the potentially dead MemoryDef. The by-reference limits (ScanLimit,
/// WalkerStepLimit, PartialLimit) are consumed as the search progresses so
/// total work stays bounded across repeated calls.
std::optional<MemoryAccess *> DSEState::getDomMemoryDef(
    MemoryDef *KillingDef, MemoryAccess *StartAccess,
    const MemoryLocation &KillingLoc, const Value *KillingUndObj,
    unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm,
    unsigned &PartialLimit, bool IsInitializesAttrMemLoc) {
  // Bail immediately if a previous call exhausted either budget.
  if (ScanLimit == 0 || WalkerStepLimit == 0) {
    LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
    return std::nullopt;
  }

  MemoryAccess *Current = StartAccess;
  Instruction *KillingI = KillingDef->getMemoryInst();
  LLVM_DEBUG(dbgs() << " trying to get dominating access\n");

  // Only optimize defining access of KillingDef when directly starting at its
  // defining access. The defining access also must only access KillingLoc. At
  // the moment we only support instructions with a single write location, so
  // it should be sufficient to disable optimizations for instructions that
  // also read from memory.
  bool CanOptimize = OptimizeMemorySSA &&
                     KillingDef->getDefiningAccess() == StartAccess &&
                     !KillingI->mayReadFromMemory();

  // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
  std::optional<MemoryLocation> CurrentLoc;
  for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
    LLVM_DEBUG({
      dbgs() << " visiting " << *Current;
      if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
        dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
               << ")";
      dbgs() << "\n";
    });

    // Reached TOP.
    if (MSSA.isLiveOnEntryDef(Current)) {
      LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
      if (CanOptimize && Current != KillingDef->getDefiningAccess())
        // The first clobbering def is... none.
        KillingDef->setOptimized(Current);
      return std::nullopt;
    }

    // Cost of a step. Accesses in the same block are more likely to be valid
    // candidates for elimination, hence consider them cheaper.
    // NOTE(review): the conditional operands selecting the same-block vs.
    // other-block step-cost constants appear elided in this rendering —
    // confirm against the upstream source.
    unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
    if (WalkerStepLimit <= StepCost) {
      LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
      return std::nullopt;
    }
    WalkerStepLimit -= StepCost;

    // Return for MemoryPhis. They cannot be eliminated directly and the
    // caller is responsible for traversing them.
    if (isa<MemoryPhi>(Current)) {
      LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
      return Current;
    }

    // Below, check if CurrentDef is a valid candidate to be eliminated by
    // KillingDef. If it is not, check the next candidate.
    MemoryDef *CurrentDef = cast<MemoryDef>(Current);
    Instruction *CurrentI = CurrentDef->getMemoryInst();

    // Skippable defs still sit between KillingDef and the real clobber, so
    // they invalidate the optimized-access bookkeeping.
    if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
      CanOptimize = false;
      continue;
    }

    // Before we try to remove anything, check for any extra throwing
    // instructions that block us from DSEing
    if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
      LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
      return std::nullopt;
    }

    // Check for anything that looks like it will be a barrier to further
    // removal
    if (isDSEBarrier(KillingUndObj, CurrentI)) {
      LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
      return std::nullopt;
    }

    // If Current is known to be on path that reads DefLoc or is a read
    // clobber, bail out, as the path is not profitable. We skip this check
    // for intrinsic calls, because the code knows how to handle memcpy
    // intrinsics.
    if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
      return std::nullopt;

    // Quick check if there are direct uses that are read-clobbers.
    if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
          if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
            return !MSSA.dominates(StartAccess, UseOrDef) &&
                   isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
          return false;
        })) {
      LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
      return std::nullopt;
    }

    // If Current does not have an analyzable write location or is not
    // removable, skip it.
    CurrentLoc = getLocForWrite(CurrentI);
    if (!CurrentLoc || !isRemovable(CurrentI)) {
      CanOptimize = false;
      continue;
    }

    // AliasAnalysis does not account for loops. Limit elimination to
    // candidates for which we can guarantee they always store to the same
    // memory location and not located in different loops.
    if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
      LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
      CanOptimize = false;
      continue;
    }

    if (IsMemTerm) {
      // If the killing def is a memory terminator (e.g. lifetime.end), check
      // the next candidate if the current Current does not write the same
      // underlying object as the terminator.
      if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
        CanOptimize = false;
        continue;
      }
    } else {
      int64_t KillingOffset = 0;
      int64_t DeadOffset = 0;
      auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
                            KillingOffset, DeadOffset);
      if (CanOptimize) {
        // CurrentDef is the earliest write clobber of KillingDef. Use it as
        // optimized access. Do not optimize if CurrentDef is already the
        // defining access of KillingDef.
        if (CurrentDef != KillingDef->getDefiningAccess() &&
            (OR == OW_Complete || OR == OW_MaybePartial))
          KillingDef->setOptimized(CurrentDef);

        // Once a may-aliasing def is encountered do not set an optimized
        // access.
        if (OR != OW_None)
          CanOptimize = false;
      }

      // If Current does not write to the same object as KillingDef, check
      // the next candidate.
      if (OR == OW_Unknown || OR == OW_None)
        continue;
      else if (OR == OW_MaybePartial) {
        // If KillingDef only partially overwrites Current, check the next
        // candidate if the partial step limit is exceeded. This aggressively
        // limits the number of candidates for partial store elimination,
        // which are less likely to be removable in the end.
        if (PartialLimit <= 1) {
          WalkerStepLimit -= 1;
          LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with "
                               "next access\n");
          continue;
        }
        PartialLimit -= 1;
      }
    }
    break;
  };

  // Accesses to objects accessible after the function returns can only be
  // eliminated if the access is dead along all paths to the exit. Collect
  // the blocks with killing (=completely overwriting MemoryDefs) and check if
  // they cover all paths from MaybeDeadAccess to any function exit.
  SmallPtrSet<Instruction *, 16> KillingDefs;
  KillingDefs.insert(KillingDef->getMemoryInst());
  MemoryAccess *MaybeDeadAccess = Current;
  MemoryLocation MaybeDeadLoc = *CurrentLoc;
  Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
  LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
                    << *MaybeDeadI << ")\n");

  // NOTE(review): the declaration of WorkList (used throughout below)
  // appears elided in this rendering — confirm against the upstream source.
  SmallPtrSet<MemoryAccess *, 32> Visited;
  pushMemUses(MaybeDeadAccess, WorkList, Visited);

  // Check if DeadDef may be read.
  for (unsigned I = 0; I < WorkList.size(); I++) {
    MemoryAccess *UseAccess = WorkList[I];

    LLVM_DEBUG(dbgs() << " " << *UseAccess);
    // Bail out if the number of accesses to check exceeds the scan limit.
    if (ScanLimit < (WorkList.size() - I)) {
      LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
      return std::nullopt;
    }
    --ScanLimit;
    NumDomMemDefChecks++;

    if (isa<MemoryPhi>(UseAccess)) {
      // A phi whose block is properly dominated by a killing block carries
      // only already-overwritten state on this path; skip it.
      if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
            return DT.properlyDominates(KI->getParent(), UseAccess->getBlock());
          })) {
        LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
        continue;
      }
      LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
      pushMemUses(UseAccess, WorkList, Visited);
      continue;
    }

    Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
    LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");

    if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
          return DT.dominates(KI, UseInst);
        })) {
      LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
      continue;
    }

    // A memory terminator kills all preceding MemoryDefs and all succeeding
    // MemoryAccesses. We do not have to check its users.
    if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
      LLVM_DEBUG(
          dbgs()
          << " ... skipping, memterminator invalidates following accesses\n");
      continue;
    }

    if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
      LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
      pushMemUses(UseAccess, WorkList, Visited);
      continue;
    }

    if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
      LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
      return std::nullopt;
    }

    // Uses which may read the original MemoryDef mean we cannot eliminate the
    // original MD. Stop walk.
    // If KillingDef is a CallInst with "initializes" attribute, the reads in
    // the callee would be dominated by initializations, so it should be safe.
    bool IsKillingDefFromInitAttr = false;
    if (IsInitializesAttrMemLoc) {
      if (KillingI == UseInst &&
          KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
        IsKillingDefFromInitAttr = true;
    }

    if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
      LLVM_DEBUG(dbgs() << " ... found read clobber\n");
      return std::nullopt;
    }

    // If this worklist walks back to the original memory access (and the
    // pointer is not guaranteed loop invariant) then we cannot assume that a
    // store kills itself.
    if (MaybeDeadAccess == UseAccess &&
        !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
      LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
      return std::nullopt;
    }
    // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
    // if it reads the memory location.
    // TODO: It would probably be better to check for self-reads before
    // calling the function.
    if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
      LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
      continue;
    }

    // Check all uses for MemoryDefs, except for defs completely overwriting
    // the original location. Otherwise we have to check uses of *all*
    // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
    // miss cases like the following
    // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
    // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
    // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
    // (The Use points to the *first* Def it may alias)
    // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
    // stores [0,1]
    if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
      if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
        BasicBlock *MaybeKillingBlock = UseInst->getParent();
        if (PostOrderNumbers.find(MaybeKillingBlock)->second <
            PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
          if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
                                           KillingLoc.Size)) {
            // NOTE(review): the LLVM_DEBUG(dbgs() opener for this message
            // appears elided in this rendering — confirm upstream.
            << " ... found killing def " << *UseInst << "\n");
            KillingDefs.insert(UseInst);
          }
        } else {
          // NOTE(review): the LLVM_DEBUG(dbgs() opener for this message
          // appears elided in this rendering — confirm upstream.
          << " ... found preceeding def " << *UseInst << "\n");
          return std::nullopt;
        }
      } else
        pushMemUses(UseDef, WorkList, Visited);
    }
  }

  // For accesses to locations visible after the function returns, make sure
  // that the location is dead (=overwritten) along all paths from
  // MaybeDeadAccess to the exit.
  if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
                                   KillingLoc.Size)) {
    SmallPtrSet<BasicBlock *, 16> KillingBlocks;
    for (Instruction *KD : KillingDefs)
      KillingBlocks.insert(KD->getParent());
    assert(!KillingBlocks.empty() &&
           "Expected at least a single killing block");

    // Find the common post-dominator of all killing blocks.
    BasicBlock *CommonPred = *KillingBlocks.begin();
    for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
      if (!CommonPred)
        break;
      CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
    }

    // If the common post-dominator does not post-dominate MaybeDeadAccess,
    // there is a path from MaybeDeadAccess to an exit not going through a
    // killing block.
    if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
      if (!AnyUnreachableExit)
        return std::nullopt;

      // Fall back to CFG scan starting at all non-unreachable roots if not
      // all paths to the exit go through CommonPred.
      CommonPred = nullptr;
    }

    // If CommonPred itself is in the set of killing blocks, we're done.
    if (KillingBlocks.count(CommonPred))
      return {MaybeDeadAccess};

    SetVector<BasicBlock *> WorkList;
    // If CommonPred is null, there are multiple exits from the function.
    // They all have to be added to the worklist.
    if (CommonPred)
      WorkList.insert(CommonPred);
    else
      for (BasicBlock *R : PDT.roots()) {
        if (!isa<UnreachableInst>(R->getTerminator()))
          WorkList.insert(R);
      }

    NumCFGTries++;
    // Check if all paths starting from an exit node go through one of the
    // killing blocks before reaching MaybeDeadAccess.
    for (unsigned I = 0; I < WorkList.size(); I++) {
      NumCFGChecks++;
      BasicBlock *Current = WorkList[I];
      if (KillingBlocks.count(Current))
        continue;
      if (Current == MaybeDeadAccess->getBlock())
        return std::nullopt;

      // MaybeDeadAccess is reachable from the entry, so we don't have to
      // explore unreachable blocks further.
      if (!DT.isReachableFromEntry(Current))
        continue;

      WorkList.insert_range(predecessors(Current));

      if (WorkList.size() >= MemorySSAPathCheckLimit)
        return std::nullopt;
    }
    NumCFGSuccess++;
  }

  // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
  // potentially dead.
  return {MaybeDeadAccess};
}
2004
2005void DSEState::deleteDeadInstruction(Instruction *SI,
2006 SmallPtrSetImpl<MemoryAccess *> *Deleted) {
2007 MemorySSAUpdater Updater(&MSSA);
2008 SmallVector<Instruction *, 32> NowDeadInsts;
2009 NowDeadInsts.push_back(SI);
2010 --NumFastOther;
2011
2012 while (!NowDeadInsts.empty()) {
2013 Instruction *DeadInst = NowDeadInsts.pop_back_val();
2014 ++NumFastOther;
2015
2016 // Try to preserve debug information attached to the dead instruction.
2017 salvageDebugInfo(*DeadInst);
2018 salvageKnowledge(DeadInst);
2019
2020 // Remove the Instruction from MSSA.
2021 MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
2022 bool IsMemDef = MA && isa<MemoryDef>(MA);
2023 if (MA) {
2024 if (IsMemDef) {
2025 auto *MD = cast<MemoryDef>(MA);
2026 SkipStores.insert(MD);
2027 if (Deleted)
2028 Deleted->insert(MD);
2029 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
2030 if (SI->getValueOperand()->getType()->isPointerTy()) {
2031 const Value *UO = getUnderlyingObject(SI->getValueOperand());
2032 if (CapturedBeforeReturn.erase(UO))
2033 ShouldIterateEndOfFunctionDSE = true;
2034 InvisibleToCallerAfterRet.erase(UO);
2035 InvisibleToCallerAfterRetBounded.erase(UO);
2036 }
2037 }
2038 }
2039
2040 Updater.removeMemoryAccess(MA);
2041 }
2042
2043 auto I = IOLs.find(DeadInst->getParent());
2044 if (I != IOLs.end())
2045 I->second.erase(DeadInst);
2046 // Remove its operands
2047 for (Use &O : DeadInst->operands())
2048 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
2049 O.set(PoisonValue::get(O->getType()));
2050 if (isInstructionTriviallyDead(OpI, &TLI))
2051 NowDeadInsts.push_back(OpI);
2052 }
2053
2054 EA.removeInstruction(DeadInst);
2055 // Remove memory defs directly if they don't produce results, but only
2056 // queue other dead instructions for later removal. They may have been
2057 // used as memory locations that have been cached by BatchAA. Removing
2058 // them here may lead to newly created instructions to be allocated at the
2059 // same address, yielding stale cache entries.
2060 if (IsMemDef && DeadInst->getType()->isVoidTy())
2061 DeadInst->eraseFromParent();
2062 else
2063 ToRemove.push_back(DeadInst);
2064 }
2065}
2066
2067bool DSEState::mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
2068 const Value *KillingUndObj) {
2069 // First see if we can ignore it by using the fact that KillingI is an
2070 // alloca/alloca like object that is not visible to the caller during
2071 // execution of the function.
2072 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
2073 return false;
2074
2075 if (KillingI->getParent() == DeadI->getParent())
2076 return ThrowingBlocks.count(KillingI->getParent());
2077 return !ThrowingBlocks.empty();
2078}
2079
2080bool DSEState::isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
2081 // If DeadI may throw it acts as a barrier, unless we are to an
2082 // alloca/alloca like object that does not escape.
2083 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
2084 return true;
2085
2086 // If DeadI is an atomic load/store stronger than monotonic, do not try to
2087 // eliminate/reorder it.
2088 if (DeadI->isAtomic()) {
2089 if (auto *LI = dyn_cast<LoadInst>(DeadI))
2090 return isStrongerThanMonotonic(LI->getOrdering());
2091 if (auto *SI = dyn_cast<StoreInst>(DeadI))
2092 return isStrongerThanMonotonic(SI->getOrdering());
2093 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
2094 return isStrongerThanMonotonic(ARMW->getOrdering());
2095 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
2096 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
2097 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
2098 llvm_unreachable("other instructions should be skipped in MemorySSA");
2099 }
2100 return false;
2101}
2102
/// Remove MemoryDefs whose written location is not read again on any path to
/// a function exit, provided the underlying object is invisible to the
/// caller after the function returns. Re-iterates while deletions expose new
/// opportunities (signalled via ShouldIterateEndOfFunctionDSE).
bool DSEState::eliminateDeadWritesAtEndOfFunction() {
  bool MadeChange = false;
  LLVM_DEBUG(
      dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n");
  do {
    ShouldIterateEndOfFunctionDSE = false;
    // Walk defs bottom-up so later stores are considered first.
    for (MemoryDef *Def : llvm::reverse(MemDefs)) {
      if (SkipStores.contains(Def))
        continue;

      Instruction *DefI = Def->getMemoryInst();
      auto DefLoc = getLocForWrite(DefI);
      if (!DefLoc || !isRemovable(DefI)) {
        LLVM_DEBUG(dbgs() << " ... could not get location for write or "
                             "instruction not removable.\n");
        continue;
      }

      // NOTE: Currently eliminating writes at the end of a function is
      // limited to MemoryDefs with a single underlying object, to save
      // compile-time. In practice it appears the case with multiple
      // underlying objects is very uncommon. If it turns out to be important,
      // we can use getUnderlyingObjects here instead.
      const Value *UO = getUnderlyingObject(DefLoc->Ptr);
      if (!isInvisibleToCallerAfterRet(UO, DefLoc->Ptr, DefLoc->Size))
        continue;

      if (isWriteAtEndOfFunction(Def, *DefLoc)) {
        // See through pointer-to-pointer bitcasts
        LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
                             "of the function\n");
        // NOTE(review): the statement deleting DefI appears elided in this
        // rendering — confirm against the upstream source.
        ++NumFastStores;
        MadeChange = true;
      }
    }
  } while (ShouldIterateEndOfFunctionDSE);
  return MadeChange;
}
2142
/// Remove stores whose stored value is implied by a dominating equality
/// condition: when an `icmp eq/ne (load p), v` branch proves `*p == v` on
/// one edge, a dominated `store v, p` with no intervening clobber is a
/// no-op. Conditions are tracked in a scoped hash table during a dom-tree
/// DFS so they pop automatically when leaving their dominated region.
bool DSEState::eliminateRedundantStoresViaDominatingConditions() {
  bool MadeChange = false;
  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs whose value being "
                       "written is implied by a dominating condition\n");

  using ConditionInfo = std::pair<Value *, Value *>;
  using ScopedHTType = ScopedHashTable<ConditionInfo, Instruction *>;

  // We maintain a scoped hash table of the active dominating conditions for a
  // given node.
  ScopedHTType ActiveConditions;
  auto GetDominatingCondition = [&](BasicBlock *BB)
      -> std::optional<std::tuple<ConditionInfo, Instruction *, BasicBlock *>> {
    auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
    if (!BI)
      return std::nullopt;

    // In case both blocks are the same, it is not possible to determine
    // if optimization is possible. (We would not want to optimize a store
    // in the FalseBB if condition is true and vice versa.)
    if (BI->getSuccessor(0) == BI->getSuccessor(1))
      return std::nullopt;

    Instruction *ICmpL;
    CmpPredicate Pred;
    Value *StorePtr, *StoreVal;
    if (!match(BI->getCondition(),
               m_c_ICmp(Pred, m_Instruction(ICmpL, m_Load(m_Value(StorePtr))),
                        m_Value(StoreVal))) ||
        !ICmpInst::isEquality(Pred))
      return std::nullopt;

    // Ensure the replacement is allowed when comparing pointers, as
    // the equality compares addresses only, not pointers' provenance.
    if (StoreVal->getType()->isPointerTy() &&
        !canReplacePointersIfEqual(StoreVal, ICmpL, DL))
      return std::nullopt;

    // ICMP_EQ implies equality on the true edge (successor 0); ICMP_NE on
    // the false edge (successor 1).
    unsigned ImpliedSuccIdx = Pred == ICmpInst::ICMP_EQ ? 0 : 1;
    BasicBlock *ImpliedSucc = BI->getSuccessor(ImpliedSuccIdx);
    return {{ConditionInfo(StorePtr, StoreVal), ICmpL, ImpliedSucc}};
  };

  auto VisitNode = [&](DomTreeNode *Node, unsigned Depth, auto &Self) -> void {
    // NOTE(review): the recursion-depth limit condition guarding this early
    // return appears elided in this rendering — confirm upstream.
      return;

    BasicBlock *BB = Node->getBlock();
    // Check for redundant stores against active known conditions.
    if (auto *Accesses = MSSA.getBlockDefs(BB)) {
      for (auto &Access : make_early_inc_range(*Accesses)) {
        auto *Def = dyn_cast<MemoryDef>(&Access);
        if (!Def)
          continue;

        auto *SI = dyn_cast<StoreInst>(Def->getMemoryInst());
        if (!SI || !SI->isUnordered())
          continue;

        Instruction *LI = ActiveConditions.lookup(
            {SI->getPointerOperand(), SI->getValueOperand()});
        if (!LI)
          continue;

        // Found a dominating condition that may imply the value being stored.
        // Make sure there does not exist any clobbering access between the
        // load and the potential redundant store.
        MemoryAccess *LoadAccess = MSSA.getMemoryAccess(LI);
        MemoryAccess *ClobberingAccess =
            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
        if (MSSA.dominates(ClobberingAccess, LoadAccess)) {
          // NOTE(review): the LLVM_DEBUG(dbgs() opener and the call deleting
          // SI appear elided in this rendering — confirm upstream.
          << "Removing No-Op Store:\n DEAD: " << *SI << '\n');
          NumRedundantStores++;
          MadeChange = true;
        }
      }
    }

    // See whether this basic block establishes a dominating condition.
    auto MaybeCondition = GetDominatingCondition(BB);

    for (DomTreeNode *Child : Node->children()) {
      // RAII scope for the active conditions.
      ScopedHTType::ScopeTy Scope(ActiveConditions);
      if (MaybeCondition) {
        const auto &[Cond, LI, ImpliedSucc] = *MaybeCondition;
        if (DT.dominates(BasicBlockEdge(BB, ImpliedSucc), Child->getBlock())) {
          // Found a condition that holds for this child, dominated by the
          // current node via the equality edge. Propagate the condition to
          // the children by pushing it onto the table.
          ActiveConditions.insert(Cond, LI);
        }
      }

      // Recursively visit the children of this node. Upon destruction, the no
      // longer active condition before visiting any sibling nodes is popped
      // from the active scope.
      Self(Child, Depth + 1, Self);
    }
  };

  // Do a DFS walk of the dom-tree.
  // NOTE(review): the initial VisitNode(root, 0, VisitNode) invocation
  // appears elided in this rendering — confirm upstream.

  return MadeChange;
}
2251
/// Try to fold a zero-`memset` of a freshly allocated buffer into the
/// allocation itself: `malloc` becomes `calloc`, and a callee carrying an
/// "alloc-variant-zeroed" attribute is replaced by the named zeroed variant.
/// On success the memset (\p Def) becomes dead and the caller removes it.
bool DSEState::tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
  Instruction *DefI = Def->getMemoryInst();
  MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
  if (!MemSet)
    // TODO: Could handle zero store to small allocation as well.
    return false;
  Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
  if (!StoredConstant || !StoredConstant->isNullValue())
    return false;

  if (!isRemovable(DefI))
    // The memset might be volatile..
    return false;

  // Do not interfere with sanitizer instrumentation, and avoid rewriting
  // calloc's own implementation in terms of itself.
  if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
      F.hasFnAttribute(Attribute::SanitizeAddress) ||
      F.hasFnAttribute(Attribute::SanitizeHWAddress) || F.getName() == "calloc")
    return false;
  auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
  if (!Malloc)
    return false;
  auto *InnerCallee = Malloc->getCalledFunction();
  if (!InnerCallee)
    return false;
  LibFunc Func = NotLibFunc;
  StringRef ZeroedVariantName;
  if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
      Func != LibFunc_malloc) {
    // Not malloc: fall back to the "alloc-variant-zeroed" attribute, which
    // names a zero-initializing variant with the same signature.
    Attribute Attr = Malloc->getFnAttr("alloc-variant-zeroed");
    if (!Attr.isValid())
      return false;
    ZeroedVariantName = Attr.getValueAsString();
    if (ZeroedVariantName.empty())
      return false;
  }

  // Gracefully handle malloc with unexpected memory attributes.
  auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
  if (!MallocDef)
    return false;

  auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
    // Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
    // of malloc block
    auto *MallocBB = Malloc->getParent(), *MemsetBB = Memset->getParent();
    if (MallocBB == MemsetBB)
      return true;
    auto *Ptr = Memset->getArgOperand(0);
    auto *TI = MallocBB->getTerminator();
    BasicBlock *TrueBB, *FalseBB;
    if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
                                       m_Zero()),
                        TrueBB, FalseBB)))
      return false;
    if (MemsetBB != FalseBB)
      return false;
    return true;
  };

  // The memset must cover exactly the allocated size, the allocation must
  // dominate it, and nothing may write the buffer in between.
  if (Malloc->getOperand(0) != MemSet->getLength())
    return false;
  if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Malloc, MemSet) ||
      !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
    return false;
  IRBuilder<> IRB(Malloc);
  assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
  Value *Calloc = nullptr;
  if (!ZeroedVariantName.empty()) {
    // Build the call to the zeroed variant, carrying over the allocation
    // attributes with AllocFnKind switched from Uninitialized to Zeroed.
    LLVMContext &Ctx = Malloc->getContext();
    AttributeList Attrs = InnerCallee->getAttributes();
    AllocFnKind AllocKind =
        Attrs.getFnAttr(Attribute::AllocKind).getAllocKind() |
        AllocFnKind::Zeroed;
    AllocKind &= ~AllocFnKind::Uninitialized;
    Attrs =
        Attrs.addFnAttribute(Ctx, Attribute::getWithAllocKind(Ctx, AllocKind))
            .removeFnAttribute(Ctx, "alloc-variant-zeroed");
    FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
        ZeroedVariantName, InnerCallee->getFunctionType(), Attrs);
    cast<Function>(ZeroedVariant.getCallee())
        ->setCallingConv(Malloc->getCallingConv());
    // NOTE(review): the declaration of Args appears elided in this
    // rendering — confirm against the upstream source.
    Args.append(Malloc->arg_begin(), Malloc->arg_end());
    CallInst *CI = IRB.CreateCall(ZeroedVariant, Args, ZeroedVariantName);
    CI->setCallingConv(Malloc->getCallingConv());
    Calloc = CI;
  } else {
    Type *SizeTTy = Malloc->getArgOperand(0)->getType();
    Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0),
                        IRB, TLI, Malloc->getType()->getPointerAddressSpace());
  }
  if (!Calloc)
    return false;

  MemorySSAUpdater Updater(&MSSA);
  auto *NewAccess = Updater.createMemoryAccessAfter(cast<Instruction>(Calloc),
                                                    nullptr, MallocDef);
  auto *NewAccessMD = cast<MemoryDef>(NewAccess);
  Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
  Malloc->replaceAllUsesWith(Calloc);
  // NOTE(review): the statement deleting the now-dead malloc appears elided
  // in this rendering — confirm against the upstream source.
  return true;
}
2355
2356bool DSEState::storeIsNoop(MemoryDef *Def, const Value *DefUO) {
2357 Instruction *DefI = Def->getMemoryInst();
2358 StoreInst *Store = dyn_cast<StoreInst>(DefI);
2359 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2360 Constant *StoredConstant = nullptr;
2361 if (Store)
2362 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
2363 else if (MemSet)
2364 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2365 else
2366 return false;
2367
2368 if (!isRemovable(DefI))
2369 return false;
2370
2371 if (StoredConstant) {
2372 Constant *InitC =
2373 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
2374 // If the clobbering access is LiveOnEntry, no instructions between them
2375 // can modify the memory location.
2376 if (InitC && InitC == StoredConstant)
2377 return MSSA.isLiveOnEntryDef(
2378 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
2379 }
2380
2381 if (!Store)
2382 return false;
2383
2384 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2385 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2386 // Get the defining access for the load.
2387 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2388 // Fast path: the defining accesses are the same.
2389 if (LoadAccess == Def->getDefiningAccess())
2390 return true;
2391
2392 // Look through phi accesses. Recursively scan all phi accesses by
2393 // adding them to a worklist. Bail when we run into a memory def that
2394 // does not match LoadAccess.
2395 SetVector<MemoryAccess *> ToCheck;
2396 MemoryAccess *Current =
2397 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2398 // We don't want to bail when we run into the store memory def. But,
2399 // the phi access may point to it. So, pretend like we've already
2400 // checked it.
2401 ToCheck.insert(Def);
2402 ToCheck.insert(Current);
2403 // Start at current (1) to simulate already having checked Def.
2404 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2405 Current = ToCheck[I];
2406 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2407 // Check all the operands.
2408 for (auto &Use : PhiAccess->incoming_values())
2409 ToCheck.insert(cast<MemoryAccess>(&Use));
2410 continue;
2411 }
2412
2413 // If we found a memory def, bail. This happens when we have an
2414 // unrelated write in between an otherwise noop store.
2415 assert(isa<MemoryDef>(Current) && "Only MemoryDefs should reach here.");
2416 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2417 // We are searching for the definition of the store's destination.
2418 // So, if that is the same definition as the load, then this is a
2419 // noop. Otherwise, fail.
2420 if (LoadAccess != Current)
2421 return false;
2422 }
2423 return true;
2424 }
2425 }
2426
2427 return false;
2428}
2429
2430bool DSEState::removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2431 bool Changed = false;
2432 for (auto OI : IOL) {
2433 Instruction *DeadI = OI.first;
2434 MemoryLocation Loc = *getLocForWrite(DeadI);
2435 assert(isRemovable(DeadI) && "Expect only removable instruction");
2436
2437 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2438 int64_t DeadStart = 0;
2439 uint64_t DeadSize = Loc.Size.getValue();
2440 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2441 OverlapIntervalsTy &IntervalMap = OI.second;
2442 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2443 if (IntervalMap.empty())
2444 continue;
2445 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2446 }
2447 return Changed;
2448}
2449
2450bool DSEState::eliminateRedundantStoresOfExistingValues() {
2451 bool MadeChange = false;
2452 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2453 "already existing value\n");
2454 for (auto *Def : MemDefs) {
2455 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2456 continue;
2457
2458 Instruction *DefInst = Def->getMemoryInst();
2459 auto MaybeDefLoc = getLocForWrite(DefInst);
2460 if (!MaybeDefLoc || !isRemovable(DefInst))
2461 continue;
2462
2463 MemoryDef *UpperDef;
2464 // To conserve compile-time, we avoid walking to the next clobbering def.
2465 // Instead, we just try to get the optimized access, if it exists. DSE
2466 // will try to optimize defs during the earlier traversal.
2467 if (Def->isOptimized())
2468 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2469 else
2470 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2471 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2472 continue;
2473
2474 Instruction *UpperInst = UpperDef->getMemoryInst();
2475 auto IsRedundantStore = [&]() {
2476 // We don't care about differences in call attributes here.
2477 if (DefInst->isIdenticalToWhenDefined(UpperInst,
2478 /*IntersectAttrs=*/true))
2479 return true;
2480 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2481 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2482 // MemSetInst must have a write location.
2483 auto UpperLoc = getLocForWrite(UpperInst);
2484 if (!UpperLoc)
2485 return false;
2486 int64_t InstWriteOffset = 0;
2487 int64_t DepWriteOffset = 0;
2488 auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
2489 InstWriteOffset, DepWriteOffset);
2490 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2491 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2492 OR == OW_Complete;
2493 }
2494 }
2495 return false;
2496 };
2497
2498 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2499 continue;
2500 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2501 << '\n');
2502 deleteDeadInstruction(DefInst);
2503 NumRedundantStores++;
2504 MadeChange = true;
2505 }
2506 return MadeChange;
2507}
2508
// Collect the memory locations that call `I` is guaranteed to initialize,
// derived from the `initializes` parameter attribute on its arguments.
// Aliasing arguments have their init ranges intersected so only ranges
// written through every alias are kept.
// NOTE(review): the return-type line of this definition (doxygen line 2509)
// was lost during extraction; confirm against the original source.
2510DSEState::getInitializesArgMemLoc(const Instruction *I) {
2511  const CallBase *CB = dyn_cast<CallBase>(I);
2512  if (!CB)
2513    return {};
2514
2515  // Collect aliasing arguments and their initializes ranges.
2516  SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
2517  for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2518    Value *CurArg = CB->getArgOperand(Idx);
2519    // Only pointer arguments can carry initializes info.
2520    if (!CurArg->getType()->isPointerTy())
2521      continue;
2522
2523    ConstantRangeList Inits;
2524    Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
2525    // initializes on byval arguments refers to the callee copy, not the
2526    // original memory the caller passed in.
2527    if (InitializesAttr.isValid() && !CB->isByValArgument(Idx))
2528      Inits = InitializesAttr.getValueAsConstantRangeList();
2529
2530    // Check whether "CurArg" could alias with global variables. We require
2531    // either it's function local and isn't captured before or the "CB" only
2532    // accesses arg or inaccessible mem.
2533    if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2534        !isFuncLocalAndNotCaptured(CurArg, CB, EA))
2535      Inits = ConstantRangeList();
2536
2537    // We don't perform incorrect DSE on unwind edges in the current function,
2538    // and use the "initializes" attribute to kill dead stores if:
2539    // - The call does not throw exceptions, "CB->doesNotThrow()".
2540    // - Or the callee parameter has "dead_on_unwind" attribute.
2541    // - Or the argument is invisible to caller on unwind, and there are no
2542    //   unwind edges from this call in the current function (e.g. `CallInst`).
2543    bool IsDeadOrInvisibleOnUnwind =
2544        CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
2545        (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
2546    ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
2547    bool FoundAliasing = false;
2548    for (auto &[Arg, AliasList] : Arguments) {
      // NOTE(review): the continuation of this alias() call (second
      // MemoryLocation argument, presumably getBeforeOrAfter(CurArg)) was
      // lost during extraction -- confirm against the original source.
2549      auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
2550      if (AAR == AliasResult::NoAlias) {
2551        continue;
2552      } else if (AAR == AliasResult::MustAlias) {
2553        FoundAliasing = true;
2554        AliasList.push_back(InitInfo);
2555      } else {
2556        // For PartialAlias and MayAlias, there is an offset or may be an
2557        // unknown offset between the arguments and we insert an empty init
2558        // range to discard the entire initializes info while intersecting.
2559        FoundAliasing = true;
2560        AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
2561                                             ConstantRangeList()});
2562      }
2563    }
2564    if (!FoundAliasing)
2565      Arguments[CurArg] = {InitInfo};
2566  }
2567
  // NOTE(review): the declaration of the result vector `Locations` (doxygen
  // line 2568) was lost during extraction -- confirm against the original.
2569  for (const auto &[_, Args] : Arguments) {
    // NOTE(review): the call producing the intersection (doxygen line 2571,
    // presumably getIntersectedInitRangeList) was lost during extraction.
2570    auto IntersectedRanges =
2572    if (IntersectedRanges.empty())
2573      continue;
2574
2575    for (const auto &Arg : Args) {
2576      for (const auto &Range : IntersectedRanges) {
2577        int64_t Start = Range.getLower().getSExtValue();
2578        int64_t End = Range.getUpper().getSExtValue();
2579        // For now, we only handle locations starting at offset 0.
2580        if (Start == 0)
2581          Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
2582                                             LocationSize::precise(End - Start),
2583                                             CB->getAAMetadata()));
2584      }
2585    }
2586  }
2587  return Locations;
2588}
2589
// Walk upwards from "KillingLocWrapper.MemDef" through MemorySSA and delete
// (or partially merge) MemoryDefs that are overwritten by it.
// \returns {Changed, DeletedKillingLoc}: whether anything was deleted or
// modified, and whether the killing store itself was removed as part of
// partial store merging.
2590std::pair<bool, bool>
2591DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2592  bool Changed = false;
2593  bool DeletedKillingLoc = false;
2594  unsigned ScanLimit = MemorySSAScanLimit;
2595  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2596  unsigned PartialLimit = MemorySSAPartialStoreLimit;
2597  // Worklist of MemoryAccesses that may be killed by
2598  // "KillingLocWrapper.MemDef".
2599  SmallSetVector<MemoryAccess *, 8> ToCheck;
2600  // Track MemoryAccesses that have been deleted in the loop below, so we can
2601  // skip them. Don't use SkipStores for this, which may contain reused
2602  // MemoryAccess addresses.
2603  SmallPtrSet<MemoryAccess *, 8> Deleted;
2604  [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2605  ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());
2606
2607  // Check if MemoryAccesses in the worklist are killed by
2608  // "KillingLocWrapper.MemDef".
  // Note: ToCheck grows while iterating; indexing (rather than iterators)
  // keeps the traversal valid as new accesses are appended.
2609  for (unsigned I = 0; I < ToCheck.size(); I++) {
2610    MemoryAccess *Current = ToCheck[I];
2611    if (Deleted.contains(Current))
2612      continue;
2613    std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2614        KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
2615        KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2616        isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
2617        KillingLocWrapper.DefByInitializesAttr);
2618
2619    if (!MaybeDeadAccess) {
2620      LLVM_DEBUG(dbgs() << "  finished walk\n");
2621      continue;
2622    }
2623    MemoryAccess *DeadAccess = *MaybeDeadAccess;
2624    LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2625    if (isa<MemoryPhi>(DeadAccess)) {
2626      LLVM_DEBUG(dbgs() << "\n  ... adding incoming values to worklist\n");
2627      for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2628        MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2629        BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2630        BasicBlock *PhiBlock = DeadAccess->getBlock();
2631
2632        // We only consider incoming MemoryAccesses that come before the
2633        // MemoryPhi. Otherwise we could discover candidates that do not
2634        // strictly dominate our starting def.
2635        if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
2636          ToCheck.insert(IncomingAccess);
2637      }
2638      continue;
2639    }
2640    // We cannot apply the initializes attribute to DeadAccess/DeadDef.
2641    // It would incorrectly consider a call instruction as redundant store
2642    // and remove this call instruction.
2643    // TODO: this conflates the existence of a MemoryLocation with being able
2644    // to delete the instruction. Fix isRemovable() to consider calls with
2645    // side effects that cannot be removed, e.g. calls with the initializes
2646    // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2647    MemoryDefWrapper DeadDefWrapper(
2648        cast<MemoryDef>(DeadAccess),
2649        getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
2650                      /*ConsiderInitializesAttr=*/false));
2651    assert(DeadDefWrapper.DefinedLocations.size() == 1);
2652    MemoryLocationWrapper &DeadLocWrapper =
2653        DeadDefWrapper.DefinedLocations.front();
2654    LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
    // Keep walking upwards past this candidate, whether or not we kill it.
2655    ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
2656    NumGetDomMemoryDefPassed++;
2657
2658    if (!DebugCounter::shouldExecute(MemorySSACounter))
2659      continue;
2660    if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
      // Memory-terminator (e.g. lifetime end / free) kills only stores to
      // the same underlying object.
2661      if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2662        continue;
2663      LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
2664                        << *DeadLocWrapper.DefInst << "\n  KILLER: "
2665                        << *KillingLocWrapper.DefInst << '\n');
2666      deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2667      ++NumFastStores;
2668      Changed = true;
2669    } else {
2670      // Check if DeadI overwrites KillingI.
2671      int64_t KillingOffset = 0;
2672      int64_t DeadOffset = 0;
2673      OverwriteResult OR =
2674          isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
2675                      KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2676                      KillingOffset, DeadOffset);
2677      if (OR == OW_MaybePartial) {
2678        auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
2679        OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2680                                KillingOffset, DeadOffset,
2681                                DeadLocWrapper.DefInst, IOL);
2682      }
2683      if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2684        auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
2685        auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
2686        // We are re-using tryToMergePartialOverlappingStores, which requires
2687        // DeadSI to dominate KillingSI.
2688        // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2689        if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2690          if (Constant *Merged = tryToMergePartialOverlappingStores(
2691                  KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
2692                  &DT)) {
2693
2694            // Update stored value of earlier store to merged constant.
2695            DeadSI->setOperand(0, Merged);
2696            ++NumModifiedStores;
2697            Changed = true;
2698            DeletedKillingLoc = true;
2699
2700            // Remove killing store and remove any outstanding overlap
2701            // intervals for the updated store.
2702            deleteDeadInstruction(KillingSI, &Deleted);
2703            auto I = IOLs.find(DeadSI->getParent());
2704            if (I != IOLs.end())
2705              I->second.erase(DeadSI);
            // The killing store is gone, so stop scanning for further
            // candidates it could kill.
2706            break;
2707          }
2708        }
2709      }
2710      if (OR == OW_Complete) {
2711        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
2712                          << *DeadLocWrapper.DefInst << "\n  KILLER: "
2713                          << *KillingLocWrapper.DefInst << '\n');
2714        deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2715        ++NumFastStores;
2716        Changed = true;
2717      }
2718    }
2719  }
2720
2721  assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2722         "SkipStores and Deleted out of sync?");
2723
2724  return {Changed, DeletedKillingLoc};
2725}
2726
2727bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2728 if (KillingDefWrapper.DefinedLocations.empty()) {
2729 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2730 << *KillingDefWrapper.DefInst << "\n");
2731 return false;
2732 }
2733
2734 bool MadeChange = false;
2735 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2736 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2737 << *KillingLocWrapper.MemDef << " ("
2738 << *KillingLocWrapper.DefInst << ")\n");
2739 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2740 MadeChange |= Changed;
2741
2742 // Check if the store is a no-op.
2743 if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
2744 KillingLocWrapper.UnderlyingObject)) {
2745 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2746 << *KillingLocWrapper.DefInst << '\n');
2747 deleteDeadInstruction(KillingLocWrapper.DefInst);
2748 NumRedundantStores++;
2749 MadeChange = true;
2750 continue;
2751 }
2752 // Can we form a calloc from a memset/malloc pair?
2753 if (!DeletedKillingLoc &&
2754 tryFoldIntoCalloc(KillingLocWrapper.MemDef,
2755 KillingLocWrapper.UnderlyingObject)) {
2756 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2757 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2758 deleteDeadInstruction(KillingLocWrapper.DefInst);
2759 MadeChange = true;
2760 continue;
2761 }
2762 }
2763 return MadeChange;
2764}
2765
// Top-level driver: build DSEState for function F, run each elimination
// strategy in turn, then erase all instructions queued in State.ToRemove.
// NOTE(review): the first lines of this definition (doxygen lines 2766-2767,
// "static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA
// &MSSA, DominatorTree &DT, PostDominatorTree &PDT," per the declaration in
// the index) were lost during extraction.
2768                                const TargetLibraryInfo &TLI,
2769                                const CycleInfo &CI) {
2770  bool MadeChange = false;
2771  DSEState State(F, AA, MSSA, DT, PDT, TLI, CI);
2772  // For each store:
2773  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2774    MemoryDef *KillingDef = State.MemDefs[I];
2775    if (State.SkipStores.count(KillingDef))
2776      continue;
2777
    // NOTE(review): the continuation of this getLocForInst call (doxygen
    // line 2780) was lost during extraction -- confirm against the original.
2778    MemoryDefWrapper KillingDefWrapper(
2779        KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
2781    MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2782  }
2783
  // NOTE(review): a guard line (doxygen line 2784, presumably checking
  // EnablePartialOverwriteTracking) was lost during extraction.
2785    for (auto &KV : State.IOLs)
2786      MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2787
2788  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2789  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2790  MadeChange |= State.eliminateRedundantStoresViaDominatingConditions();
2791
  // Perform the actual deletions last, after all analyses are done.
2792  while (!State.ToRemove.empty()) {
2793    Instruction *DeadInst = State.ToRemove.pop_back_val();
2794    DeadInst->eraseFromParent();
2795  }
2796
2797  return MadeChange;
2798}
2799
2800//===----------------------------------------------------------------------===//
2801// DSE Pass
2802//===----------------------------------------------------------------------===//
// New-pass-manager entry point for DSE.
// NOTE(review): this definition is truncated by extraction -- its signature
// (doxygen lines 2803-2806; per the index: PreservedAnalyses run(Function &F,
// FunctionAnalysisManager &FAM)) and the fetches of the remaining analyses
// (doxygen lines 2808-2809) are missing, as is the construction of `PA`
// (doxygen lines 2822-2824). Confirm against the original source.
2807  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2810
2811  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, CI);
2812
  // Count stores that survived the pass (statistics builds only).
2813#ifdef LLVM_ENABLE_STATS
2815  for (auto &I : instructions(F))
2816    NumRemainingStores += isa<StoreInst>(&I);
2817#endif
2818
  // Nothing changed: all analyses remain valid.
2819  if (!Changed)
2820    return PreservedAnalyses::all();
2821
2825  return PA;
2826}
2827
2828namespace {
2829
2830/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2831class DSELegacyPass : public FunctionPass {
2832public:
2833  static char ID; // Pass identification, replacement for typeid
2834
  // NOTE(review): the constructor body's initialization call (doxygen line
  // 2836, presumably initializeDSELegacyPassPass) was lost during extraction.
2835  DSELegacyPass() : FunctionPass(ID) {
2837  }
2838
  /// Gather the required analyses and run the shared eliminateDeadStores()
  /// driver on \p F. \returns true if the function was modified.
2839  bool runOnFunction(Function &F) override {
2840    if (skipFunction(F))
2841      return false;
2842
2843    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2844    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2845    const TargetLibraryInfo &TLI =
2846        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2847    MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2848    PostDominatorTree &PDT =
2849        getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2850    CycleInfo &CI = getAnalysis<CycleInfoWrapperPass>().getResult();
2851
2852    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, CI);
2853
    // Count stores that survived the pass (statistics builds only).
2854#ifdef LLVM_ENABLE_STATS
2856    for (auto &I : instructions(F))
2857      NumRemainingStores += isa<StoreInst>(&I);
2858#endif
2859
2860    return Changed;
2861  }
2862
  /// Declare the analyses this pass needs and those it keeps valid; DSE only
  /// deletes/rewrites stores, so the CFG is preserved.
2863  void getAnalysisUsage(AnalysisUsage &AU) const override {
2864    AU.setPreservesCFG();
2865    AU.addRequired<AAResultsWrapperPass>();
2866    AU.addRequired<TargetLibraryInfoWrapperPass>();
2867    AU.addPreserved<GlobalsAAWrapperPass>();
2868    AU.addRequired<DominatorTreeWrapperPass>();
2869    AU.addPreserved<DominatorTreeWrapperPass>();
2870    AU.addRequired<PostDominatorTreeWrapperPass>();
2871    AU.addRequired<MemorySSAWrapperPass>();
2872    AU.addPreserved<PostDominatorTreeWrapperPass>();
2873    AU.addPreserved<MemorySSAWrapperPass>();
2874    AU.addRequired<CycleInfoWrapperPass>();
2875    AU.addPreserved<CycleInfoWrapperPass>();
2876    AU.addRequired<AssumptionCacheTracker>();
2877  }
2878};
2879
2880} // end anonymous namespace
2881
// Definition of the pass's unique identifier; its address identifies the pass.
2882char DSELegacyPass::ID = 0;
2883
// Register the legacy pass under the "dse" command-line name.
// NOTE(review): the INITIALIZE_PASS_DEPENDENCY lines between BEGIN and END
// (doxygen lines 2886-2894) were lost during extraction.
2884INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2885                      false)
2895INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2896                    false)
2897
// NOTE(review): the signature of this factory function (doxygen line 2898,
// presumably FunctionPass *llvm::createDeadStoreEliminationPass()) was lost
// during extraction -- confirm against the original source.
2899  return new DSELegacyPass();
2900}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...
DXIL Forward Handle Accesses
DXIL Resource Access
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI, const CycleInfo &CI)
MapVector< Instruction *, OverlapIntervalsTy > InstOverlapIntervalsTy
static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller)
static cl::opt< bool > EnableInitializesImprovement("enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE"))
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
static bool isNoopIntrinsic(Instruction *I)
static ConstantRangeList getIntersectedInitRangeList(ArrayRef< ArgumentInitInfo > Args, bool CallHasNoUnwindAttr)
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
std::map< int64_t, int64_t > OverlapIntervalsTy
static void pushMemUses(MemoryAccess *Acc, SmallVectorImpl< MemoryAccess * > &WorkList, SmallPtrSetImpl< MemoryAccess * > &Visited)
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, EarliestEscapeAnalysis &EA)
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
static cl::opt< unsigned > MaxDepthRecursion("dse-max-dom-cond-depth", cl::init(1024), cl::Hidden, cl::desc("Max dominator tree recursion depth for eliminating redundant " "stores via dominating conditions"))
static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo, uint64_t PtrOffset)
Update the attributes given that a memory access is updated (the dereferenced pointer could be moved ...
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
static bool hasInitializesAttr(Instruction *I)
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
early cse Early CSE w MemorySSA
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void deleteDeadInstruction(Instruction *I)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool VisitNode(MachineDomTreeNode *Node, Register TLSBaseAddrReg)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An immutable pass that tracks lazily created AssumptionCache objects.
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
LLVM_ABI ArrayRef< ConstantRange > getValueAsConstantRangeList() const
Return the attribute's value as a ConstantRange array.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Get the attribute of a given kind from a given arg.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
LLVM_ABI bool onlyAccessesInaccessibleMemOrArgMem() const
Determine if the function may only access memory that is either inaccessible from the IR or pointed t...
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
LLVM_ABI Value * getArgOperandWithAttribute(Attribute::AttrKind Kind) const
If one of the arguments has the specified attribute, returns its operand value.
unsigned arg_size() const
This class represents a list of constant ranges.
bool empty() const
Return true if this list contains no members.
LLVM_ABI ConstantRangeList intersectWith(const ConstantRangeList &CRL) const
Return the range list that results from the intersection of this ConstantRangeList with another Const...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
Analysis pass which computes a CycleInfo.
Legacy analysis pass which computes a CycleInfo.
static DIAssignID * getDistinct(LLVMContext &Context)
DbgVariableFragmentInfo FragmentInfo
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(CounterInfo &Counter)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
iterator_range< root_iterator > roots()
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
CaptureComponents getCapturesBefore(const Value *Object, const Instruction *I, bool OrAt) override
Return how Object may be captured before instruction I, considering only provenance captures.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
bool isEquality() const
Return true if this predicate is either EQ or NE.
LLVM_ABI bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
const_iterator begin() const
bool empty() const
empty - Return true when no intervals are mapped.
const_iterator end() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
bool hasValue() const
static LocationSize precise(uint64_t Value)
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
Value * getLength() const
Value * getValue() const
BasicBlock * getBlock() const
Definition MemorySSA.h:162
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
void setOptimized(MemoryAccess *MA)
Definition MemorySSA.h:392
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition MemorySSA.h:1039
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:979
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition MemorySSA.h:765
LLVM_ABI MemorySSAWalker * getSkipSelfWalker()
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition MemorySSA.h:740
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition MemorySSA.h:260
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition MemorySSA.h:257
PHITransAddr - An address value which tracks and handles phi translation.
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
LLVM_ABI bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Value * getAddr() const
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
LLVM_ABI bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
iterator begin() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
iterator_range< use_iterator > uses()
Definition Value.h:380
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
auto m_Value()
Match an arbitrary value and ignore it.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:203
LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgVariableRecord *DVRAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to (Dest + S...
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
@ Uninitialized
Definition Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
AllocFnKind
Definition Attributes.h:53
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1749
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI bool canReplacePointersIfEqual(const Value *From, const Value *To, const DataLayout &DL)
Returns true if a pointer value From can be replaced with another pointer value To if they are deeme...
Definition Loads.cpp:879
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
LLVM_ABI Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI, unsigned AddrSpace)
Emit a call to the calloc function.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto post_order(const T &G)
Post-order traversal of a graph.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition Alignment.h:186
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
LLVM_ABI bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function-level.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI FunctionPass * createDeadStoreEliminationPass()
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
auto predecessors(const MachineBasicBlock *BB)
bool capturesAnything(CaptureComponents CC)
Definition ModRef.h:379
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:375
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Various options to control the behavior of getObjectSize.
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.