1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
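// For illustration (a hypothetical source-level example, not taken from the
// pass or its tests): in
//
//   void f(int *p) {
//     *p = 1; // MaybeDeadAccess: clobbering MemoryDef found by walking up
//     *p = 2; // StartDef: no intervening read, completely overwrites *p = 1
//   }
//
// the first store satisfies all of the checks above and is erased.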
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/BasicBlock.h"
54#include "llvm/IR/Constant.h"
55#include "llvm/IR/Constants.h"
56#include "llvm/IR/DataLayout.h"
57#include "llvm/IR/DebugInfo.h"
58#include "llvm/IR/Dominators.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/IRBuilder.h"
62#include "llvm/IR/InstrTypes.h"
63#include "llvm/IR/Instruction.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/PassManager.h"
69#include "llvm/IR/Value.h"
72#include "llvm/Support/Debug.h"
79#include <algorithm>
80#include <cassert>
81#include <cstdint>
82#include <iterator>
83#include <map>
84#include <optional>
85#include <utility>
86
87using namespace llvm;
88using namespace PatternMatch;
89
90#define DEBUG_TYPE "dse"
91
92STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
93STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
94STATISTIC(NumFastStores, "Number of stores deleted");
95STATISTIC(NumFastOther, "Number of other instrs removed");
96STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
97STATISTIC(NumModifiedStores, "Number of stores modified");
98STATISTIC(NumCFGChecks, "Number of blocks checked in CFG-based dead store checks");
99STATISTIC(NumCFGTries, "Number of CFG-based dead store checks attempted");
100STATISTIC(NumCFGSuccess, "Number of CFG-based dead store checks that succeeded");
101STATISTIC(NumGetDomMemoryDefPassed,
102 "Number of times a valid candidate is returned from getDomMemoryDef");
103STATISTIC(NumDomMemDefChecks,
104 "Number of iterations checking for reads in getDomMemoryDef");
105
106DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
107 "Controls which MemoryDefs are eliminated.");
108
109static cl::opt<bool>
110EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
111 cl::init(true), cl::Hidden,
112 cl::desc("Enable partial-overwrite tracking in DSE"));
113
114static cl::opt<bool>
115EnablePartialStoreMerging("enable-dse-partial-store-merging",
116 cl::init(true), cl::Hidden,
117 cl::desc("Enable partial store merging in DSE"));
118
119static cl::opt<unsigned>
120 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
121 cl::desc("The number of memory instructions to scan for "
122 "dead store elimination (default = 150)"));
123static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
124 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
125 cl::desc("The maximum number of steps while walking upwards to find "
126 "MemoryDefs that may be killed (default = 90)"));
127
128static cl::opt<unsigned> MemorySSAPartialStoreLimit(
129 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
130 cl::desc("The maximum number of candidates that only partially overwrite "
131 "the killing MemoryDef to consider"
132 " (default = 5)"));
133
134static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
135 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
136 cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
137 "other stores per basic block (default = 5000)"));
138
139static cl::opt<unsigned> MemorySSASameBBStepCost(
140 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
141 cl::desc(
142 "The cost of a step in the same basic block as the killing MemoryDef "
143 "(default = 1)"));
144
145static cl::opt<unsigned>
146 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
147 cl::Hidden,
148 cl::desc("The cost of a step in a different basic "
149 "block than the killing MemoryDef "
150 "(default = 5)"));
151
152static cl::opt<unsigned> MemorySSAPathCheckLimit(
153 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
154 cl::desc("The maximum number of blocks to check when trying to prove that "
155 "all paths to an exit go through a killing block (default = 50)"));
156
157// This flag allows or disallows DSE to optimize MemorySSA during its
158// traversal. Note that DSE optimizing MemorySSA may impact other passes
159// downstream of the DSE invocation and can lead to issues not being
160// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
161// those cases, the flag can be used to check if DSE's MemorySSA optimizations
162// impact follow-up passes.
163static cl::opt<bool>
164 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
165 cl::desc("Allow DSE to optimize memory accesses."));
166
167//===----------------------------------------------------------------------===//
168// Helper functions
169//===----------------------------------------------------------------------===//
170using OverlapIntervalsTy = std::map<int64_t, int64_t>;
171using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
172
173/// Returns true if the end of this instruction can be safely shortened in
174/// length.
175static bool isShortenableAtTheEnd(Instruction *I) {
176 // Don't shorten stores for now
177 if (isa<StoreInst>(I))
178 return false;
179
180 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
181 switch (II->getIntrinsicID()) {
182 default: return false;
183 case Intrinsic::memset:
184 case Intrinsic::memcpy:
185 case Intrinsic::memcpy_element_unordered_atomic:
186 case Intrinsic::memset_element_unordered_atomic:
187 // Do shorten memory intrinsics.
188 // FIXME: Add memmove if it's also safe to transform.
189 return true;
190 }
191 }
192
193 // Don't shorten libcalls for now.
194
195 return false;
196}
197
198/// Returns true if the beginning of this instruction can be safely shortened
199/// in length.
200static bool isShortenableAtTheBeginning(Instruction *I) {
201 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
202 // easily done by offsetting the source address.
203 return isa<AnyMemSetInst>(I);
204}
205
206static std::optional<TypeSize> getPointerSize(const Value *V,
207 const DataLayout &DL,
208 const TargetLibraryInfo &TLI,
209 const Function *F) {
210 uint64_t Size;
211 ObjectSizeOpts Opts;
212 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
213
214 if (getObjectSize(V, Size, DL, &TLI, Opts))
215 return TypeSize::getFixed(Size);
216 return std::nullopt;
217}
218
219namespace {
220
221enum OverwriteResult {
222 OW_Begin,
223 OW_Complete,
224 OW_End,
225 OW_PartialEarlierWithFullLater,
226 OW_MaybePartial,
227 OW_None,
228 OW_Unknown
229};
230
231} // end anonymous namespace
232
233/// Check if two instructions are masked stores that completely
234/// overwrite one another. More specifically, \p KillingI has to
235/// overwrite \p DeadI.
236static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
237 const Instruction *DeadI,
238 BatchAAResults &AA) {
239 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
240 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
241 if (KillingII == nullptr || DeadII == nullptr)
242 return OW_Unknown;
243 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
244 return OW_Unknown;
245 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
246 // Type size.
247 VectorType *KillingTy =
248 cast<VectorType>(KillingII->getArgOperand(0)->getType());
249 VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
250 if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
251 return OW_Unknown;
252 // Element count.
253 if (KillingTy->getElementCount() != DeadTy->getElementCount())
254 return OW_Unknown;
255 // Pointers.
256 Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
257 Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
258 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
259 return OW_Unknown;
260 // Masks.
261 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
262 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
263 return OW_Unknown;
264 return OW_Complete;
265 }
266 return OW_Unknown;
267}
268
269/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
270/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
271/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
272/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
273/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
274/// overwritten by a killing (smaller) store which doesn't write outside the big
275/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
276/// NOTE: This function must only be called if both \p KillingLoc and \p
277/// DeadLoc belong to the same underlying object with valid \p KillingOff and
278/// \p DeadOff.
279static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
280 const MemoryLocation &DeadLoc,
281 int64_t KillingOff, int64_t DeadOff,
282 Instruction *DeadI,
283 InstOverlapIntervalsTy &IOL) {
284 const uint64_t KillingSize = KillingLoc.Size.getValue();
285 const uint64_t DeadSize = DeadLoc.Size.getValue();
286 // We may now overlap, although the overlap is not complete. There might also
287 // be other incomplete overlaps, and together, they might cover the complete
288 // dead store.
289 // Note: The correctness of this logic depends on the fact that this function
290 // is not even called providing DepWrite when there are any intervening reads.
291 if (EnablePartialOverwriteTracking &&
292 KillingOff < int64_t(DeadOff + DeadSize) &&
293 int64_t(KillingOff + KillingSize) >= DeadOff) {
294
295 // Insert our part of the overlap into the map.
296 auto &IM = IOL[DeadI];
297 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
298 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
299 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
300 << ")\n");
301
302 // Make sure that we only insert non-overlapping intervals and combine
303 // adjacent intervals. The intervals are stored in the map with the ending
304 // offset as the key (in the half-open sense) and the starting offset as
305 // the value.
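// For example (illustrative values): earlier killing writes covering [0, 4)
// and [10, 12) are stored as IM = { 4 -> 0, 12 -> 10 }; inserting a new
// killing write over [2, 11) merges all three into the single entry
// { 12 -> 0 }, i.e. the half-open interval [0, 12).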
306 int64_t KillingIntStart = KillingOff;
307 int64_t KillingIntEnd = KillingOff + KillingSize;
308
309 // Find any intervals ending at, or after, KillingIntStart which start
310 // before KillingIntEnd.
311 auto ILI = IM.lower_bound(KillingIntStart);
312 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
313 // This existing interval is overlapped with the current store somewhere
314 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
315 // intervals and adjusting our start and end.
316 KillingIntStart = std::min(KillingIntStart, ILI->second);
317 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
318 ILI = IM.erase(ILI);
319
320 // Continue erasing and adjusting our end in case other previous
321 // intervals are also overlapped with the current store.
322 //
323 // |--- dead 1 ---| |--- dead 2 ---|
324 // |------- killing---------|
325 //
326 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
327 assert(ILI->second > KillingIntStart && "Unexpected interval");
328 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
329 ILI = IM.erase(ILI);
330 }
331 }
332
333 IM[KillingIntEnd] = KillingIntStart;
334
335 ILI = IM.begin();
336 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
337 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
338 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
339 << ") Composite KillingLoc [" << ILI->second << ", "
340 << ILI->first << ")\n");
341 ++NumCompletePartials;
342 return OW_Complete;
343 }
344 }
345
346 // Check for a dead store which writes to all the memory locations that
347 // the killing store writes to.
348 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
349 int64_t(DeadOff + DeadSize) > KillingOff &&
350 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
351 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead store [" << DeadOff
352 << ", " << int64_t(DeadOff + DeadSize)
353 << ") by a killing store [" << KillingOff << ", "
354 << int64_t(KillingOff + KillingSize) << ")\n");
355 // TODO: Maybe come up with a better name?
356 return OW_PartialEarlierWithFullLater;
357 }
358
359 // Another interesting case is if the killing store overwrites the end of the
360 // dead store.
361 //
362 // |--dead--|
363 // |-- killing --|
364 //
365 // In this case we may want to trim the size of dead store to avoid
366 // generating stores to addresses which will definitely be overwritten by
367 // the killing store.
368 if (!EnablePartialOverwriteTracking &&
369 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
370 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
371 return OW_End;
372
373 // Finally, we also need to check if the killing store overwrites the
374 // beginning of the dead store.
375 //
376 // |--dead--|
377 // |-- killing --|
378 //
379 // In this case we may want to move the destination address and trim the size
380 // of dead store to avoid generating stores to addresses which will definitely
381 // be overwritten by the killing store.
382 if (!EnablePartialOverwriteTracking &&
383 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
384 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
385 "Expect to be handled as OW_Complete");
386 return OW_Begin;
387 }
388 // Otherwise, they don't completely overlap.
389 return OW_Unknown;
390}
391
392/// Returns true if the memory which is accessed by the second instruction is not
393/// modified between the first and the second instruction.
394/// Precondition: Second instruction must be dominated by the first
395/// instruction.
396static bool
397memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
398 BatchAAResults &AA, const DataLayout &DL,
399 DominatorTree *DT) {
400 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
401 // instructions which can modify the memory location accessed by SecondI.
402 //
403 // While doing the walk keep track of the address to check. It might be
404 // different in different basic blocks due to PHI translation.
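// For example (hypothetical): if SecondI stores through a pointer that is a
// PHI of 'a' and 'b', e.g. the result of
//   int *p = cond ? a : b;
// lowered as a PHI across blocks, then the address to check is 'a' while
// scanning the predecessor that supplied 'a' and 'b' while scanning the
// other one, not 'p' itself.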
405 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
406 SmallVector<BlockAddressPair, 16> WorkList;
407 // Keep track of the address we visited each block with. Bail out if we
408 // visit a block with different addresses.
409 DenseMap<BasicBlock *, Value *> Visited;
410
411 BasicBlock::iterator FirstBBI(FirstI);
412 ++FirstBBI;
413 BasicBlock::iterator SecondBBI(SecondI);
414 BasicBlock *FirstBB = FirstI->getParent();
415 BasicBlock *SecondBB = SecondI->getParent();
416 MemoryLocation MemLoc;
417 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
418 MemLoc = MemoryLocation::getForDest(MemSet);
419 else
420 MemLoc = MemoryLocation::get(SecondI);
421
422 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
423
424 // Start checking the SecondBB.
425 WorkList.push_back(
426 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
427 bool isFirstBlock = true;
428
429 // Check all blocks going backward until we reach the FirstBB.
430 while (!WorkList.empty()) {
431 BlockAddressPair Current = WorkList.pop_back_val();
432 BasicBlock *B = Current.first;
433 PHITransAddr &Addr = Current.second;
434 Value *Ptr = Addr.getAddr();
435
436 // Ignore instructions before FirstI if this is the FirstBB.
437 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
438
439 BasicBlock::iterator EI;
440 if (isFirstBlock) {
441 // Ignore instructions after SecondI if this is the first visit of SecondBB.
442 assert(B == SecondBB && "first block is not the store block");
443 EI = SecondBBI;
444 isFirstBlock = false;
445 } else {
446 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
447 // In this case we also have to look at instructions after SecondI.
448 EI = B->end();
449 }
450 for (; BI != EI; ++BI) {
451 Instruction *I = &*BI;
452 if (I->mayWriteToMemory() && I != SecondI)
453 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
454 return false;
455 }
456 if (B != FirstBB) {
457 assert(B != &FirstBB->getParent()->getEntryBlock() &&
458 "Should not hit the entry block because SecondI must be dominated by FirstI");
459 for (BasicBlock *Pred : predecessors(B)) {
460 PHITransAddr PredAddr = Addr;
461 if (PredAddr.needsPHITranslationFromBlock(B)) {
462 if (!PredAddr.isPotentiallyPHITranslatable())
463 return false;
464 if (!PredAddr.translateValue(B, Pred, DT, false))
465 return false;
466 }
467 Value *TranslatedPtr = PredAddr.getAddr();
468 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
469 if (!Inserted.second) {
470 // We already visited this block before. If it was with a different
471 // address - bail out!
472 if (TranslatedPtr != Inserted.first->second)
473 return false;
474 // ... otherwise just skip it.
475 continue;
476 }
477 WorkList.push_back(std::make_pair(Pred, PredAddr));
478 }
479 }
480 }
481 return true;
482}
483
484static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
485 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
486 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
487 const DataLayout &DL = Inst->getModule()->getDataLayout();
488 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
489 uint64_t DeadSliceOffsetInBits =
490 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
491 auto SetDeadFragExpr = [](auto *Assign,
492 DIExpression::FragmentInfo DeadFragment) {
493 // createFragmentExpression expects an offset relative to the existing
494 // fragment offset if there is one.
495 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
496 Assign->getExpression()
497 ->getFragmentInfo()
498 .value_or(DIExpression::FragmentInfo(0, 0))
499 .OffsetInBits;
500 if (auto NewExpr = DIExpression::createFragmentExpression(
501 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
502 Assign->setExpression(*NewExpr);
503 return;
504 }
505 // Failed to create a fragment expression for this so discard the value,
506 // making this a kill location.
507 auto *Expr = *DIExpression::createFragmentExpression(
508 DIExpression::get(Assign->getContext(), std::nullopt),
509 DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
510 Assign->setExpression(Expr);
511 Assign->setKillLocation();
512 };
513
514 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
515 // link to any instructions. Created in the loop below (once).
516 DIAssignID *LinkToNothing = nullptr;
517 LLVMContext &Ctx = Inst->getContext();
518 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
519 if (!LinkToNothing)
520 LinkToNothing = DIAssignID::getDistinct(Ctx);
521 return LinkToNothing;
522 };
523
524 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
525 // overlapping dbg.assign intrinsic. The loop invalidates the iterators
526 // returned by getAssignmentMarkers so save a copy of the markers to iterate
527 // over.
528 auto LinkedRange = at::getAssignmentMarkers(Inst);
529 SmallVector<DPValue *> LinkedDPVAssigns = at::getDPVAssignmentMarkers(Inst);
530 SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
531 LinkedRange.end());
532 auto InsertAssignForOverlap = [&](auto *Assign) {
533 std::optional<DIExpression::FragmentInfo> NewFragment;
534 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
535 DeadSliceSizeInBits, Assign,
536 NewFragment) ||
537 !NewFragment) {
538 // We couldn't calculate the intersecting fragment for some reason. Be
539 // cautious and unlink the whole assignment from the store.
540 Assign->setKillAddress();
541 Assign->setAssignId(GetDeadLink());
542 return;
543 }
544 // No intersect.
545 if (NewFragment->SizeInBits == 0)
546 return;
547
548 // Fragments overlap: insert a new dbg.assign for this dead part.
549 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
550 NewAssign->insertAfter(Assign);
551 NewAssign->setAssignId(GetDeadLink());
552 if (NewFragment)
553 SetDeadFragExpr(NewAssign, *NewFragment);
554 NewAssign->setKillAddress();
555 };
556 for_each(Linked, InsertAssignForOverlap);
557 for_each(LinkedDPVAssigns, InsertAssignForOverlap);
558}
559
560static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
561 uint64_t &DeadSize, int64_t KillingStart,
562 uint64_t KillingSize, bool IsOverwriteEnd) {
563 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
564 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
565
566 // We assume that memset/memcpy operates in chunks of the "largest" native
567 // type size and aligned on the same value. That means optimal start and size
568 // of memset/memcpy should be modulo of preferred alignment of that type. That
569 // is, there is no sense in trying to reduce the store size any further, since
570 // any "extra" stores come for free anyway.
571 // On the other hand, maximum alignment we can achieve is limited by alignment
572 // of initial store.
573
574 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
575 // "largest" native type.
576 // Note: What is the proper way to get that value?
577 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
578 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
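// Worked example (illustrative numbers): a memset of 32 bytes with
// destination alignment 8 whose tail [20, 32) is overwritten by a later
// store. offsetToAlignment(20, 8) == 4, so the removed region is [24, 32)
// and the memset is shrunk to 24 bytes; bytes [20, 24) are kept (they are
// rewritten anyway) so that the remaining length stays a multiple of 8.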
579
580 int64_t ToRemoveStart = 0;
581 uint64_t ToRemoveSize = 0;
582 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
583 // maintained on the remaining store.
584 if (IsOverwriteEnd) {
585 // Calculate required adjustment for 'KillingStart' in order to keep
586 // remaining store size aligned on 'PrefAlign'.
587 uint64_t Off =
588 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
589 ToRemoveStart = KillingStart + Off;
590 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
591 return false;
592 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
593 } else {
594 ToRemoveStart = DeadStart;
595 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
596 "Not overlapping accesses?");
597 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
598 // Calculate required adjustment for 'ToRemoveSize' in order to keep
599 // start of the remaining store aligned on 'PrefAlign'.
600 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
601 if (Off != 0) {
602 if (ToRemoveSize <= (PrefAlign.value() - Off))
603 return false;
604 ToRemoveSize -= PrefAlign.value() - Off;
605 }
606 assert(isAligned(PrefAlign, ToRemoveSize) &&
607 "Should preserve selected alignment");
608 }
609
610 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
611 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
612
613 uint64_t NewSize = DeadSize - ToRemoveSize;
614 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
615 // When shortening an atomic memory intrinsic, the newly shortened
616 // length must remain an integer multiple of the element size.
617 const uint32_t ElementSize = AMI->getElementSizeInBytes();
618 if (0 != NewSize % ElementSize)
619 return false;
620 }
621
622 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
623 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
624 << "\n KILLER [" << ToRemoveStart << ", "
625 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
626
627 Value *DeadWriteLength = DeadIntrinsic->getLength();
628 Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
629 DeadIntrinsic->setLength(TrimmedLength);
630 DeadIntrinsic->setDestAlignment(PrefAlign);
631
632 Value *OrigDest = DeadIntrinsic->getRawDest();
633 if (!IsOverwriteEnd) {
634 Value *Indices[1] = {
635 ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
636 Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
637 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
638 DeadI->getIterator());
639 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
640 DeadIntrinsic->setDest(NewDestGEP);
641 }
642
643 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
644 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
645 IsOverwriteEnd);
646
647 // Finally update start and size of dead access.
648 if (!IsOverwriteEnd)
649 DeadStart += ToRemoveSize;
650 DeadSize = NewSize;
651
652 return true;
653}
654
655static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
656 int64_t &DeadStart, uint64_t &DeadSize) {
657 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
658 return false;
659
660 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
661 int64_t KillingStart = OII->second;
662 uint64_t KillingSize = OII->first - KillingStart;
663
664 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
665
666 if (KillingStart > DeadStart &&
667 // Note: "KillingStart - DeadStart" is known to be positive due to
668 // preceding check.
669 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
670 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
671 // be non negative due to preceding checks.
672 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
673 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
674 true)) {
675 IntervalMap.erase(OII);
676 return true;
677 }
678 }
679 return false;
680}
681
682static bool tryToShortenBegin(Instruction *DeadI,
683 OverlapIntervalsTy &IntervalMap,
684 int64_t &DeadStart, uint64_t &DeadSize) {
685 if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
686 return false;
687
688 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
689 int64_t KillingStart = OII->second;
690 uint64_t KillingSize = OII->first - KillingStart;
691
692 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
693
694 if (KillingStart <= DeadStart &&
695 // Note: "DeadStart - KillingStart" is known to be non negative due to
696 // preceding check.
697 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
698 // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
699 // be positive due to preceding checks.
700 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
701 "Should have been handled as OW_Complete");
702 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
703 false)) {
704 IntervalMap.erase(OII);
705 return true;
706 }
707 }
708 return false;
709}
710
711static Constant *
712tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
713 int64_t KillingOffset, int64_t DeadOffset,
714 const DataLayout &DL, BatchAAResults &AA,
715 DominatorTree *DT) {
716
717 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
718 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
719 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
720 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
721 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
722 // If the store we find is:
723 // a) partially overwritten by the store to 'Loc'
724 // b) the killing store is fully contained in the dead one and
725 // c) they both have a constant value
726 // d) none of the two stores need padding
727 // Merge the two stores, replacing the dead store's value with a
728 // merge of both values.
729 // TODO: Deal with other constant types (vectors, etc), and probably
730 // some mem intrinsics (if needed)
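// Worked example (illustrative, little-endian): a dead 'store i32 0xAABBCCDD'
// at offset 0 followed by a killing 'store i8 0x11' at byte offset 1 merges
// into a single 'store i32 0xAABB11DD': LShiftAmount is 8, the mask clears
// bits [8, 16) of the dead value, and the killing byte is OR'd in after
// being shifted left by 8.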
731
732 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
733 APInt KillingValue =
734 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
735 unsigned KillingBits = KillingValue.getBitWidth();
736 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
737 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
738
739 // Offset of the smaller store inside the larger store
740 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
741 unsigned LShiftAmount =
742 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
743 : BitOffsetDiff;
744 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
745 LShiftAmount + KillingBits);
746 // Clear the bits we'll be replacing, then OR with the smaller
747 // store, shifted appropriately.
748 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
749 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
750 << "\n Killing: " << *KillingI
751 << "\n Merged Value: " << Merged << '\n');
752 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
753 }
754 return nullptr;
755}
756
757namespace {
758// Returns true if \p I is an intrinsic that does not read or write memory.
759bool isNoopIntrinsic(Instruction *I) {
760 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
761 switch (II->getIntrinsicID()) {
762 case Intrinsic::lifetime_start:
763 case Intrinsic::lifetime_end:
764 case Intrinsic::invariant_end:
765 case Intrinsic::launder_invariant_group:
766 case Intrinsic::assume:
767 return true;
768 case Intrinsic::dbg_declare:
769 case Intrinsic::dbg_label:
770 case Intrinsic::dbg_value:
771 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
772 default:
773 return false;
774 }
775 }
776 return false;
777}
778
779// Check if we can ignore \p D for DSE.
780bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
781 Instruction *DI = D->getMemoryInst();
782 // Calls that only access inaccessible memory cannot read or write any memory
783 // locations we consider for elimination.
784 if (auto *CB = dyn_cast<CallBase>(DI))
785 if (CB->onlyAccessesInaccessibleMemory())
786 return true;
787
788 // We can eliminate stores to locations not visible to the caller across
789 // throwing instructions.
790 if (DI->mayThrow() && !DefVisibleToCaller)
791 return true;
792
793 // We can remove the dead stores, irrespective of the fence and its ordering
794 // (release/acquire/seq_cst). Fences only constrain the ordering of
795 // already visible stores; they do not make a store visible to other
796 // threads. So, skipping over a fence does not change a store from being
797 // dead.
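// For example (hypothetical):
//   *p = 1;                                              // dead store
//   std::atomic_thread_fence(std::memory_order_seq_cst); // fence
//   *p = 2;                                              // killing store
// The fence does not publish the non-atomic store of 1 to other threads, so
// deleting it is still legal.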
798 if (isa<FenceInst>(DI))
799 return true;
800
801 // Skip intrinsics that do not really read or modify memory.
802 if (isNoopIntrinsic(DI))
803 return true;
804
805 return false;
806}
807
808struct DSEState {
809 Function &F;
810 AliasAnalysis &AA;
811 EarliestEscapeInfo EI;
812
813 /// The single BatchAA instance that is used to cache AA queries. It will
814 /// not be invalidated over the whole run. This is safe, because:
815 /// 1. Only memory writes are removed, so the alias cache for memory
816 /// locations remains valid.
817 /// 2. No new instructions are added (only instructions removed), so cached
818 /// information for a deleted value cannot be accessed by a re-used new
819 /// value pointer.
820 BatchAAResults BatchAA;
821
822 MemorySSA &MSSA;
823 DominatorTree &DT;
824 PostDominatorTree &PDT;
825 const TargetLibraryInfo &TLI;
826 const DataLayout &DL;
827 const LoopInfo &LI;
828
829 // Whether the function contains any irreducible control flow, useful for
830 // being accurately able to detect loops.
831 bool ContainsIrreducibleLoops;
832
833 // All MemoryDefs that potentially could kill other MemDefs.
835 // Any that should be skipped as they are already deleted
837 // Keep track whether a given object is captured before return or not.
838 DenseMap<const Value *, bool> CapturedBeforeReturn;
839 // Keep track of all of the objects that are invisible to the caller after
840 // the function returns.
841 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
842 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
843 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
844 // Post-order numbers for each basic block. Used to figure out if memory
845 // accesses are executed before another access.
846 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
847
848 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
849 /// basic block.
851 // Check if there are root nodes that are terminated by UnreachableInst.
852 // Those roots pessimize post-dominance queries. If there are such roots,
853 // fall back to CFG scan starting from all non-unreachable roots.
854 bool AnyUnreachableExit;
855
856 // Whether or not we should iterate on removing dead stores at the end of the
857 // function due to removing a store causing a previously captured pointer to
858 // no longer be captured.
859 bool ShouldIterateEndOfFunctionDSE;
860
861 /// Dead instructions to be removed at the end of DSE.
863
864 // Class contains self-reference, make sure it's not copied/moved.
865 DSEState(const DSEState &) = delete;
866 DSEState &operator=(const DSEState &) = delete;
867
868 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
869 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
870 const LoopInfo &LI)
871 : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
872 PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
873 // Collect blocks with throwing instructions not modeled in MemorySSA and
874 // alloc-like objects.
875 unsigned PO = 0;
876 for (BasicBlock *BB : post_order(&F)) {
877 PostOrderNumbers[BB] = PO++;
878 for (Instruction &I : *BB) {
879 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
880 if (I.mayThrow() && !MA)
881 ThrowingBlocks.insert(I.getParent());
882
883 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
884 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
885 (getLocForWrite(&I) || isMemTerminatorInst(&I)))
886 MemDefs.push_back(MD);
887 }
888 }
889
890 // Treat byval or inalloca arguments the same as Allocas: stores to them are
891 // dead at the end of the function.
892 for (Argument &AI : F.args())
893 if (AI.hasPassPointeeByValueCopyAttr())
894 InvisibleToCallerAfterRet.insert({&AI, true});
895
896 // Collect whether there is any irreducible control flow in the function.
897 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
898
899 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
900 return isa<UnreachableInst>(E->getTerminator());
901 });
902 }
903
904 LocationSize strengthenLocationSize(const Instruction *I,
905 LocationSize Size) const {
906 if (auto *CB = dyn_cast<CallBase>(I)) {
907 LibFunc F;
908 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
909 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
910 // Use the precise location size specified by the 3rd argument
911 // for determining whether KillingI overwrites DeadLoc if it is a memset_chk
912 // instruction. memset_chk will either write the amount specified as the
913 // 3rd argument or immediately abort and exit the program.
914 // NOTE: AA may determine NoAlias if it can prove that the access size
915 // is larger than the allocation size due to that being UB. To avoid
916 // returning potentially invalid NoAlias results by AA, limit the use of
917 // the precise location size to isOverwrite.
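// For example (hypothetical call): for '__memset_chk(p, 0, 16, object_size)'
// with a constant third argument of 16, the write is treated here as exactly
// 16 bytes, because the call either writes all 16 bytes or aborts before
// performing the store.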
918 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
919 return LocationSize::precise(Len->getZExtValue());
920 }
921 }
922 return Size;
923 }
924
925 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
926 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
927 /// location (by \p DeadI instruction).
928 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
929 /// \p DeadI, but they both write to the same underlying object. In that
930 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
931 /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
932 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
933 OverwriteResult isOverwrite(const Instruction *KillingI,
934 const Instruction *DeadI,
935 const MemoryLocation &KillingLoc,
936 const MemoryLocation &DeadLoc,
937 int64_t &KillingOff, int64_t &DeadOff) {
938 // AliasAnalysis does not always account for loops. Limit overwrite checks
939 // to dependencies for which we can guarantee they are independent of any
940 // loops they are in.
941 if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
942 return OW_Unknown;
943
944 LocationSize KillingLocSize =
945 strengthenLocationSize(KillingI, KillingLoc.Size);
946 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
947 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
948 const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
949 const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
950
951 // Check whether the killing store overwrites the whole object, in which
952 // case the size/offset of the dead store does not matter.
953 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
954 isIdentifiedObject(KillingUndObj)) {
955 std::optional<TypeSize> KillingUndObjSize =
956 getPointerSize(KillingUndObj, DL, TLI, &F);
957 if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
958 return OW_Complete;
959 }
960
961 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
962 // get imprecise values here, though (except for unknown sizes).
963 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
964 // In case no constant size is known, try to use the IR values for the
965 // number of bytes written and check if they match.
966 const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
967 const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
968 if (KillingMemI && DeadMemI) {
969 const Value *KillingV = KillingMemI->getLength();
970 const Value *DeadV = DeadMemI->getLength();
971 if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
972 return OW_Complete;
973 }
974
975 // Masked stores have imprecise locations, but we can reason about them
976 // to some extent.
977 return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
978 }
979
980 const TypeSize KillingSize = KillingLocSize.getValue();
981 const TypeSize DeadSize = DeadLoc.Size.getValue();
982 // Bail on doing Size comparison which depends on AA for now
983 // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
984 const bool AnyScalable =
985 DeadSize.isScalable() || KillingLocSize.isScalable();
986
987 if (AnyScalable)
988 return OW_Unknown;
989 // Query the alias information
990 AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
991
992 // If the start pointers are the same, we just have to compare sizes to see if
993 // the killing store was larger than the dead store.
994 if (AAR == AliasResult::MustAlias) {
995 // Make sure that the KillingSize size is >= the DeadSize size.
996 if (KillingSize >= DeadSize)
997 return OW_Complete;
998 }
999
1000 // If we hit a partial alias we may have a full overwrite
1001 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1002 int32_t Off = AAR.getOffset();
1003 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1004 return OW_Complete;
1005 }
1006
1007 // If we can't resolve the same pointers to the same object, then we can't
1008 // analyze them at all.
1009 if (DeadUndObj != KillingUndObj) {
1010 // Non aliasing stores to different objects don't overlap. Note that
1011 // if the killing store is known to overwrite whole object (out of
1012 // bounds access overwrites whole object as well) then it is assumed to
1013 // completely overwrite any store to the same object even if they don't
1014 // actually alias (see next check).
1015 if (AAR == AliasResult::NoAlias)
1016 return OW_None;
1017 return OW_Unknown;
1018 }
1019
1020 // Okay, we have stores to two completely different pointers. Try to
1021 // decompose the pointer into a "base + constant_offset" form. If the base
1022 // pointers are equal, then we can reason about the two stores.
1023 DeadOff = 0;
1024 KillingOff = 0;
1025 const Value *DeadBasePtr =
1026 GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1027 const Value *KillingBasePtr =
1028 GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1029
1030 // If the base pointers still differ, we have two completely different
1031 // stores.
1032 if (DeadBasePtr != KillingBasePtr)
1033 return OW_Unknown;
1034
1035 // The killing access completely overlaps the dead store if and only if
1036 // both start and end of the dead one is "inside" the killing one:
1037 // |<->|--dead--|<->|
1038 // |-----killing------|
1039 // Accesses may overlap if and only if start of one of them is "inside"
1040 // another one:
1041 // |<->|--dead--|<-------->|
1042 // |-------killing--------|
1043 // OR
1044 // |-------dead-------|
1045 // |<->|---killing---|<----->|
1046 //
1047 // We have to be careful here as *Off is signed while *.Size is unsigned.
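// For example (illustrative offsets): with DeadOff = 4, DeadSize = 8,
// KillingOff = 0 and KillingSize = 16, the dead access [4, 12) lies entirely
// inside the killing access [0, 16): DeadOff >= KillingOff and
// (DeadOff - KillingOff) + DeadSize == 12 <= 16, so the result is OW_Complete.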
1048
1049 // Check if the dead access starts "not before" the killing one.
1050 if (DeadOff >= KillingOff) {
1051 // If the dead access ends "not after" the killing access then the
1052 // dead one is completely overwritten by the killing one.
1053 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1054 return OW_Complete;
1055 // If start of the dead access is "before" end of the killing access
1056 // then accesses overlap.
1057 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1058 return OW_MaybePartial;
1059 }
1060 // If start of the killing access is "before" end of the dead access then
1061 // accesses overlap.
1062 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1063 return OW_MaybePartial;
1064 }
1065
1066 // Can reach here only if accesses are known not to overlap.
1067 return OW_None;
1068 }
1069
1070 bool isInvisibleToCallerAfterRet(const Value *V) {
1071 if (isa<AllocaInst>(V))
1072 return true;
1073 auto I = InvisibleToCallerAfterRet.insert({V, false});
1074 if (I.second) {
1075 if (!isInvisibleToCallerOnUnwind(V)) {
1076 I.first->second = false;
1077 } else if (isNoAliasCall(V)) {
1078 I.first->second = !PointerMayBeCaptured(V, true, false);
1079 }
1080 }
1081 return I.first->second;
1082 }
1083
1084 bool isInvisibleToCallerOnUnwind(const Value *V) {
1085 bool RequiresNoCaptureBeforeUnwind;
1086 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1087 return false;
1088 if (!RequiresNoCaptureBeforeUnwind)
1089 return true;
1090
1091 auto I = CapturedBeforeReturn.insert({V, true});
1092 if (I.second)
1093 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1094 // with the killing MemoryDef. But we refrain from doing so for now to
1095 // limit compile-time and this does not cause any changes to the number
1096 // of stores removed on a large test set in practice.
1097 I.first->second = PointerMayBeCaptured(V, false, true);
1098 return !I.first->second;
1099 }
1100
1101 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1102 if (!I->mayWriteToMemory())
1103 return std::nullopt;
1104
1105 if (auto *CB = dyn_cast<CallBase>(I))
1106 return MemoryLocation::getForDest(CB, TLI);
1107
1108 return MemoryLocation::getOrNone(I);
1109 }
1110
1111 /// Assuming this instruction has a dead analyzable write, can we delete
1112 /// this instruction?
1113 bool isRemovable(Instruction *I) {
1114 assert(getLocForWrite(I) && "Must have analyzable write");
1115
1116 // Don't remove volatile/atomic stores.
1117 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1118 return SI->isUnordered();
1119
1120 if (auto *CB = dyn_cast<CallBase>(I)) {
1121 // Don't remove volatile memory intrinsics.
1122 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1123 return !MI->isVolatile();
1124
1125 // Never remove dead lifetime intrinsics, e.g. because they are followed
1126 // by a free.
1127 if (CB->isLifetimeStartOrEnd())
1128 return false;
1129
1130 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1131 !CB->isTerminator();
1132 }
1133
1134 return false;
1135 }
1136
1137 /// Returns true if \p UseInst completely overwrites \p DefLoc
1138 /// (stored by \p DefInst).
1139 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1140 Instruction *UseInst) {
1141 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1142 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1143 // MemoryDef.
1144 if (!UseInst->mayWriteToMemory())
1145 return false;
1146
1147 if (auto *CB = dyn_cast<CallBase>(UseInst))
1148 if (CB->onlyAccessesInaccessibleMemory())
1149 return false;
1150
1151 int64_t InstWriteOffset, DepWriteOffset;
1152 if (auto CC = getLocForWrite(UseInst))
1153 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1154 DepWriteOffset) == OW_Complete;
1155 return false;
1156 }
1157
1158 /// Returns true if \p Def is not read before returning from the function.
1159 bool isWriteAtEndOfFunction(MemoryDef *Def) {
1160 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1161 << *Def->getMemoryInst()
1162 << ") is at the end of the function \n");
1163
1164 auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
1165 if (!MaybeLoc) {
1166 LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1167 return false;
1168 }
1169
1170 SmallVector<MemoryAccess *, 4> WorkList;
1171 SmallPtrSet<MemoryAccess *, 8> Visited;
1172 auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1173 if (!Visited.insert(Acc).second)
1174 return;
1175 for (Use &U : Acc->uses())
1176 WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1177 };
1178 PushMemUses(Def);
1179 for (unsigned I = 0; I < WorkList.size(); I++) {
1180 if (WorkList.size() >= MemorySSAScanLimit) {
1181 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1182 return false;
1183 }
1184
1185 MemoryAccess *UseAccess = WorkList[I];
1186 if (isa<MemoryPhi>(UseAccess)) {
1187 // AliasAnalysis does not account for loops. Limit elimination to
1188 // candidates for which we can guarantee they always store to the same
1189 // memory location.
1190 if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
1191 return false;
1192
1193 PushMemUses(cast<MemoryPhi>(UseAccess));
1194 continue;
1195 }
1196 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1197 // of times this is called and/or caching it.
1198 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1199 if (isReadClobber(*MaybeLoc, UseInst)) {
1200 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1201 return false;
1202 }
1203
1204 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1205 PushMemUses(UseDef);
1206 }
1207 return true;
1208 }
1209
1210 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1211 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1212 /// indicating whether \p I is a free-like call.
1213 std::optional<std::pair<MemoryLocation, bool>>
1214 getLocForTerminator(Instruction *I) const {
1215 uint64_t Len;
1216 Value *Ptr;
1217 if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1218 m_Value(Ptr))))
1219 return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1220
1221 if (auto *CB = dyn_cast<CallBase>(I)) {
1222 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1223 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1224 }
1225
1226 return std::nullopt;
1227 }
1228
1229 /// Returns true if \p I is a memory terminator instruction like
1230 /// llvm.lifetime.end or free.
1231 bool isMemTerminatorInst(Instruction *I) const {
1232 auto *CB = dyn_cast<CallBase>(I);
1233 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1234 getFreedOperand(CB, &TLI) != nullptr);
1235 }
1236
1237 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1238 /// instruction \p AccessI.
1239 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1240 Instruction *MaybeTerm) {
1241 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1242 getLocForTerminator(MaybeTerm);
1243
1244 if (!MaybeTermLoc)
1245 return false;
1246
1247 // If the terminator is a free-like call, all accesses to the underlying
1248 // object can be considered terminated.
1249 if (getUnderlyingObject(Loc.Ptr) !=
1250 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1251 return false;
1252
1253 auto TermLoc = MaybeTermLoc->first;
1254 if (MaybeTermLoc->second) {
1255 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1256 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1257 }
1258 int64_t InstWriteOffset = 0;
1259 int64_t DepWriteOffset = 0;
1260 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1261 DepWriteOffset) == OW_Complete;
1262 }
1263
1264 // Returns true if \p Use may read from \p DefLoc.
1265 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1266 if (isNoopIntrinsic(UseInst))
1267 return false;
1268
1269 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1270 // treated as read clobber.
1271 if (auto SI = dyn_cast<StoreInst>(UseInst))
1272 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1273
1274 if (!UseInst->mayReadFromMemory())
1275 return false;
1276
1277 if (auto *CB = dyn_cast<CallBase>(UseInst))
1278 if (CB->onlyAccessesInaccessibleMemory())
1279 return false;
1280
1281 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1282 }
1283
1284 /// Returns true if a dependency between \p Current and \p KillingDef is
1285 /// guaranteed to be loop invariant for the loops that they are in. Either
1286 /// because they are known to be in the same block, in the same loop level or
1287 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1288 /// during execution of the containing function.
1289 bool isGuaranteedLoopIndependent(const Instruction *Current,
1290 const Instruction *KillingDef,
1291 const MemoryLocation &CurrentLoc) {
1292 // If the dependency is within the same block or loop level (being careful
1293 // of irreducible loops), we know that AA will return a valid result for the
1294 // memory dependency. (Both at the function level, outside of any loop,
1295 // would also be valid but we currently disable that to limit compile time).
1296 if (Current->getParent() == KillingDef->getParent())
1297 return true;
1298 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1299 if (!ContainsIrreducibleLoops && CurrentLI &&
1300 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1301 return true;
1302 // Otherwise check the memory location is invariant to any loops.
1303 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1304 }
1305
1306 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1307 /// loop. In particular, this guarantees that it only references a single
1308 /// MemoryLocation during execution of the containing function.
1309 bool isGuaranteedLoopInvariant(const Value *Ptr) {
1310 Ptr = Ptr->stripPointerCasts();
1311 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1312 if (GEP->hasAllConstantIndices())
1313 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1314
1315 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1316 return I->getParent()->isEntryBlock() ||
1317 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1318 }
1319 return true;
1320 }
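// For example (hypothetical): in
//   for (int i = 0; i != n; ++i)
//     a[i] = 0;
// the stored-to pointer is a GEP with a loop-varying index and names a
// different location on each iteration, so it is not guaranteed loop
// invariant; an alloca in the entry block (indexed only by constants) is.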
1321
1322 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1323 // with no read access between them or on any other path to a function exit
1324 // block if \p KillingLoc is not accessible after the function returns. If
1325 // there is no such MemoryDef, return std::nullopt. The returned value may not
1326 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1327 // encounter an aliasing MemoryUse (read).
1328 std::optional<MemoryAccess *>
1329 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1330 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1331 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1332 bool IsMemTerm, unsigned &PartialLimit) {
1333 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1334 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1335 return std::nullopt;
1336 }
1337
1338 MemoryAccess *Current = StartAccess;
1339 Instruction *KillingI = KillingDef->getMemoryInst();
1340 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1341
1342 // Only optimize defining access of KillingDef when directly starting at its
1343 // defining access. The defining access also must only access KillingLoc. At
1344 // the moment we only support instructions with a single write location, so
1345 // it should be sufficient to disable optimizations for instructions that
1346 // also read from memory.
1347 bool CanOptimize = OptimizeMemorySSA &&
1348 KillingDef->getDefiningAccess() == StartAccess &&
1349 !KillingI->mayReadFromMemory();
1350
1351 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1352 std::optional<MemoryLocation> CurrentLoc;
1353 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1354 LLVM_DEBUG({
1355 dbgs() << " visiting " << *Current;
1356 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1357 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1358 << ")";
1359 dbgs() << "\n";
1360 });
1361
1362 // Reached TOP.
1363 if (MSSA.isLiveOnEntryDef(Current)) {
1364 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1365 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1366 // The first clobbering def is... none.
1367 KillingDef->setOptimized(Current);
1368 return std::nullopt;
1369 }
1370
1371 // Cost of a step. Accesses in the same block are more likely to be valid
1372 // candidates for elimination, hence consider them cheaper.
1373 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1374 ? MemorySSASameBBStepCost
1375 : MemorySSAOtherBBStepCost;
1376 if (WalkerStepLimit <= StepCost) {
1377 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1378 return std::nullopt;
1379 }
1380 WalkerStepLimit -= StepCost;
1381
1382 // Return for MemoryPhis. They cannot be eliminated directly and the
1383 // caller is responsible for traversing them.
1384 if (isa<MemoryPhi>(Current)) {
1385 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1386 return Current;
1387 }
1388
1389 // Below, check if CurrentDef is a valid candidate to be eliminated by
1390 // KillingDef. If it is not, check the next candidate.
1391 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1392 Instruction *CurrentI = CurrentDef->getMemoryInst();
1393
1394 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1395 CanOptimize = false;
1396 continue;
1397 }
1398
1399 // Before we try to remove anything, check for any extra throwing
1400 // instructions that block us from DSEing
1401 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1402 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1403 return std::nullopt;
1404 }
1405
1406 // Check for anything that looks like it will be a barrier to further
1407 // removal
1408 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1409 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1410 return std::nullopt;
1411 }
1412
1413 // If Current is known to be on a path that reads DefLoc or is a read
1414 // clobber, bail out, as the path is not profitable. We skip this check
1415 // for intrinsic calls, because the code knows how to handle memcpy
1416 // intrinsics.
1417 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1418 return std::nullopt;
1419
1420 // Quick check if there are direct uses that are read-clobbers.
1421 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1422 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1423 return !MSSA.dominates(StartAccess, UseOrDef) &&
1424 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1425 return false;
1426 })) {
1427 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1428 return std::nullopt;
1429 }
1430
1431 // If Current does not have an analyzable write location or is not
1432 // removable, skip it.
1433 CurrentLoc = getLocForWrite(CurrentI);
1434 if (!CurrentLoc || !isRemovable(CurrentI)) {
1435 CanOptimize = false;
1436 continue;
1437 }
1438
1439 // AliasAnalysis does not account for loops. Limit elimination to
1440 // candidates for which we can guarantee they always store to the same
1441 // memory location and not located in different loops.
1442 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1443 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1444 CanOptimize = false;
1445 continue;
1446 }
1447
1448 if (IsMemTerm) {
1449 // If the killing def is a memory terminator (e.g. lifetime.end), check
1450 // the next candidate if the current Current does not write the same
1451 // underlying object as the terminator.
1452 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1453 CanOptimize = false;
1454 continue;
1455 }
1456 } else {
1457 int64_t KillingOffset = 0;
1458 int64_t DeadOffset = 0;
1459 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1460 KillingOffset, DeadOffset);
1461 if (CanOptimize) {
1462 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1463 // optimized access. Do not optimize if CurrentDef is already the
1464 // defining access of KillingDef.
1465 if (CurrentDef != KillingDef->getDefiningAccess() &&
1466 (OR == OW_Complete || OR == OW_MaybePartial))
1467 KillingDef->setOptimized(CurrentDef);
1468
1469 // Once a may-aliasing def is encountered do not set an optimized
1470 // access.
1471 if (OR != OW_None)
1472 CanOptimize = false;
1473 }
1474
1475 // If Current does not write to the same object as KillingDef, check
1476 // the next candidate.
1477 if (OR == OW_Unknown || OR == OW_None)
1478 continue;
1479 else if (OR == OW_MaybePartial) {
1480 // If KillingDef only partially overwrites Current, check the next
1481 // candidate if the partial step limit is exceeded. This aggressively
1482 // limits the number of candidates for partial store elimination,
1483 // which are less likely to be removable in the end.
1484 if (PartialLimit <= 1) {
1485 WalkerStepLimit -= 1;
1486 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1487 continue;
1488 }
1489 PartialLimit -= 1;
1490 }
1491 }
1492 break;
1493 };
1494
1495 // Accesses to objects accessible after the function returns can only be
1496 // eliminated if the access is dead along all paths to the exit. Collect
1497     // the blocks with killing (=completely overwriting) MemoryDefs and check if
1498 // they cover all paths from MaybeDeadAccess to any function exit.
1499     SmallPtrSet<Instruction *, 16> KillingDefs;
1500     KillingDefs.insert(KillingDef->getMemoryInst());
1501 MemoryAccess *MaybeDeadAccess = Current;
1502 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1503 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1504 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1505 << *MaybeDeadI << ")\n");
1506
1507     SmallSetVector<MemoryAccess *, 32> WorkList;
1508     auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1509 for (Use &U : Acc->uses())
1510 WorkList.insert(cast<MemoryAccess>(U.getUser()));
1511 };
1512 PushMemUses(MaybeDeadAccess);
1513
1514 // Check if DeadDef may be read.
1515 for (unsigned I = 0; I < WorkList.size(); I++) {
1516 MemoryAccess *UseAccess = WorkList[I];
1517
1518 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1519 // Bail out if the number of accesses to check exceeds the scan limit.
1520 if (ScanLimit < (WorkList.size() - I)) {
1521 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1522 return std::nullopt;
1523 }
1524 --ScanLimit;
1525 NumDomMemDefChecks++;
1526
1527 if (isa<MemoryPhi>(UseAccess)) {
1528 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1529 return DT.properlyDominates(KI->getParent(),
1530 UseAccess->getBlock());
1531 })) {
1532 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1533 continue;
1534 }
1535 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1536 PushMemUses(UseAccess);
1537 continue;
1538 }
1539
1540 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1541 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1542
1543 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1544 return DT.dominates(KI, UseInst);
1545 })) {
1546 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1547 continue;
1548 }
1549
1550       // A memory terminator kills all preceding MemoryDefs and all succeeding
1551       // MemoryAccesses. We do not have to check its users.
1552 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1553 LLVM_DEBUG(
1554 dbgs()
1555 << " ... skipping, memterminator invalidates following accesses\n");
1556 continue;
1557 }
1558
1559 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1560 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1561 PushMemUses(UseAccess);
1562 continue;
1563 }
1564
1565 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1566 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1567 return std::nullopt;
1568 }
1569
1570 // Uses which may read the original MemoryDef mean we cannot eliminate the
1571 // original MD. Stop walk.
1572 if (isReadClobber(MaybeDeadLoc, UseInst)) {
1573 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1574 return std::nullopt;
1575 }
1576
1577 // If this worklist walks back to the original memory access (and the
1578       // pointer is not guaranteed loop invariant) then we cannot assume that a
1579 // store kills itself.
1580 if (MaybeDeadAccess == UseAccess &&
1581 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1582 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1583 return std::nullopt;
1584 }
1585 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1586 // if it reads the memory location.
1587 // TODO: It would probably be better to check for self-reads before
1588 // calling the function.
1589 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1590 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1591 continue;
1592 }
1593
1594 // Check all uses for MemoryDefs, except for defs completely overwriting
1595 // the original location. Otherwise we have to check uses of *all*
1596       // MemoryDefs we discover, including non-aliasing ones; otherwise we might
1597       // miss cases like the following:
1598 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1599 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1600 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1601 // (The Use points to the *first* Def it may alias)
1602 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1603 // stores [0,1]
1604 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1605 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1606 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1607 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1608 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1609 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1610             LLVM_DEBUG(dbgs()
1611                        << " ... found killing def " << *UseInst << "\n");
1612 KillingDefs.insert(UseInst);
1613 }
1614 } else {
1615           LLVM_DEBUG(dbgs()
1616                      << " ... found preceding def " << *UseInst << "\n");
1617 return std::nullopt;
1618 }
1619 } else
1620 PushMemUses(UseDef);
1621 }
1622 }
1623
1624 // For accesses to locations visible after the function returns, make sure
1625 // that the location is dead (=overwritten) along all paths from
1626 // MaybeDeadAccess to the exit.
1627 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1628 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1629 for (Instruction *KD : KillingDefs)
1630 KillingBlocks.insert(KD->getParent());
1631 assert(!KillingBlocks.empty() &&
1632 "Expected at least a single killing block");
1633
1634 // Find the common post-dominator of all killing blocks.
1635 BasicBlock *CommonPred = *KillingBlocks.begin();
1636 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1637 if (!CommonPred)
1638 break;
1639 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1640 }
1641
1642 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1643 // there is a path from MaybeDeadAccess to an exit not going through a
1644 // killing block.
1645 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1646 if (!AnyUnreachableExit)
1647 return std::nullopt;
1648
1649 // Fall back to CFG scan starting at all non-unreachable roots if not
1650 // all paths to the exit go through CommonPred.
1651 CommonPred = nullptr;
1652 }
1653
1654 // If CommonPred itself is in the set of killing blocks, we're done.
1655 if (KillingBlocks.count(CommonPred))
1656 return {MaybeDeadAccess};
1657
1658 SetVector<BasicBlock *> WorkList;
1659 // If CommonPred is null, there are multiple exits from the function.
1660 // They all have to be added to the worklist.
1661 if (CommonPred)
1662 WorkList.insert(CommonPred);
1663 else
1664 for (BasicBlock *R : PDT.roots()) {
1665 if (!isa<UnreachableInst>(R->getTerminator()))
1666 WorkList.insert(R);
1667 }
1668
1669 NumCFGTries++;
1670 // Check if all paths starting from an exit node go through one of the
1671 // killing blocks before reaching MaybeDeadAccess.
1672 for (unsigned I = 0; I < WorkList.size(); I++) {
1673 NumCFGChecks++;
1674 BasicBlock *Current = WorkList[I];
1675 if (KillingBlocks.count(Current))
1676 continue;
1677 if (Current == MaybeDeadAccess->getBlock())
1678 return std::nullopt;
1679
1680 // MaybeDeadAccess is reachable from the entry, so we don't have to
1681 // explore unreachable blocks further.
1682 if (!DT.isReachableFromEntry(Current))
1683 continue;
1684
1685 for (BasicBlock *Pred : predecessors(Current))
1686 WorkList.insert(Pred);
1687
1688 if (WorkList.size() >= MemorySSAPathCheckLimit)
1689 return std::nullopt;
1690 }
1691 NumCFGSuccess++;
1692 }
1693
1694 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1695 // potentially dead.
1696 return {MaybeDeadAccess};
1697 }
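  // For illustration only (hypothetical IR; names like @g and %c are made up):
  // the post-dominance check in getDomMemoryDef above is what keeps the first
  // store below alive, because the path through %else reaches the exit without
  // a killing store to the caller-visible @g.
  //
  //   store i32 0, ptr @g
  //   br i1 %c, label %then, label %else
  // then:
  //   store i32 1, ptr @g     ; kills the first store on this path only
  //   br label %exit
  // else:
  //   br label %exit          ; the first store is still observable here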
1698
1699 /// Delete dead memory defs and recursively add their operands to ToRemove if
1700 /// they became dead.
1701   void deleteDeadInstruction(Instruction *SI) {
1702     MemorySSAUpdater Updater(&MSSA);
1703 SmallVector<Instruction *, 32> NowDeadInsts;
1704 NowDeadInsts.push_back(SI);
1705 --NumFastOther;
1706
1707 while (!NowDeadInsts.empty()) {
1708 Instruction *DeadInst = NowDeadInsts.pop_back_val();
1709 ++NumFastOther;
1710
1711 // Try to preserve debug information attached to the dead instruction.
1712 salvageDebugInfo(*DeadInst);
1713 salvageKnowledge(DeadInst);
1714
1715 // Remove the Instruction from MSSA.
1716 MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1717 bool IsMemDef = MA && isa<MemoryDef>(MA);
1718 if (MA) {
1719 if (IsMemDef) {
1720 auto *MD = cast<MemoryDef>(MA);
1721 SkipStores.insert(MD);
1722 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1723 if (SI->getValueOperand()->getType()->isPointerTy()) {
1724 const Value *UO = getUnderlyingObject(SI->getValueOperand());
1725 if (CapturedBeforeReturn.erase(UO))
1726 ShouldIterateEndOfFunctionDSE = true;
1727 InvisibleToCallerAfterRet.erase(UO);
1728 }
1729 }
1730 }
1731
1732 Updater.removeMemoryAccess(MA);
1733 }
1734
1735 auto I = IOLs.find(DeadInst->getParent());
1736 if (I != IOLs.end())
1737 I->second.erase(DeadInst);
1738 // Remove its operands
1739 for (Use &O : DeadInst->operands())
1740 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1741 O.set(PoisonValue::get(O->getType()));
1742 if (isInstructionTriviallyDead(OpI, &TLI))
1743 NowDeadInsts.push_back(OpI);
1744 }
1745
1746 EI.removeInstruction(DeadInst);
1747 // Remove memory defs directly if they don't produce results, but only
1748 // queue other dead instructions for later removal. They may have been
1749 // used as memory locations that have been cached by BatchAA. Removing
1750     // them here may lead to newly created instructions being allocated at the
1751 // same address, yielding stale cache entries.
1752 if (IsMemDef && DeadInst->getType()->isVoidTy())
1753 DeadInst->eraseFromParent();
1754 else
1755 ToRemove.push_back(DeadInst);
1756 }
1757 }
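  // A minimal sketch of the recursive cleanup performed above (hypothetical
  // IR): once a dead store is removed, its address computation loses its last
  // user, becomes trivially dead and is queued for removal as well.
  //
  //   %p = getelementptr inbounds i32, ptr %base, i64 1
  //   store i32 0, ptr %p     ; removed as a dead store
  //                           ; -> %p has no users left and is removed too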
1758
1759 // Check for any extra throws between \p KillingI and \p DeadI that block
1760   // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
1761   // MemoryDefs that may throw are handled during the walk from one def to the
1762 // next.
1763 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1764 const Value *KillingUndObj) {
1765     // First see if we can ignore it by using the fact that the killing
1766     // object (KillingUndObj) is an alloca/alloca-like object that is not
1767     // visible to the caller during execution of the function.
1768 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1769 return false;
1770
1771 if (KillingI->getParent() == DeadI->getParent())
1772 return ThrowingBlocks.count(KillingI->getParent());
1773 return !ThrowingBlocks.empty();
1774 }
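  // Illustrative sketch (hypothetical IR): if @g is visible to the caller, an
  // extra may-throw call between the two stores blocks DSE above, because the
  // unwinder could observe the first value.
  //
  //   store i32 0, ptr @g
  //   call void @may_throw()  ; extra maythrow -> first store is kept
  //   store i32 1, ptr @g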
1775
1776 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1777 // instructions act as barriers:
1778 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1779 // object.
1780   // * Atomic stores stronger than monotonic.
1781 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1782     // If DeadI may throw it acts as a barrier, unless the access is to an
1783     // alloca/alloca-like object that does not escape.
1784 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1785 return true;
1786
1787 // If DeadI is an atomic load/store stronger than monotonic, do not try to
1788 // eliminate/reorder it.
1789 if (DeadI->isAtomic()) {
1790 if (auto *LI = dyn_cast<LoadInst>(DeadI))
1791 return isStrongerThanMonotonic(LI->getOrdering());
1792 if (auto *SI = dyn_cast<StoreInst>(DeadI))
1793 return isStrongerThanMonotonic(SI->getOrdering());
1794 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1795 return isStrongerThanMonotonic(ARMW->getOrdering());
1796 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1797 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1798 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1799 llvm_unreachable("other instructions should be skipped in MemorySSA");
1800 }
1801 return false;
1802 }
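  // Illustrative sketch (hypothetical IR): an atomic store stronger than
  // monotonic between the dead and the killing store acts as a barrier, so the
  // first store to @g is not eliminated.
  //
  //   store i32 0, ptr @g
  //   store atomic i32 1, ptr @sync release, align 4   ; DSE barrier
  //   store i32 2, ptr @g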
1803
1804 /// Eliminate writes to objects that are not visible in the caller and are not
1805 /// accessed before returning from the function.
1806 bool eliminateDeadWritesAtEndOfFunction() {
1807 bool MadeChange = false;
1808 LLVM_DEBUG(
1809 dbgs()
1810 << "Trying to eliminate MemoryDefs at the end of the function\n");
1811 do {
1812 ShouldIterateEndOfFunctionDSE = false;
1813 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1814 if (SkipStores.contains(Def))
1815 continue;
1816
1817 Instruction *DefI = Def->getMemoryInst();
1818 auto DefLoc = getLocForWrite(DefI);
1819 if (!DefLoc || !isRemovable(DefI))
1820 continue;
1821
1822 // NOTE: Currently eliminating writes at the end of a function is
1823 // limited to MemoryDefs with a single underlying object, to save
1824 // compile-time. In practice it appears the case with multiple
1825 // underlying objects is very uncommon. If it turns out to be important,
1826 // we can use getUnderlyingObjects here instead.
1827 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1828 if (!isInvisibleToCallerAfterRet(UO))
1829 continue;
1830
1831 if (isWriteAtEndOfFunction(Def)) {
1832 // See through pointer-to-pointer bitcasts
1833 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1834 "of the function\n");
1835           deleteDeadInstruction(DefI);
1836           ++NumFastStores;
1837 MadeChange = true;
1838 }
1839 }
1840 } while (ShouldIterateEndOfFunctionDSE);
1841 return MadeChange;
1842 }
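  // Illustrative sketch (hypothetical IR): a store to a non-escaping alloca
  // that is never read again before returning is removed by the loop above.
  //
  //   %a = alloca i32
  //   store i32 42, ptr %a    ; no read of %a on any path to the return
  //   ret void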
1843
1844 /// If we have a zero initializing memset following a call to malloc,
1845 /// try folding it into a call to calloc.
1846 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1847 Instruction *DefI = Def->getMemoryInst();
1848 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1849 if (!MemSet)
1850 // TODO: Could handle zero store to small allocation as well.
1851 return false;
1852 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1853 if (!StoredConstant || !StoredConstant->isNullValue())
1854 return false;
1855
1856 if (!isRemovable(DefI))
1857       // The memset might be volatile.
1858 return false;
1859
1860 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1861 F.hasFnAttribute(Attribute::SanitizeAddress) ||
1862 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1863 F.getName() == "calloc")
1864 return false;
1865 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1866 if (!Malloc)
1867 return false;
1868 auto *InnerCallee = Malloc->getCalledFunction();
1869 if (!InnerCallee)
1870 return false;
1871 LibFunc Func;
1872 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1873 Func != LibFunc_malloc)
1874 return false;
1875 // Gracefully handle malloc with unexpected memory attributes.
1876 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
1877 if (!MallocDef)
1878 return false;
1879
1880 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1881       // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
1882       // of the malloc block.
1883 auto *MallocBB = Malloc->getParent(),
1884 *MemsetBB = Memset->getParent();
1885 if (MallocBB == MemsetBB)
1886 return true;
1887 auto *Ptr = Memset->getArgOperand(0);
1888 auto *TI = MallocBB->getTerminator();
1889       ICmpInst::Predicate Pred;
1890       BasicBlock *TrueBB, *FalseBB;
1891 if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
1892 FalseBB)))
1893 return false;
1894 if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
1895 return false;
1896 return true;
1897 };
1898
1899 if (Malloc->getOperand(0) != MemSet->getLength())
1900 return false;
1901 if (!shouldCreateCalloc(Malloc, MemSet) ||
1902 !DT.dominates(Malloc, MemSet) ||
1903 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
1904 return false;
1905 IRBuilder<> IRB(Malloc);
1906 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
1907 auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
1908 Malloc->getArgOperand(0), IRB, TLI);
1909 if (!Calloc)
1910 return false;
1911
1912 MemorySSAUpdater Updater(&MSSA);
1913 auto *NewAccess =
1914 Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
1915 MallocDef);
1916 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1917 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1918 Malloc->replaceAllUsesWith(Calloc);
1919     deleteDeadInstruction(Malloc);
1920     return true;
1921 }
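  // Illustrative sketch (hypothetical IR): assuming nothing modifies the
  // allocation in between, the fold above turns
  //
  //   %p = call ptr @malloc(i64 %n)
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
  //
  // into
  //
  //   %p = call ptr @calloc(i64 1, i64 %n)
  //
  // and the zero-initializing memset becomes dead.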
1922
1923 // Check if there is a dominating condition, that implies that the value
1924 // being stored in a ptr is already present in the ptr.
1925 bool dominatingConditionImpliesValue(MemoryDef *Def) {
1926 auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
1927 BasicBlock *StoreBB = StoreI->getParent();
1928 Value *StorePtr = StoreI->getPointerOperand();
1929 Value *StoreVal = StoreI->getValueOperand();
1930
1931 DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
1932 if (!IDom)
1933 return false;
1934
1935 auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
1936 if (!BI || !BI->isConditional())
1937 return false;
1938
1939     // If both successors are the same block, it is not possible to determine
1940     // whether the optimization applies. (We would not want to optimize a store
1941     // in the FalseBB if the condition is true and vice versa.)
1942 if (BI->getSuccessor(0) == BI->getSuccessor(1))
1943 return false;
1944
1945 Instruction *ICmpL;
1946     ICmpInst::Predicate Pred;
1947     if (!match(BI->getCondition(),
1948 m_c_ICmp(Pred,
1949 m_CombineAnd(m_Load(m_Specific(StorePtr)),
1950 m_Instruction(ICmpL)),
1951 m_Specific(StoreVal))) ||
1952 !ICmpInst::isEquality(Pred))
1953 return false;
1954
1955     // If the else block also branches to the if block (or the other way
1956     // around), it is not possible to determine whether the optimization applies.
1957 if (Pred == ICmpInst::ICMP_EQ &&
1958 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
1959 StoreBB))
1960 return false;
1961
1962 if (Pred == ICmpInst::ICMP_NE &&
1963 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
1964 StoreBB))
1965 return false;
1966
1967 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
1968 MemoryAccess *ClobAcc =
1969 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
1970
1971 return MSSA.dominates(ClobAcc, LoadAcc);
1972 }
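  // Illustrative sketch (C-like pseudocode): the store below is a no-op
  // because the dominating equality check above already guarantees that the
  // value is present in memory on this path.
  //
  //   if (*p == v)
  //     *p = v;   // removable; the condition implies the stored value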
1973
1974 /// \returns true if \p Def is a no-op store, either because it
1975 /// directly stores back a loaded value or stores zero to a calloced object.
1976 bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1977 Instruction *DefI = Def->getMemoryInst();
1978 StoreInst *Store = dyn_cast<StoreInst>(DefI);
1979 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1980 Constant *StoredConstant = nullptr;
1981 if (Store)
1982 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1983 else if (MemSet)
1984 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1985 else
1986 return false;
1987
1988 if (!isRemovable(DefI))
1989 return false;
1990
1991 if (StoredConstant) {
1992 Constant *InitC =
1993 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
1994 // If the clobbering access is LiveOnEntry, no instructions between them
1995 // can modify the memory location.
1996 if (InitC && InitC == StoredConstant)
1997 return MSSA.isLiveOnEntryDef(
1998 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
1999 }
2000
2001 if (!Store)
2002 return false;
2003
2004 if (dominatingConditionImpliesValue(Def))
2005 return true;
2006
2007 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2008 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2009 // Get the defining access for the load.
2010 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2011 // Fast path: the defining accesses are the same.
2012 if (LoadAccess == Def->getDefiningAccess())
2013 return true;
2014
2015 // Look through phi accesses. Recursively scan all phi accesses by
2016 // adding them to a worklist. Bail when we run into a memory def that
2017 // does not match LoadAccess.
2018         SetVector<MemoryAccess *> ToCheck;
2019         MemoryAccess *Current =
2020 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2021 // We don't want to bail when we run into the store memory def. But,
2022 // the phi access may point to it. So, pretend like we've already
2023 // checked it.
2024 ToCheck.insert(Def);
2025 ToCheck.insert(Current);
2026 // Start at current (1) to simulate already having checked Def.
2027 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2028 Current = ToCheck[I];
2029 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2030 // Check all the operands.
2031 for (auto &Use : PhiAccess->incoming_values())
2032 ToCheck.insert(cast<MemoryAccess>(&Use));
2033 continue;
2034 }
2035
2036 // If we found a memory def, bail. This happens when we have an
2037 // unrelated write in between an otherwise noop store.
2038 assert(isa<MemoryDef>(Current) &&
2039 "Only MemoryDefs should reach here.");
2040 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2041 // We are searching for the definition of the store's destination.
2042 // So, if that is the same definition as the load, then this is a
2043 // noop. Otherwise, fail.
2044 if (LoadAccess != Current)
2045 return false;
2046 }
2047 return true;
2048 }
2049 }
2050
2051 return false;
2052 }
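  // Illustrative sketch (hypothetical IR): the classic no-op store handled
  // above writes back a value just loaded from the same location with no
  // intervening clobber.
  //
  //   %v = load i32, ptr %p
  //   store i32 %v, ptr %p    ; no-op, removable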
2053
2054 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2055 bool Changed = false;
2056 for (auto OI : IOL) {
2057 Instruction *DeadI = OI.first;
2058 MemoryLocation Loc = *getLocForWrite(DeadI);
2059 assert(isRemovable(DeadI) && "Expect only removable instruction");
2060
2061 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2062 int64_t DeadStart = 0;
2063 uint64_t DeadSize = Loc.Size.getValue();
2064 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2065 OverlapIntervalsTy &IntervalMap = OI.second;
2066 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2067 if (IntervalMap.empty())
2068 continue;
2069 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2070 }
2071 return Changed;
2072 }
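  // Illustrative sketch (hypothetical IR, %q assumed to be %p + 24): when a
  // later store overwrites only the tail of an earlier memset, the memset is
  // shortened above rather than removed.
  //
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 32, i1 false)
  //   store i64 1, ptr %q     ; overwrites bytes [24, 32)
  //
  // -> the memset is shrunk to cover only the first 24 bytes.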
2073
2074 /// Eliminates writes to locations where the value that is being written
2075 /// is already stored at the same location.
2076 bool eliminateRedundantStoresOfExistingValues() {
2077 bool MadeChange = false;
2078 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2079 "already existing value\n");
2080 for (auto *Def : MemDefs) {
2081 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2082 continue;
2083
2084 Instruction *DefInst = Def->getMemoryInst();
2085 auto MaybeDefLoc = getLocForWrite(DefInst);
2086 if (!MaybeDefLoc || !isRemovable(DefInst))
2087 continue;
2088
2089 MemoryDef *UpperDef;
2090 // To conserve compile-time, we avoid walking to the next clobbering def.
2091 // Instead, we just try to get the optimized access, if it exists. DSE
2092 // will try to optimize defs during the earlier traversal.
2093 if (Def->isOptimized())
2094 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2095 else
2096 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2097 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2098 continue;
2099
2100 Instruction *UpperInst = UpperDef->getMemoryInst();
2101 auto IsRedundantStore = [&]() {
2102 if (DefInst->isIdenticalTo(UpperInst))
2103 return true;
2104 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2105 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2106 // MemSetInst must have a write location.
2107 MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
2108 int64_t InstWriteOffset = 0;
2109 int64_t DepWriteOffset = 0;
2110 auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
2111 InstWriteOffset, DepWriteOffset);
2112 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2113 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2114 OR == OW_Complete;
2115 }
2116 }
2117 return false;
2118 };
2119
2120 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2121 continue;
2122 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2123 << '\n');
2124 deleteDeadInstruction(DefInst);
2125 NumRedundantStores++;
2126 MadeChange = true;
2127 }
2128 return MadeChange;
2129 }
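  // Illustrative sketch (hypothetical IR): a store that writes bytes an
  // earlier memset already established is redundant and removed above.
  //
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
  //   store i32 0, ptr %p     ; value already present -> removed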
2130};
2131
2132static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2133                                 DominatorTree &DT, PostDominatorTree &PDT,
2134                                 const TargetLibraryInfo &TLI,
2135 const LoopInfo &LI) {
2136 bool MadeChange = false;
2137
2138 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2139 // For each store:
2140 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2141 MemoryDef *KillingDef = State.MemDefs[I];
2142 if (State.SkipStores.count(KillingDef))
2143 continue;
2144 Instruction *KillingI = KillingDef->getMemoryInst();
2145
2146 std::optional<MemoryLocation> MaybeKillingLoc;
2147 if (State.isMemTerminatorInst(KillingI)) {
2148 if (auto KillingLoc = State.getLocForTerminator(KillingI))
2149 MaybeKillingLoc = KillingLoc->first;
2150 } else {
2151 MaybeKillingLoc = State.getLocForWrite(KillingI);
2152 }
2153
2154 if (!MaybeKillingLoc) {
2155 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2156 << *KillingI << "\n");
2157 continue;
2158 }
2159 MemoryLocation KillingLoc = *MaybeKillingLoc;
2160 assert(KillingLoc.Ptr && "KillingLoc should not be null");
2161 const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
2162 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2163 << *KillingDef << " (" << *KillingI << ")\n");
2164
2165 unsigned ScanLimit = MemorySSAScanLimit;
2166 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2167 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2168 // Worklist of MemoryAccesses that may be killed by KillingDef.
2169     SmallSetVector<MemoryAccess *, 8> ToCheck;
2170     ToCheck.insert(KillingDef->getDefiningAccess());
2171
2172 bool Shortend = false;
2173 bool IsMemTerm = State.isMemTerminatorInst(KillingI);
2174 // Check if MemoryAccesses in the worklist are killed by KillingDef.
2175 for (unsigned I = 0; I < ToCheck.size(); I++) {
2176 MemoryAccess *Current = ToCheck[I];
2177 if (State.SkipStores.count(Current))
2178 continue;
2179
2180 std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
2181 KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
2182 WalkerStepLimit, IsMemTerm, PartialLimit);
2183
2184 if (!MaybeDeadAccess) {
2185 LLVM_DEBUG(dbgs() << " finished walk\n");
2186 continue;
2187 }
2188
2189 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2190 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2191 if (isa<MemoryPhi>(DeadAccess)) {
2192 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2193 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2194 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2195 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2196 BasicBlock *PhiBlock = DeadAccess->getBlock();
2197
2198 // We only consider incoming MemoryAccesses that come before the
2199 // MemoryPhi. Otherwise we could discover candidates that do not
2200 // strictly dominate our starting def.
2201 if (State.PostOrderNumbers[IncomingBlock] >
2202 State.PostOrderNumbers[PhiBlock])
2203 ToCheck.insert(IncomingAccess);
2204 }
2205 continue;
2206 }
2207 auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
2208 Instruction *DeadI = DeadDefAccess->getMemoryInst();
2209 LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2210 ToCheck.insert(DeadDefAccess->getDefiningAccess());
2211 NumGetDomMemoryDefPassed++;
2212
2213 if (!DebugCounter::shouldExecute(MemorySSACounter))
2214 continue;
2215
2216 MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
2217
2218 if (IsMemTerm) {
2219 const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2220 if (KillingUndObj != DeadUndObj)
2221 continue;
2222 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2223 << "\n KILLER: " << *KillingI << '\n');
2224 State.deleteDeadInstruction(DeadI);
2225 ++NumFastStores;
2226 MadeChange = true;
2227 } else {
2228 // Check if DeadI overwrites KillingI.
2229 int64_t KillingOffset = 0;
2230 int64_t DeadOffset = 0;
2231 OverwriteResult OR = State.isOverwrite(
2232 KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2233 if (OR == OW_MaybePartial) {
2234 auto Iter = State.IOLs.insert(
2235 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2236 DeadI->getParent(), InstOverlapIntervalsTy()));
2237 auto &IOL = Iter.first->second;
2238 OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2239 DeadOffset, DeadI, IOL);
2240 }
2241
2242 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2243 auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2244 auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2245 // We are re-using tryToMergePartialOverlappingStores, which requires
2246 // DeadSI to dominate KillingSI.
2247           // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2248 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2249             if (Constant *Merged = tryToMergePartialOverlappingStores(
2250                     KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2251 State.BatchAA, &DT)) {
2252
2253 // Update stored value of earlier store to merged constant.
2254 DeadSI->setOperand(0, Merged);
2255 ++NumModifiedStores;
2256 MadeChange = true;
2257
2258 Shortend = true;
2259 // Remove killing store and remove any outstanding overlap
2260 // intervals for the updated store.
2261 State.deleteDeadInstruction(KillingSI);
2262 auto I = State.IOLs.find(DeadSI->getParent());
2263 if (I != State.IOLs.end())
2264 I->second.erase(DeadSI);
2265 break;
2266 }
2267 }
2268 }
2269
2270 if (OR == OW_Complete) {
2271 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2272 << "\n KILLER: " << *KillingI << '\n');
2273 State.deleteDeadInstruction(DeadI);
2274 ++NumFastStores;
2275 MadeChange = true;
2276 }
2277 }
2278 }
2279
2280 // Check if the store is a no-op.
2281 if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
2282 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2283 << '\n');
2284 State.deleteDeadInstruction(KillingI);
2285 NumRedundantStores++;
2286 MadeChange = true;
2287 continue;
2288 }
2289
2290 // Can we form a calloc from a memset/malloc pair?
2291 if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
2292 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2293 << " DEAD: " << *KillingI << '\n');
2294 State.deleteDeadInstruction(KillingI);
2295 MadeChange = true;
2296 continue;
2297 }
2298 }
2299
2300   if (EnablePartialOverwriteTracking)
2301     for (auto &KV : State.IOLs)
2302 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2303
2304 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2305 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2306
2307 while (!State.ToRemove.empty()) {
2308 Instruction *DeadInst = State.ToRemove.pop_back_val();
2309 DeadInst->eraseFromParent();
2310 }
2311
2312 return MadeChange;
2313}
2314} // end anonymous namespace
2315
2316//===----------------------------------------------------------------------===//
2317// DSE Pass
2318//===----------------------------------------------------------------------===//
2319 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2320   AliasAnalysis &AA = AM.getResult<AAManager>(F);
2321   const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2322   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2323   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2324   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2325   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2326
2327 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2328
2329#ifdef LLVM_ENABLE_STATS
2330   if (AreStatisticsEnabled())
2331     for (auto &I : instructions(F))
2332 NumRemainingStores += isa<StoreInst>(&I);
2333#endif
2334
2335 if (!Changed)
2336 return PreservedAnalyses::all();
2337
2338   PreservedAnalyses PA;
2339   PA.preserveSet<CFGAnalyses>();
2340   PA.preserve<MemorySSAAnalysis>();
2341   PA.preserve<LoopAnalysis>();
2342 return PA;
2343}
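// For reference, a minimal sketch of running this pass with the new pass
// manager (assuming a populated FunctionAnalysisManager FAM); the same pass is
// available from the command line as `opt -passes=dse`:
//
//   #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
//
//   llvm::FunctionPassManager FPM;
//   FPM.addPass(llvm::DSEPass());
//   FPM.run(F, FAM);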