LLVM 19.0.0git
DeadStoreElimination.cpp
Go to the documentation of this file.
1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/BasicBlock.h"
54#include "llvm/IR/Constant.h"
55#include "llvm/IR/Constants.h"
56#include "llvm/IR/DataLayout.h"
57#include "llvm/IR/DebugInfo.h"
58#include "llvm/IR/Dominators.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/IRBuilder.h"
62#include "llvm/IR/InstrTypes.h"
63#include "llvm/IR/Instruction.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/PassManager.h"
69#include "llvm/IR/Value.h"
72#include "llvm/Support/Debug.h"
79#include <algorithm>
80#include <cassert>
81#include <cstdint>
82#include <iterator>
83#include <map>
84#include <optional>
85#include <utility>
86
87using namespace llvm;
88using namespace PatternMatch;
89
90#define DEBUG_TYPE "dse"
91
92STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
93STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
94STATISTIC(NumFastStores, "Number of stores deleted");
95STATISTIC(NumFastOther, "Number of other instrs removed");
96STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
97STATISTIC(NumModifiedStores, "Number of stores modified");
98STATISTIC(NumCFGChecks, "Number of stores modified");
99STATISTIC(NumCFGTries, "Number of stores modified");
100STATISTIC(NumCFGSuccess, "Number of stores modified");
101STATISTIC(NumGetDomMemoryDefPassed,
102 "Number of times a valid candidate is returned from getDomMemoryDef");
103STATISTIC(NumDomMemDefChecks,
104 "Number iterations check for reads in getDomMemoryDef");
105
106DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
107 "Controls which MemoryDefs are eliminated.");
108
109static cl::opt<bool>
110EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
111 cl::init(true), cl::Hidden,
112 cl::desc("Enable partial-overwrite tracking in DSE"));
113
114static cl::opt<bool>
115EnablePartialStoreMerging("enable-dse-partial-store-merging",
116 cl::init(true), cl::Hidden,
117 cl::desc("Enable partial store merging in DSE"));
118
120 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
121 cl::desc("The number of memory instructions to scan for "
122 "dead store elimination (default = 150)"));
124 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
125 cl::desc("The maximum number of steps while walking upwards to find "
126 "MemoryDefs that may be killed (default = 90)"));
127
129 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
130 cl::desc("The maximum number candidates that only partially overwrite the "
131 "killing MemoryDef to consider"
132 " (default = 5)"));
133
135 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
136 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
137 "other stores per basic block (default = 5000)"));
138
140 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
141 cl::desc(
142 "The cost of a step in the same basic block as the killing MemoryDef"
143 "(default = 1)"));
144
146 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
148 cl::desc("The cost of a step in a different basic "
149 "block than the killing MemoryDef"
150 "(default = 5)"));
151
153 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
154 cl::desc("The maximum number of blocks to check when trying to prove that "
155 "all paths to an exit go through a killing block (default = 50)"));
156
157// This flags allows or disallows DSE to optimize MemorySSA during its
158// traversal. Note that DSE optimizing MemorySSA may impact other passes
159// downstream of the DSE invocation and can lead to issues not being
160// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
161// those cases, the flag can be used to check if DSE's MemorySSA optimizations
162// impact follow-up passes.
163static cl::opt<bool>
164 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
165 cl::desc("Allow DSE to optimize memory accesses."));
166
167//===----------------------------------------------------------------------===//
168// Helper functions
169//===----------------------------------------------------------------------===//
170using OverlapIntervalsTy = std::map<int64_t, int64_t>;
172
173/// Returns true if the end of this instruction can be safely shortened in
174/// length.
176 // Don't shorten stores for now
177 if (isa<StoreInst>(I))
178 return false;
179
180 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
181 switch (II->getIntrinsicID()) {
182 default: return false;
183 case Intrinsic::memset:
184 case Intrinsic::memcpy:
185 case Intrinsic::memcpy_element_unordered_atomic:
186 case Intrinsic::memset_element_unordered_atomic:
187 // Do shorten memory intrinsics.
188 // FIXME: Add memmove if it's also safe to transform.
189 return true;
190 }
191 }
192
193 // Don't shorten libcalls calls for now.
194
195 return false;
196}
197
198/// Returns true if the beginning of this instruction can be safely shortened
199/// in length.
201 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
202 // easily done by offsetting the source address.
203 return isa<AnyMemSetInst>(I);
204}
205
206static std::optional<TypeSize> getPointerSize(const Value *V,
207 const DataLayout &DL,
208 const TargetLibraryInfo &TLI,
209 const Function *F) {
211 ObjectSizeOpts Opts;
213
214 if (getObjectSize(V, Size, DL, &TLI, Opts))
215 return TypeSize::getFixed(Size);
216 return std::nullopt;
217}
218
219namespace {
220
221enum OverwriteResult {
222 OW_Begin,
223 OW_Complete,
224 OW_End,
225 OW_PartialEarlierWithFullLater,
226 OW_MaybePartial,
227 OW_None,
228 OW_Unknown
229};
230
231} // end anonymous namespace
232
233/// Check if two instruction are masked stores that completely
234/// overwrite one another. More specifically, \p KillingI has to
235/// overwrite \p DeadI.
236static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
237 const Instruction *DeadI,
238 BatchAAResults &AA) {
239 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
240 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
241 if (KillingII == nullptr || DeadII == nullptr)
242 return OW_Unknown;
243 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
244 return OW_Unknown;
245 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
246 // Type size.
247 VectorType *KillingTy =
248 cast<VectorType>(KillingII->getArgOperand(0)->getType());
249 VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
250 if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
251 return OW_Unknown;
252 // Element count.
253 if (KillingTy->getElementCount() != DeadTy->getElementCount())
254 return OW_Unknown;
255 // Pointers.
256 Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
257 Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
258 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
259 return OW_Unknown;
260 // Masks.
261 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
262 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
263 return OW_Unknown;
264 return OW_Complete;
265 }
266 return OW_Unknown;
267}
268
269/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
270/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
271/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
272/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
273/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
274/// overwritten by a killing (smaller) store which doesn't write outside the big
275/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
276/// NOTE: This function must only be called if both \p KillingLoc and \p
277/// DeadLoc belong to the same underlying object with valid \p KillingOff and
278/// \p DeadOff.
279static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
280 const MemoryLocation &DeadLoc,
281 int64_t KillingOff, int64_t DeadOff,
282 Instruction *DeadI,
284 const uint64_t KillingSize = KillingLoc.Size.getValue();
285 const uint64_t DeadSize = DeadLoc.Size.getValue();
286 // We may now overlap, although the overlap is not complete. There might also
287 // be other incomplete overlaps, and together, they might cover the complete
288 // dead store.
289 // Note: The correctness of this logic depends on the fact that this function
290 // is not even called providing DepWrite when there are any intervening reads.
292 KillingOff < int64_t(DeadOff + DeadSize) &&
293 int64_t(KillingOff + KillingSize) >= DeadOff) {
294
295 // Insert our part of the overlap into the map.
296 auto &IM = IOL[DeadI];
297 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
298 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
299 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
300 << ")\n");
301
302 // Make sure that we only insert non-overlapping intervals and combine
303 // adjacent intervals. The intervals are stored in the map with the ending
304 // offset as the key (in the half-open sense) and the starting offset as
305 // the value.
306 int64_t KillingIntStart = KillingOff;
307 int64_t KillingIntEnd = KillingOff + KillingSize;
308
309 // Find any intervals ending at, or after, KillingIntStart which start
310 // before KillingIntEnd.
311 auto ILI = IM.lower_bound(KillingIntStart);
312 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
313 // This existing interval is overlapped with the current store somewhere
314 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
315 // intervals and adjusting our start and end.
316 KillingIntStart = std::min(KillingIntStart, ILI->second);
317 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
318 ILI = IM.erase(ILI);
319
320 // Continue erasing and adjusting our end in case other previous
321 // intervals are also overlapped with the current store.
322 //
323 // |--- dead 1 ---| |--- dead 2 ---|
324 // |------- killing---------|
325 //
326 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
327 assert(ILI->second > KillingIntStart && "Unexpected interval");
328 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
329 ILI = IM.erase(ILI);
330 }
331 }
332
333 IM[KillingIntEnd] = KillingIntStart;
334
335 ILI = IM.begin();
336 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
337 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
338 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
339 << ") Composite KillingLoc [" << ILI->second << ", "
340 << ILI->first << ")\n");
341 ++NumCompletePartials;
342 return OW_Complete;
343 }
344 }
345
346 // Check for a dead store which writes to all the memory locations that
347 // the killing store writes to.
348 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
349 int64_t(DeadOff + DeadSize) > KillingOff &&
350 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
351 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
352 << ", " << int64_t(DeadOff + DeadSize)
353 << ") by a killing store [" << KillingOff << ", "
354 << int64_t(KillingOff + KillingSize) << ")\n");
355 // TODO: Maybe come up with a better name?
356 return OW_PartialEarlierWithFullLater;
357 }
358
359 // Another interesting case is if the killing store overwrites the end of the
360 // dead store.
361 //
362 // |--dead--|
363 // |-- killing --|
364 //
365 // In this case we may want to trim the size of dead store to avoid
366 // generating stores to addresses which will definitely be overwritten killing
367 // store.
369 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
370 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
371 return OW_End;
372
373 // Finally, we also need to check if the killing store overwrites the
374 // beginning of the dead store.
375 //
376 // |--dead--|
377 // |-- killing --|
378 //
379 // In this case we may want to move the destination address and trim the size
380 // of dead store to avoid generating stores to addresses which will definitely
381 // be overwritten killing store.
383 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
384 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
385 "Expect to be handled as OW_Complete");
386 return OW_Begin;
387 }
388 // Otherwise, they don't completely overlap.
389 return OW_Unknown;
390}
391
392/// Returns true if the memory which is accessed by the second instruction is not
393/// modified between the first and the second instruction.
394/// Precondition: Second instruction must be dominated by the first
395/// instruction.
396static bool
398 BatchAAResults &AA, const DataLayout &DL,
399 DominatorTree *DT) {
400 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
401 // instructions which can modify the memory location accessed by SecondI.
402 //
403 // While doing the walk keep track of the address to check. It might be
404 // different in different basic blocks due to PHI translation.
405 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
407 // Keep track of the address we visited each block with. Bail out if we
408 // visit a block with different addresses.
410
411 BasicBlock::iterator FirstBBI(FirstI);
412 ++FirstBBI;
413 BasicBlock::iterator SecondBBI(SecondI);
414 BasicBlock *FirstBB = FirstI->getParent();
415 BasicBlock *SecondBB = SecondI->getParent();
416 MemoryLocation MemLoc;
417 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
418 MemLoc = MemoryLocation::getForDest(MemSet);
419 else
420 MemLoc = MemoryLocation::get(SecondI);
421
422 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
423
424 // Start checking the SecondBB.
425 WorkList.push_back(
426 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
427 bool isFirstBlock = true;
428
429 // Check all blocks going backward until we reach the FirstBB.
430 while (!WorkList.empty()) {
431 BlockAddressPair Current = WorkList.pop_back_val();
432 BasicBlock *B = Current.first;
433 PHITransAddr &Addr = Current.second;
434 Value *Ptr = Addr.getAddr();
435
436 // Ignore instructions before FirstI if this is the FirstBB.
437 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
438
440 if (isFirstBlock) {
441 // Ignore instructions after SecondI if this is the first visit of SecondBB.
442 assert(B == SecondBB && "first block is not the store block");
443 EI = SecondBBI;
444 isFirstBlock = false;
445 } else {
446 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
447 // In this case we also have to look at instructions after SecondI.
448 EI = B->end();
449 }
450 for (; BI != EI; ++BI) {
451 Instruction *I = &*BI;
452 if (I->mayWriteToMemory() && I != SecondI)
453 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
454 return false;
455 }
456 if (B != FirstBB) {
457 assert(B != &FirstBB->getParent()->getEntryBlock() &&
458 "Should not hit the entry block because SI must be dominated by LI");
459 for (BasicBlock *Pred : predecessors(B)) {
460 PHITransAddr PredAddr = Addr;
461 if (PredAddr.needsPHITranslationFromBlock(B)) {
462 if (!PredAddr.isPotentiallyPHITranslatable())
463 return false;
464 if (!PredAddr.translateValue(B, Pred, DT, false))
465 return false;
466 }
467 Value *TranslatedPtr = PredAddr.getAddr();
468 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
469 if (!Inserted.second) {
470 // We already visited this block before. If it was with a different
471 // address - bail out!
472 if (TranslatedPtr != Inserted.first->second)
473 return false;
474 // ... otherwise just skip it.
475 continue;
476 }
477 WorkList.push_back(std::make_pair(Pred, PredAddr));
478 }
479 }
480 }
481 return true;
482}
483
484static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
485 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
486 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
487 const DataLayout &DL = Inst->getModule()->getDataLayout();
488 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
489 uint64_t DeadSliceOffsetInBits =
490 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
491 auto SetDeadFragExpr = [](auto *Assign,
492 DIExpression::FragmentInfo DeadFragment) {
493 // createFragmentExpression expects an offset relative to the existing
494 // fragment offset if there is one.
495 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
496 Assign->getExpression()
497 ->getFragmentInfo()
498 .value_or(DIExpression::FragmentInfo(0, 0))
499 .OffsetInBits;
501 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
502 Assign->setExpression(*NewExpr);
503 return;
504 }
505 // Failed to create a fragment expression for this so discard the value,
506 // making this a kill location.
508 DIExpression::get(Assign->getContext(), std::nullopt),
509 DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
510 Assign->setExpression(Expr);
511 Assign->setKillLocation();
512 };
513
514 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
515 // link to any instructions. Created in the loop below (once).
516 DIAssignID *LinkToNothing = nullptr;
517 LLVMContext &Ctx = Inst->getContext();
518 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
519 if (!LinkToNothing)
520 LinkToNothing = DIAssignID::getDistinct(Ctx);
521 return LinkToNothing;
522 };
523
524 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
525 // overlapping dbg.assign intrinsic. The loop invalidates the iterators
526 // returned by getAssignmentMarkers so save a copy of the markers to iterate
527 // over.
528 auto LinkedRange = at::getAssignmentMarkers(Inst);
529 SmallVector<DbgVariableRecord *> LinkedDVRAssigns =
531 SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
532 LinkedRange.end());
533 auto InsertAssignForOverlap = [&](auto *Assign) {
534 std::optional<DIExpression::FragmentInfo> NewFragment;
535 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
536 DeadSliceSizeInBits, Assign,
537 NewFragment) ||
538 !NewFragment) {
539 // We couldn't calculate the intersecting fragment for some reason. Be
540 // cautious and unlink the whole assignment from the store.
541 Assign->setKillAddress();
542 Assign->setAssignId(GetDeadLink());
543 return;
544 }
545 // No intersect.
546 if (NewFragment->SizeInBits == 0)
547 return;
548
549 // Fragments overlap: insert a new dbg.assign for this dead part.
550 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
551 NewAssign->insertAfter(Assign);
552 NewAssign->setAssignId(GetDeadLink());
553 if (NewFragment)
554 SetDeadFragExpr(NewAssign, *NewFragment);
555 NewAssign->setKillAddress();
556 };
557 for_each(Linked, InsertAssignForOverlap);
558 for_each(LinkedDVRAssigns, InsertAssignForOverlap);
559}
560
561static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
562 uint64_t &DeadSize, int64_t KillingStart,
563 uint64_t KillingSize, bool IsOverwriteEnd) {
564 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
565 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
566
567 // We assume that memet/memcpy operates in chunks of the "largest" native
568 // type size and aligned on the same value. That means optimal start and size
569 // of memset/memcpy should be modulo of preferred alignment of that type. That
570 // is it there is no any sense in trying to reduce store size any further
571 // since any "extra" stores comes for free anyway.
572 // On the other hand, maximum alignment we can achieve is limited by alignment
573 // of initial store.
574
575 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
576 // "largest" native type.
577 // Note: What is the proper way to get that value?
578 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
579 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
580
581 int64_t ToRemoveStart = 0;
582 uint64_t ToRemoveSize = 0;
583 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
584 // maintained on the remaining store.
585 if (IsOverwriteEnd) {
586 // Calculate required adjustment for 'KillingStart' in order to keep
587 // remaining store size aligned on 'PerfAlign'.
588 uint64_t Off =
589 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
590 ToRemoveStart = KillingStart + Off;
591 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
592 return false;
593 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
594 } else {
595 ToRemoveStart = DeadStart;
596 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
597 "Not overlapping accesses?");
598 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
599 // Calculate required adjustment for 'ToRemoveSize'in order to keep
600 // start of the remaining store aligned on 'PerfAlign'.
601 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
602 if (Off != 0) {
603 if (ToRemoveSize <= (PrefAlign.value() - Off))
604 return false;
605 ToRemoveSize -= PrefAlign.value() - Off;
606 }
607 assert(isAligned(PrefAlign, ToRemoveSize) &&
608 "Should preserve selected alignment");
609 }
610
611 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
612 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
613
614 uint64_t NewSize = DeadSize - ToRemoveSize;
615 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
616 // When shortening an atomic memory intrinsic, the newly shortened
617 // length must remain an integer multiple of the element size.
618 const uint32_t ElementSize = AMI->getElementSizeInBytes();
619 if (0 != NewSize % ElementSize)
620 return false;
621 }
622
623 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
624 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
625 << "\n KILLER [" << ToRemoveStart << ", "
626 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
627
628 Value *DeadWriteLength = DeadIntrinsic->getLength();
629 Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
630 DeadIntrinsic->setLength(TrimmedLength);
631 DeadIntrinsic->setDestAlignment(PrefAlign);
632
633 Value *OrigDest = DeadIntrinsic->getRawDest();
634 if (!IsOverwriteEnd) {
635 Value *Indices[1] = {
636 ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
638 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
639 DeadI->getIterator());
640 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
641 DeadIntrinsic->setDest(NewDestGEP);
642 }
643
644 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
645 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
646 IsOverwriteEnd);
647
648 // Finally update start and size of dead access.
649 if (!IsOverwriteEnd)
650 DeadStart += ToRemoveSize;
651 DeadSize = NewSize;
652
653 return true;
654}
655
657 int64_t &DeadStart, uint64_t &DeadSize) {
658 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
659 return false;
660
661 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
662 int64_t KillingStart = OII->second;
663 uint64_t KillingSize = OII->first - KillingStart;
664
665 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
666
667 if (KillingStart > DeadStart &&
668 // Note: "KillingStart - KillingStart" is known to be positive due to
669 // preceding check.
670 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
671 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
672 // be non negative due to preceding checks.
673 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
674 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
675 true)) {
676 IntervalMap.erase(OII);
677 return true;
678 }
679 }
680 return false;
681}
682
685 int64_t &DeadStart, uint64_t &DeadSize) {
687 return false;
688
689 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
690 int64_t KillingStart = OII->second;
691 uint64_t KillingSize = OII->first - KillingStart;
692
693 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
694
695 if (KillingStart <= DeadStart &&
696 // Note: "DeadStart - KillingStart" is known to be non negative due to
697 // preceding check.
698 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
699 // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
700 // be positive due to preceding checks.
701 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
702 "Should have been handled as OW_Complete");
703 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
704 false)) {
705 IntervalMap.erase(OII);
706 return true;
707 }
708 }
709 return false;
710}
711
712static Constant *
714 int64_t KillingOffset, int64_t DeadOffset,
715 const DataLayout &DL, BatchAAResults &AA,
716 DominatorTree *DT) {
717
718 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
719 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
720 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
721 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
722 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
723 // If the store we find is:
724 // a) partially overwritten by the store to 'Loc'
725 // b) the killing store is fully contained in the dead one and
726 // c) they both have a constant value
727 // d) none of the two stores need padding
728 // Merge the two stores, replacing the dead store's value with a
729 // merge of both values.
730 // TODO: Deal with other constant types (vectors, etc), and probably
731 // some mem intrinsics (if needed)
732
733 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
734 APInt KillingValue =
735 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
736 unsigned KillingBits = KillingValue.getBitWidth();
737 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
738 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
739
740 // Offset of the smaller store inside the larger store
741 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
742 unsigned LShiftAmount =
743 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
744 : BitOffsetDiff;
745 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
746 LShiftAmount + KillingBits);
747 // Clear the bits we'll be replacing, then OR with the smaller
748 // store, shifted appropriately.
749 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
750 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
751 << "\n Killing: " << *KillingI
752 << "\n Merged Value: " << Merged << '\n');
753 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
754 }
755 return nullptr;
756}
757
758namespace {
759// Returns true if \p I is an intrinsic that does not read or write memory.
760bool isNoopIntrinsic(Instruction *I) {
761 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
762 switch (II->getIntrinsicID()) {
763 case Intrinsic::lifetime_start:
764 case Intrinsic::lifetime_end:
765 case Intrinsic::invariant_end:
766 case Intrinsic::launder_invariant_group:
767 case Intrinsic::assume:
768 return true;
769 case Intrinsic::dbg_declare:
770 case Intrinsic::dbg_label:
771 case Intrinsic::dbg_value:
772 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
773 default:
774 return false;
775 }
776 }
777 return false;
778}
779
780// Check if we can ignore \p D for DSE.
781bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
782 Instruction *DI = D->getMemoryInst();
783 // Calls that only access inaccessible memory cannot read or write any memory
784 // locations we consider for elimination.
785 if (auto *CB = dyn_cast<CallBase>(DI))
786 if (CB->onlyAccessesInaccessibleMemory())
787 return true;
788
789 // We can eliminate stores to locations not visible to the caller across
790 // throwing instructions.
791 if (DI->mayThrow() && !DefVisibleToCaller)
792 return true;
793
794 // We can remove the dead stores, irrespective of the fence and its ordering
795 // (release/acquire/seq_cst). Fences only constraints the ordering of
796 // already visible stores, it does not make a store visible to other
797 // threads. So, skipping over a fence does not change a store from being
798 // dead.
799 if (isa<FenceInst>(DI))
800 return true;
801
802 // Skip intrinsics that do not really read or modify memory.
803 if (isNoopIntrinsic(DI))
804 return true;
805
806 return false;
807}
808
809struct DSEState {
810 Function &F;
811 AliasAnalysis &AA;
813
814 /// The single BatchAA instance that is used to cache AA queries. It will
815 /// not be invalidated over the whole run. This is safe, because:
816 /// 1. Only memory writes are removed, so the alias cache for memory
817 /// locations remains valid.
818 /// 2. No new instructions are added (only instructions removed), so cached
819 /// information for a deleted value cannot be accessed by a re-used new
820 /// value pointer.
821 BatchAAResults BatchAA;
822
823 MemorySSA &MSSA;
824 DominatorTree &DT;
826 const TargetLibraryInfo &TLI;
827 const DataLayout &DL;
828 const LoopInfo &LI;
829
830 // Whether the function contains any irreducible control flow, useful for
831 // being accurately able to detect loops.
832 bool ContainsIrreducibleLoops;
833
834 // All MemoryDefs that potentially could kill other MemDefs.
836 // Any that should be skipped as they are already deleted
838 // Keep track whether a given object is captured before return or not.
839 DenseMap<const Value *, bool> CapturedBeforeReturn;
840 // Keep track of all of the objects that are invisible to the caller after
841 // the function returns.
842 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
843 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
844 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
845 // Post-order numbers for each basic block. Used to figure out if memory
846 // accesses are executed before another access.
847 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
848
849 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
850 /// basic block.
852 // Check if there are root nodes that are terminated by UnreachableInst.
853 // Those roots pessimize post-dominance queries. If there are such roots,
854 // fall back to CFG scan starting from all non-unreachable roots.
855 bool AnyUnreachableExit;
856
857 // Whether or not we should iterate on removing dead stores at the end of the
858 // function due to removing a store causing a previously captured pointer to
859 // no longer be captured.
860 bool ShouldIterateEndOfFunctionDSE;
861
862 /// Dead instructions to be removed at the end of DSE.
864
865 // Class contains self-reference, make sure it's not copied/moved.
866 DSEState(const DSEState &) = delete;
867 DSEState &operator=(const DSEState &) = delete;
868
869 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
870 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
871 const LoopInfo &LI)
872 : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
873 PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
874 // Collect blocks with throwing instructions not modeled in MemorySSA and
875 // alloc-like objects.
876 unsigned PO = 0;
877 for (BasicBlock *BB : post_order(&F)) {
878 PostOrderNumbers[BB] = PO++;
879 for (Instruction &I : *BB) {
880 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
881 if (I.mayThrow() && !MA)
882 ThrowingBlocks.insert(I.getParent());
883
884 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
885 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
886 (getLocForWrite(&I) || isMemTerminatorInst(&I)))
887 MemDefs.push_back(MD);
888 }
889 }
890
891 // Treat byval or inalloca arguments the same as Allocas, stores to them are
892 // dead at the end of the function.
893 for (Argument &AI : F.args())
894 if (AI.hasPassPointeeByValueCopyAttr())
895 InvisibleToCallerAfterRet.insert({&AI, true});
896
897 // Collect whether there is any irreducible control flow in the function.
898 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
899
900 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
901 return isa<UnreachableInst>(E->getTerminator());
902 });
903 }
904
905 LocationSize strengthenLocationSize(const Instruction *I,
906 LocationSize Size) const {
907 if (auto *CB = dyn_cast<CallBase>(I)) {
908 LibFunc F;
909 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
910 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
911 // Use the precise location size specified by the 3rd argument
912 // for determining KillingI overwrites DeadLoc if it is a memset_chk
913 // instruction. memset_chk will write either the amount specified as 3rd
914 // argument or the function will immediately abort and exit the program.
915 // NOTE: AA may determine NoAlias if it can prove that the access size
916 // is larger than the allocation size due to that being UB. To avoid
917 // returning potentially invalid NoAlias results by AA, limit the use of
918 // the precise location size to isOverwrite.
919 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
920 return LocationSize::precise(Len->getZExtValue());
921 }
922 }
923 return Size;
924 }
925
926 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
927 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
928 /// location (by \p DeadI instruction).
929 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
930 /// \p DeadI, but they both write to the same underlying object. In that
931 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
932 /// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
933 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
934 OverwriteResult isOverwrite(const Instruction *KillingI,
935 const Instruction *DeadI,
936 const MemoryLocation &KillingLoc,
937 const MemoryLocation &DeadLoc,
938 int64_t &KillingOff, int64_t &DeadOff) {
939 // AliasAnalysis does not always account for loops. Limit overwrite checks
940 // to dependencies for which we can guarantee they are independent of any
941 // loops they are in.
942 if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
943 return OW_Unknown;
944
945 LocationSize KillingLocSize =
946 strengthenLocationSize(KillingI, KillingLoc.Size);
947 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
948 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
949 const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
950 const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
951
952 // Check whether the killing store overwrites the whole object, in which
953 // case the size/offset of the dead store does not matter.
954 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
955 isIdentifiedObject(KillingUndObj)) {
956 std::optional<TypeSize> KillingUndObjSize =
957 getPointerSize(KillingUndObj, DL, TLI, &F);
958 if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
959 return OW_Complete;
960 }
961
962 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
963 // get imprecise values here, though (except for unknown sizes).
964 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
965 // In case no constant size is known, try to an IR values for the number
966 // of bytes written and check if they match.
967 const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
968 const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
969 if (KillingMemI && DeadMemI) {
970 const Value *KillingV = KillingMemI->getLength();
971 const Value *DeadV = DeadMemI->getLength();
972 if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
973 return OW_Complete;
974 }
975
976 // Masked stores have imprecise locations, but we can reason about them
977 // to some extent.
978 return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
979 }
980
981 const TypeSize KillingSize = KillingLocSize.getValue();
982 const TypeSize DeadSize = DeadLoc.Size.getValue();
983 // Bail on doing Size comparison which depends on AA for now
984 // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
985 const bool AnyScalable =
986 DeadSize.isScalable() || KillingLocSize.isScalable();
987
988 if (AnyScalable)
989 return OW_Unknown;
990 // Query the alias information
991 AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
992
993 // If the start pointers are the same, we just have to compare sizes to see if
994 // the killing store was larger than the dead store.
995 if (AAR == AliasResult::MustAlias) {
996 // Make sure that the KillingSize size is >= the DeadSize size.
997 if (KillingSize >= DeadSize)
998 return OW_Complete;
999 }
1000
1001 // If we hit a partial alias we may have a full overwrite
1002 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1003 int32_t Off = AAR.getOffset();
1004 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1005 return OW_Complete;
1006 }
1007
1008 // If we can't resolve the same pointers to the same object, then we can't
1009 // analyze them at all.
1010 if (DeadUndObj != KillingUndObj) {
1011 // Non aliasing stores to different objects don't overlap. Note that
1012 // if the killing store is known to overwrite whole object (out of
1013 // bounds access overwrites whole object as well) then it is assumed to
1014 // completely overwrite any store to the same object even if they don't
1015 // actually alias (see next check).
1016 if (AAR == AliasResult::NoAlias)
1017 return OW_None;
1018 return OW_Unknown;
1019 }
1020
1021 // Okay, we have stores to two completely different pointers. Try to
1022 // decompose the pointer into a "base + constant_offset" form. If the base
1023 // pointers are equal, then we can reason about the two stores.
1024 DeadOff = 0;
1025 KillingOff = 0;
1026 const Value *DeadBasePtr =
1027 GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1028 const Value *KillingBasePtr =
1029 GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1030
1031 // If the base pointers still differ, we have two completely different
1032 // stores.
1033 if (DeadBasePtr != KillingBasePtr)
1034 return OW_Unknown;
1035
1036 // The killing access completely overlaps the dead store if and only if
1037 // both start and end of the dead one is "inside" the killing one:
1038 // |<->|--dead--|<->|
1039 // |-----killing------|
1040 // Accesses may overlap if and only if start of one of them is "inside"
1041 // another one:
1042 // |<->|--dead--|<-------->|
1043 // |-------killing--------|
1044 // OR
1045 // |-------dead-------|
1046 // |<->|---killing---|<----->|
1047 //
1048 // We have to be careful here as *Off is signed while *.Size is unsigned.
1049
1050 // Check if the dead access starts "not before" the killing one.
1051 if (DeadOff >= KillingOff) {
1052 // If the dead access ends "not after" the killing access then the
1053 // dead one is completely overwritten by the killing one.
1054 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1055 return OW_Complete;
1056 // If start of the dead access is "before" end of the killing access
1057 // then accesses overlap.
1058 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1059 return OW_MaybePartial;
1060 }
1061 // If start of the killing access is "before" end of the dead access then
1062 // accesses overlap.
1063 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1064 return OW_MaybePartial;
1065 }
1066
1067 // Can reach here only if accesses are known not to overlap.
1068 return OW_None;
1069 }
1070
1071 bool isInvisibleToCallerAfterRet(const Value *V) {
1072 if (isa<AllocaInst>(V))
1073 return true;
1074 auto I = InvisibleToCallerAfterRet.insert({V, false});
1075 if (I.second) {
1076 if (!isInvisibleToCallerOnUnwind(V)) {
1077 I.first->second = false;
1078 } else if (isNoAliasCall(V)) {
1079 I.first->second = !PointerMayBeCaptured(V, true, false);
1080 }
1081 }
1082 return I.first->second;
1083 }
1084
1085 bool isInvisibleToCallerOnUnwind(const Value *V) {
1086 bool RequiresNoCaptureBeforeUnwind;
1087 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1088 return false;
1089 if (!RequiresNoCaptureBeforeUnwind)
1090 return true;
1091
1092 auto I = CapturedBeforeReturn.insert({V, true});
1093 if (I.second)
1094 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1095 // with the killing MemoryDef. But we refrain from doing so for now to
1096 // limit compile-time and this does not cause any changes to the number
1097 // of stores removed on a large test set in practice.
1098 I.first->second = PointerMayBeCaptured(V, false, true);
1099 return !I.first->second;
1100 }
1101
1102 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1103 if (!I->mayWriteToMemory())
1104 return std::nullopt;
1105
1106 if (auto *CB = dyn_cast<CallBase>(I))
1107 return MemoryLocation::getForDest(CB, TLI);
1108
1110 }
1111
1112 /// Assuming this instruction has a dead analyzable write, can we delete
1113 /// this instruction?
1114 bool isRemovable(Instruction *I) {
1115 assert(getLocForWrite(I) && "Must have analyzable write");
1116
1117 // Don't remove volatile/atomic stores.
1118 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1119 return SI->isUnordered();
1120
1121 if (auto *CB = dyn_cast<CallBase>(I)) {
1122 // Don't remove volatile memory intrinsics.
1123 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1124 return !MI->isVolatile();
1125
1126 // Never remove dead lifetime intrinsics, e.g. because they are followed
1127 // by a free.
1128 if (CB->isLifetimeStartOrEnd())
1129 return false;
1130
1131 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1132 !CB->isTerminator();
1133 }
1134
1135 return false;
1136 }
1137
1138 /// Returns true if \p UseInst completely overwrites \p DefLoc
1139 /// (stored by \p DefInst).
1140 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1141 Instruction *UseInst) {
1142 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1143 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1144 // MemoryDef.
1145 if (!UseInst->mayWriteToMemory())
1146 return false;
1147
1148 if (auto *CB = dyn_cast<CallBase>(UseInst))
1149 if (CB->onlyAccessesInaccessibleMemory())
1150 return false;
1151
1152 int64_t InstWriteOffset, DepWriteOffset;
1153 if (auto CC = getLocForWrite(UseInst))
1154 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1155 DepWriteOffset) == OW_Complete;
1156 return false;
1157 }
1158
1159 /// Returns true if \p Def is not read before returning from the function.
1160 bool isWriteAtEndOfFunction(MemoryDef *Def) {
1161 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1162 << *Def->getMemoryInst()
1163 << ") is at the end the function \n");
1164
1165 auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
1166 if (!MaybeLoc) {
1167 LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1168 return false;
1169 }
1170
1173 auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1174 if (!Visited.insert(Acc).second)
1175 return;
1176 for (Use &U : Acc->uses())
1177 WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1178 };
1179 PushMemUses(Def);
1180 for (unsigned I = 0; I < WorkList.size(); I++) {
1181 if (WorkList.size() >= MemorySSAScanLimit) {
1182 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1183 return false;
1184 }
1185
1186 MemoryAccess *UseAccess = WorkList[I];
1187 if (isa<MemoryPhi>(UseAccess)) {
1188 // AliasAnalysis does not account for loops. Limit elimination to
1189 // candidates for which we can guarantee they always store to the same
1190 // memory location.
1191 if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
1192 return false;
1193
1194 PushMemUses(cast<MemoryPhi>(UseAccess));
1195 continue;
1196 }
1197 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1198 // of times this is called and/or caching it.
1199 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1200 if (isReadClobber(*MaybeLoc, UseInst)) {
1201 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1202 return false;
1203 }
1204
1205 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1206 PushMemUses(UseDef);
1207 }
1208 return true;
1209 }
1210
1211 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1212 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1213 /// indicating whether \p I is a free-like call.
1214 std::optional<std::pair<MemoryLocation, bool>>
1215 getLocForTerminator(Instruction *I) const {
1216 uint64_t Len;
1217 Value *Ptr;
1218 if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1219 m_Value(Ptr))))
1220 return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1221
1222 if (auto *CB = dyn_cast<CallBase>(I)) {
1223 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1224 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1225 }
1226
1227 return std::nullopt;
1228 }
1229
1230 /// Returns true if \p I is a memory terminator instruction like
1231 /// llvm.lifetime.end or free.
1232 bool isMemTerminatorInst(Instruction *I) const {
1233 auto *CB = dyn_cast<CallBase>(I);
1234 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1235 getFreedOperand(CB, &TLI) != nullptr);
1236 }
1237
1238 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1239 /// instruction \p AccessI.
1240 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1241 Instruction *MaybeTerm) {
1242 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1243 getLocForTerminator(MaybeTerm);
1244
1245 if (!MaybeTermLoc)
1246 return false;
1247
1248 // If the terminator is a free-like call, all accesses to the underlying
1249 // object can be considered terminated.
1250 if (getUnderlyingObject(Loc.Ptr) !=
1251 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1252 return false;
1253
1254 auto TermLoc = MaybeTermLoc->first;
1255 if (MaybeTermLoc->second) {
1256 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1257 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1258 }
1259 int64_t InstWriteOffset = 0;
1260 int64_t DepWriteOffset = 0;
1261 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1262 DepWriteOffset) == OW_Complete;
1263 }
1264
1265 // Returns true if \p Use may read from \p DefLoc.
1266 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1267 if (isNoopIntrinsic(UseInst))
1268 return false;
1269
1270 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1271 // treated as read clobber.
1272 if (auto SI = dyn_cast<StoreInst>(UseInst))
1273 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1274
1275 if (!UseInst->mayReadFromMemory())
1276 return false;
1277
1278 if (auto *CB = dyn_cast<CallBase>(UseInst))
1279 if (CB->onlyAccessesInaccessibleMemory())
1280 return false;
1281
1282 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1283 }
1284
1285 /// Returns true if a dependency between \p Current and \p KillingDef is
1286 /// guaranteed to be loop invariant for the loops that they are in. Either
1287 /// because they are known to be in the same block, in the same loop level or
1288 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1289 /// during execution of the containing function.
1290 bool isGuaranteedLoopIndependent(const Instruction *Current,
1291 const Instruction *KillingDef,
1292 const MemoryLocation &CurrentLoc) {
1293 // If the dependency is within the same block or loop level (being careful
1294 // of irreducible loops), we know that AA will return a valid result for the
1295 // memory dependency. (Both at the function level, outside of any loop,
1296 // would also be valid but we currently disable that to limit compile time).
1297 if (Current->getParent() == KillingDef->getParent())
1298 return true;
1299 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1300 if (!ContainsIrreducibleLoops && CurrentLI &&
1301 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1302 return true;
1303 // Otherwise check the memory location is invariant to any loops.
1304 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1305 }
1306
1307 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1308 /// loop. In particular, this guarantees that it only references a single
1309 /// MemoryLocation during execution of the containing function.
1310 bool isGuaranteedLoopInvariant(const Value *Ptr) {
1311 Ptr = Ptr->stripPointerCasts();
1312 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1313 if (GEP->hasAllConstantIndices())
1314 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1315
1316 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1317 return I->getParent()->isEntryBlock() ||
1318 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1319 }
1320 return true;
1321 }
1322
1323 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1324 // with no read access between them or on any other path to a function exit
1325 // block if \p KillingLoc is not accessible after the function returns. If
1326 // there is no such MemoryDef, return std::nullopt. The returned value may not
1327 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1328 // encounter an aliasing MemoryUse (read).
1329 std::optional<MemoryAccess *>
1330 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1331 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1332 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1333 bool IsMemTerm, unsigned &PartialLimit) {
1334 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1335 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1336 return std::nullopt;
1337 }
1338
1339 MemoryAccess *Current = StartAccess;
1340 Instruction *KillingI = KillingDef->getMemoryInst();
1341 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1342
1343 // Only optimize defining access of KillingDef when directly starting at its
1344 // defining access. The defining access also must only access KillingLoc. At
1345 // the moment we only support instructions with a single write location, so
1346 // it should be sufficient to disable optimizations for instructions that
1347 // also read from memory.
1348 bool CanOptimize = OptimizeMemorySSA &&
1349 KillingDef->getDefiningAccess() == StartAccess &&
1350 !KillingI->mayReadFromMemory();
1351
1352 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1353 std::optional<MemoryLocation> CurrentLoc;
1354 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1355 LLVM_DEBUG({
1356 dbgs() << " visiting " << *Current;
1357 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1358 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1359 << ")";
1360 dbgs() << "\n";
1361 });
1362
1363 // Reached TOP.
1364 if (MSSA.isLiveOnEntryDef(Current)) {
1365 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1366 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1367 // The first clobbering def is... none.
1368 KillingDef->setOptimized(Current);
1369 return std::nullopt;
1370 }
1371
1372 // Cost of a step. Accesses in the same block are more likely to be valid
1373 // candidates for elimination, hence consider them cheaper.
1374 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1377 if (WalkerStepLimit <= StepCost) {
1378 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1379 return std::nullopt;
1380 }
1381 WalkerStepLimit -= StepCost;
1382
1383 // Return for MemoryPhis. They cannot be eliminated directly and the
1384 // caller is responsible for traversing them.
1385 if (isa<MemoryPhi>(Current)) {
1386 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1387 return Current;
1388 }
1389
1390 // Below, check if CurrentDef is a valid candidate to be eliminated by
1391 // KillingDef. If it is not, check the next candidate.
1392 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1393 Instruction *CurrentI = CurrentDef->getMemoryInst();
1394
1395 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1396 CanOptimize = false;
1397 continue;
1398 }
1399
1400 // Before we try to remove anything, check for any extra throwing
1401 // instructions that block us from DSEing
1402 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1403 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1404 return std::nullopt;
1405 }
1406
1407 // Check for anything that looks like it will be a barrier to further
1408 // removal
1409 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1410 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1411 return std::nullopt;
1412 }
1413
1414 // If Current is known to be on path that reads DefLoc or is a read
1415 // clobber, bail out, as the path is not profitable. We skip this check
1416 // for intrinsic calls, because the code knows how to handle memcpy
1417 // intrinsics.
1418 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1419 return std::nullopt;
1420
1421 // Quick check if there are direct uses that are read-clobbers.
1422 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1423 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1424 return !MSSA.dominates(StartAccess, UseOrDef) &&
1425 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1426 return false;
1427 })) {
1428 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1429 return std::nullopt;
1430 }
1431
1432 // If Current does not have an analyzable write location or is not
1433 // removable, skip it.
1434 CurrentLoc = getLocForWrite(CurrentI);
1435 if (!CurrentLoc || !isRemovable(CurrentI)) {
1436 CanOptimize = false;
1437 continue;
1438 }
1439
1440 // AliasAnalysis does not account for loops. Limit elimination to
1441 // candidates for which we can guarantee they always store to the same
1442 // memory location and not located in different loops.
1443 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1444 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1445 CanOptimize = false;
1446 continue;
1447 }
1448
1449 if (IsMemTerm) {
1450 // If the killing def is a memory terminator (e.g. lifetime.end), check
1451 // the next candidate if the current Current does not write the same
1452 // underlying object as the terminator.
1453 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1454 CanOptimize = false;
1455 continue;
1456 }
1457 } else {
1458 int64_t KillingOffset = 0;
1459 int64_t DeadOffset = 0;
1460 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1461 KillingOffset, DeadOffset);
1462 if (CanOptimize) {
1463 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1464 // optimized access. Do not optimize if CurrentDef is already the
1465 // defining access of KillingDef.
1466 if (CurrentDef != KillingDef->getDefiningAccess() &&
1467 (OR == OW_Complete || OR == OW_MaybePartial))
1468 KillingDef->setOptimized(CurrentDef);
1469
1470 // Once a may-aliasing def is encountered do not set an optimized
1471 // access.
1472 if (OR != OW_None)
1473 CanOptimize = false;
1474 }
1475
1476 // If Current does not write to the same object as KillingDef, check
1477 // the next candidate.
1478 if (OR == OW_Unknown || OR == OW_None)
1479 continue;
1480 else if (OR == OW_MaybePartial) {
1481 // If KillingDef only partially overwrites Current, check the next
1482 // candidate if the partial step limit is exceeded. This aggressively
1483 // limits the number of candidates for partial store elimination,
1484 // which are less likely to be removable in the end.
1485 if (PartialLimit <= 1) {
1486 WalkerStepLimit -= 1;
1487 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1488 continue;
1489 }
1490 PartialLimit -= 1;
1491 }
1492 }
1493 break;
1494 };
1495
1496 // Accesses to objects accessible after the function returns can only be
1497 // eliminated if the access is dead along all paths to the exit. Collect
1498 // the blocks with killing (=completely overwriting MemoryDefs) and check if
1499 // they cover all paths from MaybeDeadAccess to any function exit.
1501 KillingDefs.insert(KillingDef->getMemoryInst());
1502 MemoryAccess *MaybeDeadAccess = Current;
1503 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1504 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1505 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1506 << *MaybeDeadI << ")\n");
1507
1509 auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1510 for (Use &U : Acc->uses())
1511 WorkList.insert(cast<MemoryAccess>(U.getUser()));
1512 };
1513 PushMemUses(MaybeDeadAccess);
1514
1515 // Check if DeadDef may be read.
1516 for (unsigned I = 0; I < WorkList.size(); I++) {
1517 MemoryAccess *UseAccess = WorkList[I];
1518
1519 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1520 // Bail out if the number of accesses to check exceeds the scan limit.
1521 if (ScanLimit < (WorkList.size() - I)) {
1522 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1523 return std::nullopt;
1524 }
1525 --ScanLimit;
1526 NumDomMemDefChecks++;
1527
1528 if (isa<MemoryPhi>(UseAccess)) {
1529 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1530 return DT.properlyDominates(KI->getParent(),
1531 UseAccess->getBlock());
1532 })) {
1533 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1534 continue;
1535 }
1536 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1537 PushMemUses(UseAccess);
1538 continue;
1539 }
1540
1541 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1542 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1543
1544 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1545 return DT.dominates(KI, UseInst);
1546 })) {
1547 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1548 continue;
1549 }
1550
1551 // A memory terminator kills all preceeding MemoryDefs and all succeeding
1552 // MemoryAccesses. We do not have to check it's users.
1553 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1554 LLVM_DEBUG(
1555 dbgs()
1556 << " ... skipping, memterminator invalidates following accesses\n");
1557 continue;
1558 }
1559
1560 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1561 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1562 PushMemUses(UseAccess);
1563 continue;
1564 }
1565
1566 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1567 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1568 return std::nullopt;
1569 }
1570
1571 // Uses which may read the original MemoryDef mean we cannot eliminate the
1572 // original MD. Stop walk.
1573 if (isReadClobber(MaybeDeadLoc, UseInst)) {
1574 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1575 return std::nullopt;
1576 }
1577
1578 // If this worklist walks back to the original memory access (and the
1579 // pointer is not guarenteed loop invariant) then we cannot assume that a
1580 // store kills itself.
1581 if (MaybeDeadAccess == UseAccess &&
1582 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1583 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1584 return std::nullopt;
1585 }
1586 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1587 // if it reads the memory location.
1588 // TODO: It would probably be better to check for self-reads before
1589 // calling the function.
1590 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1591 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1592 continue;
1593 }
1594
1595 // Check all uses for MemoryDefs, except for defs completely overwriting
1596 // the original location. Otherwise we have to check uses of *all*
1597 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1598 // miss cases like the following
1599 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1600 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1601 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1602 // (The Use points to the *first* Def it may alias)
1603 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1604 // stores [0,1]
1605 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1606 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1607 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1608 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1609 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1610 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1612 << " ... found killing def " << *UseInst << "\n");
1613 KillingDefs.insert(UseInst);
1614 }
1615 } else {
1617 << " ... found preceeding def " << *UseInst << "\n");
1618 return std::nullopt;
1619 }
1620 } else
1621 PushMemUses(UseDef);
1622 }
1623 }
1624
1625 // For accesses to locations visible after the function returns, make sure
1626 // that the location is dead (=overwritten) along all paths from
1627 // MaybeDeadAccess to the exit.
1628 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1629 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1630 for (Instruction *KD : KillingDefs)
1631 KillingBlocks.insert(KD->getParent());
1632 assert(!KillingBlocks.empty() &&
1633 "Expected at least a single killing block");
1634
1635 // Find the common post-dominator of all killing blocks.
1636 BasicBlock *CommonPred = *KillingBlocks.begin();
1637 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1638 if (!CommonPred)
1639 break;
1640 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1641 }
1642
1643 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1644 // there is a path from MaybeDeadAccess to an exit not going through a
1645 // killing block.
1646 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1647 if (!AnyUnreachableExit)
1648 return std::nullopt;
1649
1650 // Fall back to CFG scan starting at all non-unreachable roots if not
1651 // all paths to the exit go through CommonPred.
1652 CommonPred = nullptr;
1653 }
1654
1655 // If CommonPred itself is in the set of killing blocks, we're done.
1656 if (KillingBlocks.count(CommonPred))
1657 return {MaybeDeadAccess};
1658
1659 SetVector<BasicBlock *> WorkList;
1660 // If CommonPred is null, there are multiple exits from the function.
1661 // They all have to be added to the worklist.
1662 if (CommonPred)
1663 WorkList.insert(CommonPred);
1664 else
1665 for (BasicBlock *R : PDT.roots()) {
1666 if (!isa<UnreachableInst>(R->getTerminator()))
1667 WorkList.insert(R);
1668 }
1669
1670 NumCFGTries++;
1671 // Check if all paths starting from an exit node go through one of the
1672 // killing blocks before reaching MaybeDeadAccess.
1673 for (unsigned I = 0; I < WorkList.size(); I++) {
1674 NumCFGChecks++;
1675 BasicBlock *Current = WorkList[I];
1676 if (KillingBlocks.count(Current))
1677 continue;
1678 if (Current == MaybeDeadAccess->getBlock())
1679 return std::nullopt;
1680
1681 // MaybeDeadAccess is reachable from the entry, so we don't have to
1682 // explore unreachable blocks further.
1683 if (!DT.isReachableFromEntry(Current))
1684 continue;
1685
1686 for (BasicBlock *Pred : predecessors(Current))
1687 WorkList.insert(Pred);
1688
1689 if (WorkList.size() >= MemorySSAPathCheckLimit)
1690 return std::nullopt;
1691 }
1692 NumCFGSuccess++;
1693 }
1694
1695 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1696 // potentially dead.
1697 return {MaybeDeadAccess};
1698 }
1699
1700 /// Delete dead memory defs and recursively add their operands to ToRemove if
1701 /// they became dead.
1702 void
1705 MemorySSAUpdater Updater(&MSSA);
1706 SmallVector<Instruction *, 32> NowDeadInsts;
1707 NowDeadInsts.push_back(SI);
1708 --NumFastOther;
1709
1710 while (!NowDeadInsts.empty()) {
1711 Instruction *DeadInst = NowDeadInsts.pop_back_val();
1712 ++NumFastOther;
1713
1714 // Try to preserve debug information attached to the dead instruction.
1715 salvageDebugInfo(*DeadInst);
1716 salvageKnowledge(DeadInst);
1717
1718 // Remove the Instruction from MSSA.
1719 MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1720 bool IsMemDef = MA && isa<MemoryDef>(MA);
1721 if (MA) {
1722 if (IsMemDef) {
1723 auto *MD = cast<MemoryDef>(MA);
1724 SkipStores.insert(MD);
1725 if (Deleted)
1726 Deleted->insert(MD);
1727 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1728 if (SI->getValueOperand()->getType()->isPointerTy()) {
1729 const Value *UO = getUnderlyingObject(SI->getValueOperand());
1730 if (CapturedBeforeReturn.erase(UO))
1731 ShouldIterateEndOfFunctionDSE = true;
1732 InvisibleToCallerAfterRet.erase(UO);
1733 }
1734 }
1735 }
1736
1737 Updater.removeMemoryAccess(MA);
1738 }
1739
1740 auto I = IOLs.find(DeadInst->getParent());
1741 if (I != IOLs.end())
1742 I->second.erase(DeadInst);
1743 // Remove its operands
1744 for (Use &O : DeadInst->operands())
1745 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1746 O.set(PoisonValue::get(O->getType()));
1747 if (isInstructionTriviallyDead(OpI, &TLI))
1748 NowDeadInsts.push_back(OpI);
1749 }
1750
1751 EI.removeInstruction(DeadInst);
1752 // Remove memory defs directly if they don't produce results, but only
1753 // queue other dead instructions for later removal. They may have been
1754 // used as memory locations that have been cached by BatchAA. Removing
1755 // them here may lead to newly created instructions to be allocated at the
1756 // same address, yielding stale cache entries.
1757 if (IsMemDef && DeadInst->getType()->isVoidTy())
1758 DeadInst->eraseFromParent();
1759 else
1760 ToRemove.push_back(DeadInst);
1761 }
1762 }
1763
1764 // Check for any extra throws between \p KillingI and \p DeadI that block
1765 // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1766 // MemoryDef that may throw are handled during the walk from one def to the
1767 // next.
1768 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1769 const Value *KillingUndObj) {
1770 // First see if we can ignore it by using the fact that KillingI is an
1771 // alloca/alloca like object that is not visible to the caller during
1772 // execution of the function.
1773 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1774 return false;
1775
1776 if (KillingI->getParent() == DeadI->getParent())
1777 return ThrowingBlocks.count(KillingI->getParent());
1778 return !ThrowingBlocks.empty();
1779 }
1780
1781 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1782 // instructions act as barriers:
1783 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1784 // object.
1785 // * Atomic stores stronger that monotonic.
1786 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1787 // If DeadI may throw it acts as a barrier, unless we are to an
1788 // alloca/alloca like object that does not escape.
1789 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1790 return true;
1791
1792 // If DeadI is an atomic load/store stronger than monotonic, do not try to
1793 // eliminate/reorder it.
1794 if (DeadI->isAtomic()) {
1795 if (auto *LI = dyn_cast<LoadInst>(DeadI))
1796 return isStrongerThanMonotonic(LI->getOrdering());
1797 if (auto *SI = dyn_cast<StoreInst>(DeadI))
1798 return isStrongerThanMonotonic(SI->getOrdering());
1799 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1800 return isStrongerThanMonotonic(ARMW->getOrdering());
1801 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1802 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1803 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1804 llvm_unreachable("other instructions should be skipped in MemorySSA");
1805 }
1806 return false;
1807 }
1808
1809 /// Eliminate writes to objects that are not visible in the caller and are not
1810 /// accessed before returning from the function.
1811 bool eliminateDeadWritesAtEndOfFunction() {
1812 bool MadeChange = false;
1813 LLVM_DEBUG(
1814 dbgs()
1815 << "Trying to eliminate MemoryDefs at the end of the function\n");
1816 do {
1817 ShouldIterateEndOfFunctionDSE = false;
1818 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1819 if (SkipStores.contains(Def))
1820 continue;
1821
1822 Instruction *DefI = Def->getMemoryInst();
1823 auto DefLoc = getLocForWrite(DefI);
1824 if (!DefLoc || !isRemovable(DefI))
1825 continue;
1826
1827 // NOTE: Currently eliminating writes at the end of a function is
1828 // limited to MemoryDefs with a single underlying object, to save
1829 // compile-time. In practice it appears the case with multiple
1830 // underlying objects is very uncommon. If it turns out to be important,
1831 // we can use getUnderlyingObjects here instead.
1832 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1833 if (!isInvisibleToCallerAfterRet(UO))
1834 continue;
1835
1836 if (isWriteAtEndOfFunction(Def)) {
1837 // See through pointer-to-pointer bitcasts
1838 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1839 "of the function\n");
1841 ++NumFastStores;
1842 MadeChange = true;
1843 }
1844 }
1845 } while (ShouldIterateEndOfFunctionDSE);
1846 return MadeChange;
1847 }
1848
1849 /// If we have a zero initializing memset following a call to malloc,
1850 /// try folding it into a call to calloc.
1851 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1852 Instruction *DefI = Def->getMemoryInst();
1853 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1854 if (!MemSet)
1855 // TODO: Could handle zero store to small allocation as well.
1856 return false;
1857 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1858 if (!StoredConstant || !StoredConstant->isNullValue())
1859 return false;
1860
1861 if (!isRemovable(DefI))
1862 // The memset might be volatile..
1863 return false;
1864
1865 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1866 F.hasFnAttribute(Attribute::SanitizeAddress) ||
1867 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1868 F.getName() == "calloc")
1869 return false;
1870 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1871 if (!Malloc)
1872 return false;
1873 auto *InnerCallee = Malloc->getCalledFunction();
1874 if (!InnerCallee)
1875 return false;
1876 LibFunc Func;
1877 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1878 Func != LibFunc_malloc)
1879 return false;
1880 // Gracefully handle malloc with unexpected memory attributes.
1881 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
1882 if (!MallocDef)
1883 return false;
1884
1885 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1886 // Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
1887 // of malloc block
1888 auto *MallocBB = Malloc->getParent(),
1889 *MemsetBB = Memset->getParent();
1890 if (MallocBB == MemsetBB)
1891 return true;
1892 auto *Ptr = Memset->getArgOperand(0);
1893 auto *TI = MallocBB->getTerminator();
1895 BasicBlock *TrueBB, *FalseBB;
1896 if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
1897 FalseBB)))
1898 return false;
1899 if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
1900 return false;
1901 return true;
1902 };
1903
1904 if (Malloc->getOperand(0) != MemSet->getLength())
1905 return false;
1906 if (!shouldCreateCalloc(Malloc, MemSet) ||
1907 !DT.dominates(Malloc, MemSet) ||
1908 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
1909 return false;
1910 IRBuilder<> IRB(Malloc);
1911 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
1912 auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
1913 Malloc->getArgOperand(0), IRB, TLI);
1914 if (!Calloc)
1915 return false;
1916
1917 MemorySSAUpdater Updater(&MSSA);
1918 auto *NewAccess =
1919 Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
1920 MallocDef);
1921 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1922 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1923 Malloc->replaceAllUsesWith(Calloc);
1925 return true;
1926 }
1927
1928 // Check if there is a dominating condition, that implies that the value
1929 // being stored in a ptr is already present in the ptr.
1930 bool dominatingConditionImpliesValue(MemoryDef *Def) {
1931 auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
1932 BasicBlock *StoreBB = StoreI->getParent();
1933 Value *StorePtr = StoreI->getPointerOperand();
1934 Value *StoreVal = StoreI->getValueOperand();
1935
1936 DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
1937 if (!IDom)
1938 return false;
1939
1940 auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
1941 if (!BI || !BI->isConditional())
1942 return false;
1943
1944 // In case both blocks are the same, it is not possible to determine
1945 // if optimization is possible. (We would not want to optimize a store
1946 // in the FalseBB if condition is true and vice versa.)
1947 if (BI->getSuccessor(0) == BI->getSuccessor(1))
1948 return false;
1949
1950 Instruction *ICmpL;
1952 if (!match(BI->getCondition(),
1953 m_c_ICmp(Pred,
1954 m_CombineAnd(m_Load(m_Specific(StorePtr)),
1955 m_Instruction(ICmpL)),
1956 m_Specific(StoreVal))) ||
1957 !ICmpInst::isEquality(Pred))
1958 return false;
1959
1960 // In case the else blocks also branches to the if block or the other way
1961 // around it is not possible to determine if the optimization is possible.
1962 if (Pred == ICmpInst::ICMP_EQ &&
1963 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
1964 StoreBB))
1965 return false;
1966
1967 if (Pred == ICmpInst::ICMP_NE &&
1968 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
1969 StoreBB))
1970 return false;
1971
1972 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
1973 MemoryAccess *ClobAcc =
1974 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
1975
1976 return MSSA.dominates(ClobAcc, LoadAcc);
1977 }
1978
1979 /// \returns true if \p Def is a no-op store, either because it
1980 /// directly stores back a loaded value or stores zero to a calloced object.
1981 bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1982 Instruction *DefI = Def->getMemoryInst();
1983 StoreInst *Store = dyn_cast<StoreInst>(DefI);
1984 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1985 Constant *StoredConstant = nullptr;
1986 if (Store)
1987 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1988 else if (MemSet)
1989 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1990 else
1991 return false;
1992
1993 if (!isRemovable(DefI))
1994 return false;
1995
1996 if (StoredConstant) {
1997 Constant *InitC =
1998 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
1999 // If the clobbering access is LiveOnEntry, no instructions between them
2000 // can modify the memory location.
2001 if (InitC && InitC == StoredConstant)
2002 return MSSA.isLiveOnEntryDef(
2003 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
2004 }
2005
2006 if (!Store)
2007 return false;
2008
2009 if (dominatingConditionImpliesValue(Def))
2010 return true;
2011
2012 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2013 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2014 // Get the defining access for the load.
2015 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2016 // Fast path: the defining accesses are the same.
2017 if (LoadAccess == Def->getDefiningAccess())
2018 return true;
2019
2020 // Look through phi accesses. Recursively scan all phi accesses by
2021 // adding them to a worklist. Bail when we run into a memory def that
2022 // does not match LoadAccess.
2024 MemoryAccess *Current =
2025 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2026 // We don't want to bail when we run into the store memory def. But,
2027 // the phi access may point to it. So, pretend like we've already
2028 // checked it.
2029 ToCheck.insert(Def);
2030 ToCheck.insert(Current);
2031 // Start at current (1) to simulate already having checked Def.
2032 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2033 Current = ToCheck[I];
2034 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2035 // Check all the operands.
2036 for (auto &Use : PhiAccess->incoming_values())
2037 ToCheck.insert(cast<MemoryAccess>(&Use));
2038 continue;
2039 }
2040
2041 // If we found a memory def, bail. This happens when we have an
2042 // unrelated write in between an otherwise noop store.
2043 assert(isa<MemoryDef>(Current) &&
2044 "Only MemoryDefs should reach here.");
2045 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2046 // We are searching for the definition of the store's destination.
2047 // So, if that is the same definition as the load, then this is a
2048 // noop. Otherwise, fail.
2049 if (LoadAccess != Current)
2050 return false;
2051 }
2052 return true;
2053 }
2054 }
2055
2056 return false;
2057 }
2058
2059 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2060 bool Changed = false;
2061 for (auto OI : IOL) {
2062 Instruction *DeadI = OI.first;
2063 MemoryLocation Loc = *getLocForWrite(DeadI);
2064 assert(isRemovable(DeadI) && "Expect only removable instruction");
2065
2066 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2067 int64_t DeadStart = 0;
2068 uint64_t DeadSize = Loc.Size.getValue();
2069 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2070 OverlapIntervalsTy &IntervalMap = OI.second;
2071 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2072 if (IntervalMap.empty())
2073 continue;
2074 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2075 }
2076 return Changed;
2077 }
2078
2079 /// Eliminates writes to locations where the value that is being written
2080 /// is already stored at the same location.
2081 bool eliminateRedundantStoresOfExistingValues() {
2082 bool MadeChange = false;
2083 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2084 "already existing value\n");
2085 for (auto *Def : MemDefs) {
2086 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2087 continue;
2088
2089 Instruction *DefInst = Def->getMemoryInst();
2090 auto MaybeDefLoc = getLocForWrite(DefInst);
2091 if (!MaybeDefLoc || !isRemovable(DefInst))
2092 continue;
2093
2094 MemoryDef *UpperDef;
2095 // To conserve compile-time, we avoid walking to the next clobbering def.
2096 // Instead, we just try to get the optimized access, if it exists. DSE
2097 // will try to optimize defs during the earlier traversal.
2098 if (Def->isOptimized())
2099 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2100 else
2101 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2102 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2103 continue;
2104
2105 Instruction *UpperInst = UpperDef->getMemoryInst();
2106 auto IsRedundantStore = [&]() {
2107 if (DefInst->isIdenticalTo(UpperInst))
2108 return true;
2109 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2110 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2111 // MemSetInst must have a write location.
2112 MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
2113 int64_t InstWriteOffset = 0;
2114 int64_t DepWriteOffset = 0;
2115 auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
2116 InstWriteOffset, DepWriteOffset);
2117 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2118 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2119 OR == OW_Complete;
2120 }
2121 }
2122 return false;
2123 };
2124
2125 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2126 continue;
2127 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2128 << '\n');
2129 deleteDeadInstruction(DefInst);
2130 NumRedundantStores++;
2131 MadeChange = true;
2132 }
2133 return MadeChange;
2134 }
2135};
2136
2137static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2139 const TargetLibraryInfo &TLI,
2140 const LoopInfo &LI) {
2141 bool MadeChange = false;
2142
2143 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2144 // For each store:
2145 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2146 MemoryDef *KillingDef = State.MemDefs[I];
2147 if (State.SkipStores.count(KillingDef))
2148 continue;
2149 Instruction *KillingI = KillingDef->getMemoryInst();
2150
2151 std::optional<MemoryLocation> MaybeKillingLoc;
2152 if (State.isMemTerminatorInst(KillingI)) {
2153 if (auto KillingLoc = State.getLocForTerminator(KillingI))
2154 MaybeKillingLoc = KillingLoc->first;
2155 } else {
2156 MaybeKillingLoc = State.getLocForWrite(KillingI);
2157 }
2158
2159 if (!MaybeKillingLoc) {
2160 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2161 << *KillingI << "\n");
2162 continue;
2163 }
2164 MemoryLocation KillingLoc = *MaybeKillingLoc;
2165 assert(KillingLoc.Ptr && "KillingLoc should not be null");
2166 const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
2167 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2168 << *KillingDef << " (" << *KillingI << ")\n");
2169
2170 unsigned ScanLimit = MemorySSAScanLimit;
2171 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2172 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2173 // Worklist of MemoryAccesses that may be killed by KillingDef.
2175 // Track MemoryAccesses that have been deleted in the loop below, so we can
2176 // skip them. Don't use SkipStores for this, which may contain reused
2177 // MemoryAccess addresses.
2179 [[maybe_unused]] unsigned OrigNumSkipStores = State.SkipStores.size();
2180 ToCheck.insert(KillingDef->getDefiningAccess());
2181
2182 bool Shortend = false;
2183 bool IsMemTerm = State.isMemTerminatorInst(KillingI);
2184 // Check if MemoryAccesses in the worklist are killed by KillingDef.
2185 for (unsigned I = 0; I < ToCheck.size(); I++) {
2186 MemoryAccess *Current = ToCheck[I];
2187 if (Deleted.contains(Current))
2188 continue;
2189
2190 std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
2191 KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
2192 WalkerStepLimit, IsMemTerm, PartialLimit);
2193
2194 if (!MaybeDeadAccess) {
2195 LLVM_DEBUG(dbgs() << " finished walk\n");
2196 continue;
2197 }
2198
2199 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2200 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2201 if (isa<MemoryPhi>(DeadAccess)) {
2202 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2203 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2204 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2205 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2206 BasicBlock *PhiBlock = DeadAccess->getBlock();
2207
2208 // We only consider incoming MemoryAccesses that come before the
2209 // MemoryPhi. Otherwise we could discover candidates that do not
2210 // strictly dominate our starting def.
2211 if (State.PostOrderNumbers[IncomingBlock] >
2212 State.PostOrderNumbers[PhiBlock])
2213 ToCheck.insert(IncomingAccess);
2214 }
2215 continue;
2216 }
2217 auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
2218 Instruction *DeadI = DeadDefAccess->getMemoryInst();
2219 LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2220 ToCheck.insert(DeadDefAccess->getDefiningAccess());
2221 NumGetDomMemoryDefPassed++;
2222
2223 if (!DebugCounter::shouldExecute(MemorySSACounter))
2224 continue;
2225
2226 MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
2227
2228 if (IsMemTerm) {
2229 const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2230 if (KillingUndObj != DeadUndObj)
2231 continue;
2232 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2233 << "\n KILLER: " << *KillingI << '\n');
2234 State.deleteDeadInstruction(DeadI, &Deleted);
2235 ++NumFastStores;
2236 MadeChange = true;
2237 } else {
2238 // Check if DeadI overwrites KillingI.
2239 int64_t KillingOffset = 0;
2240 int64_t DeadOffset = 0;
2241 OverwriteResult OR = State.isOverwrite(
2242 KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2243 if (OR == OW_MaybePartial) {
2244 auto Iter = State.IOLs.insert(
2245 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2246 DeadI->getParent(), InstOverlapIntervalsTy()));
2247 auto &IOL = Iter.first->second;
2248 OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2249 DeadOffset, DeadI, IOL);
2250 }
2251
2252 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2253 auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2254 auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2255 // We are re-using tryToMergePartialOverlappingStores, which requires
2256 // DeadSI to dominate KillingSI.
2257 // TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
2258 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2260 KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2261 State.BatchAA, &DT)) {
2262
2263 // Update stored value of earlier store to merged constant.
2264 DeadSI->setOperand(0, Merged);
2265 ++NumModifiedStores;
2266 MadeChange = true;
2267
2268 Shortend = true;
2269 // Remove killing store and remove any outstanding overlap
2270 // intervals for the updated store.
2271 State.deleteDeadInstruction(KillingSI, &Deleted);
2272 auto I = State.IOLs.find(DeadSI->getParent());
2273 if (I != State.IOLs.end())
2274 I->second.erase(DeadSI);
2275 break;
2276 }
2277 }
2278 }
2279
2280 if (OR == OW_Complete) {
2281 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2282 << "\n KILLER: " << *KillingI << '\n');
2283 State.deleteDeadInstruction(DeadI, &Deleted);
2284 ++NumFastStores;
2285 MadeChange = true;
2286 }
2287 }
2288 }
2289
2290 assert(State.SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2291 "SkipStores and Deleted out of sync?");
2292
2293 // Check if the store is a no-op.
2294 if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
2295 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2296 << '\n');
2297 State.deleteDeadInstruction(KillingI);
2298 NumRedundantStores++;
2299 MadeChange = true;
2300 continue;
2301 }
2302
2303 // Can we form a calloc from a memset/malloc pair?
2304 if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
2305 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2306 << " DEAD: " << *KillingI << '\n');
2307 State.deleteDeadInstruction(KillingI);
2308 MadeChange = true;
2309 continue;
2310 }
2311 }
2312
2314 for (auto &KV : State.IOLs)
2315 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2316
2317 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2318 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2319
2320 while (!State.ToRemove.empty()) {
2321 Instruction *DeadInst = State.ToRemove.pop_back_val();
2322 DeadInst->eraseFromParent();
2323 }
2324
2325 return MadeChange;
2326}
2327} // end anonymous namespace
2328
2329//===----------------------------------------------------------------------===//
2330// DSE Pass
2331//===----------------------------------------------------------------------===//
2336 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2338 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2339
2340 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2341
2342#ifdef LLVM_ENABLE_STATS
2344 for (auto &I : instructions(F))
2345 NumRemainingStores += isa<StoreInst>(&I);
2346#endif
2347
2348 if (!Changed)
2349 return PreservedAnalyses::all();
2350
2354 PA.preserve<LoopAnalysis>();
2355 return PA;
2356}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefAnalysis InstSet & ToRemove
Expand Atomic instructions
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
std::map< int64_t, int64_t > OverlapIntervalsTy
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
static void deleteDeadInstruction(Instruction *I)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
This header defines various interfaces for pass management in LLVM.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A manager for alias analyses.
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
The possible results of an alias query.
Definition: AliasAnalysis.h:81
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:99
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Assignment ID.
static DIAssignID * getDistinct(LLVMContext &Context)
static std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
iterator_range< root_iterator > roots()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Context-sensitive CaptureInfo provider, which computes and caches the earliest common dominator closu...
void removeInstruction(Instruction *I)
const BasicBlock & getEntryBlock() const
Definition: Function.h:783
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Create an "inbounds" getelementptr.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:82
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
const_iterator begin() const
Definition: IntervalMap.h:1146
bool empty() const
empty - Return true when no intervals are mapped.
Definition: IntervalMap.h:1101
const_iterator end() const
Definition: IntervalMap.h:1158
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
static LocationSize precise(uint64_t Value)
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
iterator find(const KeyT &Key)
Definition: MapVector.h:167
Value * getLength() const
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
BasicBlock * getBlock() const
Definition: MemorySSA.h:164
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:372
void setOptimized(MemoryAccess *MA)
Definition: MemorySSA.h:392
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:923
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1040
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1562
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2113
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1549
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:737
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:262
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:259
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Definition: PHITransAddr.h:62
Value * getAddr() const
Definition: PHITransAddr.h:58
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
iterator begin() const
Definition: SmallPtrSet.h:380
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
Value * getValueOperand()
Definition: Instructions.h:414
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:771
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:830
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:245
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate, true > m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:567
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
Definition: DebugInfo.cpp:1898
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:238
bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgAssignIntrinsic *DbgAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to to (Dest + S...
Definition: DebugInfo.cpp:2121
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1715
Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
bool isStrongerThanMonotonic(AtomicOrdering AO)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1650
Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI)
Emit a call to the calloc function.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< po_iterator< T > > post_order(const T &G)
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2043
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition: Alignment.h:197
bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
auto predecessors(const MachineBasicBlock *BB)
bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
bool isRefSet(const ModRefInfo MRI)
Definition: ModRef.h:51
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Holds the characteristics of one fragment of a larger variable.
Various options to control the behavior of getObjectSize.
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.