// Scrape residue from the LLVM 23.0.0git doxygen page for
// DeadStoreElimination.cpp ("Go to the documentation of this file").
// Converted to a comment so it cannot be mistaken for code.
1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
53#include "llvm/IR/Argument.h"
55#include "llvm/IR/BasicBlock.h"
56#include "llvm/IR/Constant.h"
58#include "llvm/IR/Constants.h"
59#include "llvm/IR/DataLayout.h"
60#include "llvm/IR/DebugInfo.h"
61#include "llvm/IR/Dominators.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/IRBuilder.h"
65#include "llvm/IR/InstrTypes.h"
66#include "llvm/IR/Instruction.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/PassManager.h"
72#include "llvm/IR/Value.h"
76#include "llvm/Support/Debug.h"
84#include <algorithm>
85#include <cassert>
86#include <cstdint>
87#include <map>
88#include <optional>
89#include <utility>
90
91using namespace llvm;
92using namespace PatternMatch;
93
94#define DEBUG_TYPE "dse"
95
96STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
97STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
98STATISTIC(NumFastStores, "Number of stores deleted");
99STATISTIC(NumFastOther, "Number of other instrs removed");
100STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
101STATISTIC(NumModifiedStores, "Number of stores modified");
102STATISTIC(NumCFGChecks, "Number of stores modified");
103STATISTIC(NumCFGTries, "Number of stores modified");
104STATISTIC(NumCFGSuccess, "Number of stores modified");
105STATISTIC(NumGetDomMemoryDefPassed,
106 "Number of times a valid candidate is returned from getDomMemoryDef");
107STATISTIC(NumDomMemDefChecks,
108 "Number iterations check for reads in getDomMemoryDef");
109
110DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
111 "Controls which MemoryDefs are eliminated.");
112
113static cl::opt<bool>
114EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
115 cl::init(true), cl::Hidden,
116 cl::desc("Enable partial-overwrite tracking in DSE"));
117
118static cl::opt<bool>
119EnablePartialStoreMerging("enable-dse-partial-store-merging",
120 cl::init(true), cl::Hidden,
121 cl::desc("Enable partial store merging in DSE"));
122
124 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
125 cl::desc("The number of memory instructions to scan for "
126 "dead store elimination (default = 150)"));
128 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
129 cl::desc("The maximum number of steps while walking upwards to find "
130 "MemoryDefs that may be killed (default = 90)"));
131
133 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
134 cl::desc("The maximum number candidates that only partially overwrite the "
135 "killing MemoryDef to consider"
136 " (default = 5)"));
137
139 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
140 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
141 "other stores per basic block (default = 5000)"));
142
144 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
145 cl::desc(
146 "The cost of a step in the same basic block as the killing MemoryDef"
147 "(default = 1)"));
148
150 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
152 cl::desc("The cost of a step in a different basic "
153 "block than the killing MemoryDef"
154 "(default = 5)"));
155
157 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
158 cl::desc("The maximum number of blocks to check when trying to prove that "
159 "all paths to an exit go through a killing block (default = 50)"));
160
161// This flags allows or disallows DSE to optimize MemorySSA during its
162// traversal. Note that DSE optimizing MemorySSA may impact other passes
163// downstream of the DSE invocation and can lead to issues not being
164// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
165// those cases, the flag can be used to check if DSE's MemorySSA optimizations
166// impact follow-up passes.
167static cl::opt<bool>
168 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
169 cl::desc("Allow DSE to optimize memory accesses."));
170
171// TODO: remove this flag.
173 "enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden,
174 cl::desc("Enable the initializes attr improvement in DSE"));
175
176//===----------------------------------------------------------------------===//
177// Helper functions
178//===----------------------------------------------------------------------===//
179using OverlapIntervalsTy = std::map<int64_t, int64_t>;
181
182/// Returns true if the end of this instruction can be safely shortened in
183/// length.
185 // Don't shorten stores for now
186 if (isa<StoreInst>(I))
187 return false;
188
190 switch (II->getIntrinsicID()) {
191 default: return false;
192 case Intrinsic::memset:
193 case Intrinsic::memcpy:
194 case Intrinsic::memcpy_element_unordered_atomic:
195 case Intrinsic::memset_element_unordered_atomic:
196 // Do shorten memory intrinsics.
197 // FIXME: Add memmove if it's also safe to transform.
198 return true;
199 }
200 }
201
202 // Don't shorten libcalls calls for now.
203
204 return false;
205}
206
207/// Returns true if the beginning of this instruction can be safely shortened
208/// in length.
210 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
211 // easily done by offsetting the source address.
212 return isa<AnyMemSetInst>(I);
213}
214
215static std::optional<TypeSize> getPointerSize(const Value *V,
216 const DataLayout &DL,
217 const TargetLibraryInfo &TLI,
218 const Function *F) {
220 ObjectSizeOpts Opts;
222
223 if (getObjectSize(V, Size, DL, &TLI, Opts))
224 return TypeSize::getFixed(Size);
225 return std::nullopt;
226}
227
namespace {

/// Classification of how a killing store relates to a dead store. The
/// meanings of OW_Begin/OW_Complete/OW_End/OW_PartialEarlierWithFullLater/
/// OW_Unknown match the documentation of isPartialOverwrite() below.
enum OverwriteResult {
  OW_Begin,    // Killing store overwrites the beginning of the dead store.
  OW_Complete, // Killing store completely overwrites the dead store.
  OW_End,      // Killing store overwrites the end of the dead store.
  OW_PartialEarlierWithFullLater, // Smaller killing store fully contained
                                  // within the dead (bigger) store.
  OW_MaybePartial, // Possible partial overlap (not used in this chunk).
  OW_None,         // No overlap established (not used in this chunk).
  OW_Unknown       // Nothing could be determined.
};

} // end anonymous namespace
241
242/// Check if two instruction are masked stores that completely
243/// overwrite one another. More specifically, \p KillingI has to
244/// overwrite \p DeadI.
245static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
246 const Instruction *DeadI,
248 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
249 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
250 if (KillingII == nullptr || DeadII == nullptr)
251 return OW_Unknown;
252 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
253 return OW_Unknown;
254
255 switch (KillingII->getIntrinsicID()) {
256 case Intrinsic::masked_store:
257 case Intrinsic::vp_store: {
258 const DataLayout &DL = KillingII->getDataLayout();
259 auto *KillingTy = KillingII->getArgOperand(0)->getType();
260 auto *DeadTy = DeadII->getArgOperand(0)->getType();
261 if (DL.getTypeSizeInBits(KillingTy) != DL.getTypeSizeInBits(DeadTy))
262 return OW_Unknown;
263 // Element count.
264 if (cast<VectorType>(KillingTy)->getElementCount() !=
265 cast<VectorType>(DeadTy)->getElementCount())
266 return OW_Unknown;
267 // Pointers.
268 Value *KillingPtr = KillingII->getArgOperand(1);
269 Value *DeadPtr = DeadII->getArgOperand(1);
270 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
271 return OW_Unknown;
272 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
273 // Masks.
274 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
275 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
276 return OW_Unknown;
277 } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
278 // Masks.
279 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
280 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
281 return OW_Unknown;
282 // Lengths.
283 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
284 return OW_Unknown;
285 }
286 return OW_Complete;
287 }
288 default:
289 return OW_Unknown;
290 }
291}
292
293/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
294/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
295/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
296/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
297/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
298/// overwritten by a killing (smaller) store which doesn't write outside the big
299/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
300/// NOTE: This function must only be called if both \p KillingLoc and \p
301/// DeadLoc belong to the same underlying object with valid \p KillingOff and
302/// \p DeadOff.
303static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
304 const MemoryLocation &DeadLoc,
305 int64_t KillingOff, int64_t DeadOff,
306 Instruction *DeadI,
308 const uint64_t KillingSize = KillingLoc.Size.getValue();
309 const uint64_t DeadSize = DeadLoc.Size.getValue();
310 // We may now overlap, although the overlap is not complete. There might also
311 // be other incomplete overlaps, and together, they might cover the complete
312 // dead store.
313 // Note: The correctness of this logic depends on the fact that this function
314 // is not even called providing DepWrite when there are any intervening reads.
316 KillingOff < int64_t(DeadOff + DeadSize) &&
317 int64_t(KillingOff + KillingSize) >= DeadOff) {
318
319 // Insert our part of the overlap into the map.
320 auto &IM = IOL[DeadI];
321 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
322 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
323 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
324 << ")\n");
325
326 // Make sure that we only insert non-overlapping intervals and combine
327 // adjacent intervals. The intervals are stored in the map with the ending
328 // offset as the key (in the half-open sense) and the starting offset as
329 // the value.
330 int64_t KillingIntStart = KillingOff;
331 int64_t KillingIntEnd = KillingOff + KillingSize;
332
333 // Find any intervals ending at, or after, KillingIntStart which start
334 // before KillingIntEnd.
335 auto ILI = IM.lower_bound(KillingIntStart);
336 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
337 // This existing interval is overlapped with the current store somewhere
338 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
339 // intervals and adjusting our start and end.
340 KillingIntStart = std::min(KillingIntStart, ILI->second);
341 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
342 ILI = IM.erase(ILI);
343
344 // Continue erasing and adjusting our end in case other previous
345 // intervals are also overlapped with the current store.
346 //
347 // |--- dead 1 ---| |--- dead 2 ---|
348 // |------- killing---------|
349 //
350 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
351 assert(ILI->second > KillingIntStart && "Unexpected interval");
352 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
353 ILI = IM.erase(ILI);
354 }
355 }
356
357 IM[KillingIntEnd] = KillingIntStart;
358
359 ILI = IM.begin();
360 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
361 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
362 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
363 << ") Composite KillingLoc [" << ILI->second << ", "
364 << ILI->first << ")\n");
365 ++NumCompletePartials;
366 return OW_Complete;
367 }
368 }
369
370 // Check for a dead store which writes to all the memory locations that
371 // the killing store writes to.
372 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
373 int64_t(DeadOff + DeadSize) > KillingOff &&
374 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
375 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
376 << ", " << int64_t(DeadOff + DeadSize)
377 << ") by a killing store [" << KillingOff << ", "
378 << int64_t(KillingOff + KillingSize) << ")\n");
379 // TODO: Maybe come up with a better name?
380 return OW_PartialEarlierWithFullLater;
381 }
382
383 // Another interesting case is if the killing store overwrites the end of the
384 // dead store.
385 //
386 // |--dead--|
387 // |-- killing --|
388 //
389 // In this case we may want to trim the size of dead store to avoid
390 // generating stores to addresses which will definitely be overwritten killing
391 // store.
393 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
394 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
395 return OW_End;
396
397 // Finally, we also need to check if the killing store overwrites the
398 // beginning of the dead store.
399 //
400 // |--dead--|
401 // |-- killing --|
402 //
403 // In this case we may want to move the destination address and trim the size
404 // of dead store to avoid generating stores to addresses which will definitely
405 // be overwritten killing store.
407 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
408 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
409 "Expect to be handled as OW_Complete");
410 return OW_Begin;
411 }
412 // Otherwise, they don't completely overlap.
413 return OW_Unknown;
414}
415
416/// Returns true if the memory which is accessed by the second instruction is not
417/// modified between the first and the second instruction.
418/// Precondition: Second instruction must be dominated by the first
419/// instruction.
420static bool
423 DominatorTree *DT) {
424 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
425 // instructions which can modify the memory location accessed by SecondI.
426 //
427 // While doing the walk keep track of the address to check. It might be
428 // different in different basic blocks due to PHI translation.
429 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
431 // Keep track of the address we visited each block with. Bail out if we
432 // visit a block with different addresses.
434
435 BasicBlock::iterator FirstBBI(FirstI);
436 ++FirstBBI;
437 BasicBlock::iterator SecondBBI(SecondI);
438 BasicBlock *FirstBB = FirstI->getParent();
439 BasicBlock *SecondBB = SecondI->getParent();
440 MemoryLocation MemLoc;
441 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
442 MemLoc = MemoryLocation::getForDest(MemSet);
443 else
444 MemLoc = MemoryLocation::get(SecondI);
445
446 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
447
448 // Start checking the SecondBB.
449 WorkList.push_back(
450 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
451 bool isFirstBlock = true;
452
453 // Check all blocks going backward until we reach the FirstBB.
454 while (!WorkList.empty()) {
455 BlockAddressPair Current = WorkList.pop_back_val();
456 BasicBlock *B = Current.first;
457 PHITransAddr &Addr = Current.second;
458 Value *Ptr = Addr.getAddr();
459
460 // Ignore instructions before FirstI if this is the FirstBB.
461 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
462
464 if (isFirstBlock) {
465 // Ignore instructions after SecondI if this is the first visit of SecondBB.
466 assert(B == SecondBB && "first block is not the store block");
467 EI = SecondBBI;
468 isFirstBlock = false;
469 } else {
470 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
471 // In this case we also have to look at instructions after SecondI.
472 EI = B->end();
473 }
474 for (; BI != EI; ++BI) {
475 Instruction *I = &*BI;
476 if (I->mayWriteToMemory() && I != SecondI)
477 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
478 return false;
479 }
480 if (B != FirstBB) {
481 assert(B != &FirstBB->getParent()->getEntryBlock() &&
482 "Should not hit the entry block because SI must be dominated by LI");
483 for (BasicBlock *Pred : predecessors(B)) {
484 PHITransAddr PredAddr = Addr;
485 if (PredAddr.needsPHITranslationFromBlock(B)) {
486 if (!PredAddr.isPotentiallyPHITranslatable())
487 return false;
488 if (!PredAddr.translateValue(B, Pred, DT, false))
489 return false;
490 }
491 Value *TranslatedPtr = PredAddr.getAddr();
492 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
493 if (!Inserted.second) {
494 // We already visited this block before. If it was with a different
495 // address - bail out!
496 if (TranslatedPtr != Inserted.first->second)
497 return false;
498 // ... otherwise just skip it.
499 continue;
500 }
501 WorkList.push_back(std::make_pair(Pred, PredAddr));
502 }
503 }
504 }
505 return true;
506}
507
508static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
509 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
510 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
511 const DataLayout &DL = Inst->getDataLayout();
512 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
513 uint64_t DeadSliceOffsetInBits =
514 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
515 auto SetDeadFragExpr = [](auto *Assign,
516 DIExpression::FragmentInfo DeadFragment) {
517 // createFragmentExpression expects an offset relative to the existing
518 // fragment offset if there is one.
519 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
520 Assign->getExpression()
521 ->getFragmentInfo()
522 .value_or(DIExpression::FragmentInfo(0, 0))
523 .OffsetInBits;
525 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
526 Assign->setExpression(*NewExpr);
527 return;
528 }
529 // Failed to create a fragment expression for this so discard the value,
530 // making this a kill location.
532 DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
533 DeadFragment.SizeInBits);
534 Assign->setExpression(Expr);
535 Assign->setKillLocation();
536 };
537
538 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
539 // link to any instructions. Created in the loop below (once).
540 DIAssignID *LinkToNothing = nullptr;
541 LLVMContext &Ctx = Inst->getContext();
542 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
543 if (!LinkToNothing)
544 LinkToNothing = DIAssignID::getDistinct(Ctx);
545 return LinkToNothing;
546 };
547
548 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
549 // overlapping dbg.assign intrinsic.
550 for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
551 std::optional<DIExpression::FragmentInfo> NewFragment;
552 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
553 DeadSliceSizeInBits, Assign,
554 NewFragment) ||
555 !NewFragment) {
556 // We couldn't calculate the intersecting fragment for some reason. Be
557 // cautious and unlink the whole assignment from the store.
558 Assign->setKillAddress();
559 Assign->setAssignId(GetDeadLink());
560 continue;
561 }
562 // No intersect.
563 if (NewFragment->SizeInBits == 0)
564 continue;
565
566 // Fragments overlap: insert a new dbg.assign for this dead part.
567 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
568 NewAssign->insertAfter(Assign->getIterator());
569 NewAssign->setAssignId(GetDeadLink());
570 if (NewFragment)
571 SetDeadFragExpr(NewAssign, *NewFragment);
572 NewAssign->setKillAddress();
573 }
574}
575
576/// Update the attributes given that a memory access is updated (the
577/// dereferenced pointer could be moved forward when shortening a
578/// mem intrinsic).
579static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
580 uint64_t PtrOffset) {
581 // Remember old attributes.
582 AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
583
584 // Find attributes that should be kept, and remove the rest.
585 AttributeMask AttrsToRemove;
586 for (auto &Attr : OldAttrs) {
587 if (Attr.hasKindAsEnum()) {
588 switch (Attr.getKindAsEnum()) {
589 default:
590 break;
591 case Attribute::Alignment:
592 // Only keep alignment if PtrOffset satisfy the alignment.
593 if (isAligned(Attr.getAlignment().valueOrOne(), PtrOffset))
594 continue;
595 break;
596 case Attribute::Dereferenceable:
597 case Attribute::DereferenceableOrNull:
598 // We could reduce the size of these attributes according to
599 // PtrOffset. But we simply drop these for now.
600 break;
601 case Attribute::NonNull:
602 case Attribute::NoUndef:
603 continue;
604 }
605 }
606 AttrsToRemove.addAttribute(Attr);
607 }
608
609 // Remove the attributes that should be dropped.
610 Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
611}
612
613static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
614 uint64_t &DeadSize, int64_t KillingStart,
615 uint64_t KillingSize, bool IsOverwriteEnd) {
616 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
617 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
618
619 // We assume that memet/memcpy operates in chunks of the "largest" native
620 // type size and aligned on the same value. That means optimal start and size
621 // of memset/memcpy should be modulo of preferred alignment of that type. That
622 // is it there is no any sense in trying to reduce store size any further
623 // since any "extra" stores comes for free anyway.
624 // On the other hand, maximum alignment we can achieve is limited by alignment
625 // of initial store.
626
627 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
628 // "largest" native type.
629 // Note: What is the proper way to get that value?
630 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
631 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
632
633 int64_t ToRemoveStart = 0;
634 uint64_t ToRemoveSize = 0;
635 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
636 // maintained on the remaining store.
637 if (IsOverwriteEnd) {
638 // Calculate required adjustment for 'KillingStart' in order to keep
639 // remaining store size aligned on 'PerfAlign'.
640 uint64_t Off =
641 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
642 ToRemoveStart = KillingStart + Off;
643 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
644 return false;
645 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
646 } else {
647 ToRemoveStart = DeadStart;
648 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
649 "Not overlapping accesses?");
650 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
651 // Calculate required adjustment for 'ToRemoveSize'in order to keep
652 // start of the remaining store aligned on 'PerfAlign'.
653 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
654 if (Off != 0) {
655 if (ToRemoveSize <= (PrefAlign.value() - Off))
656 return false;
657 ToRemoveSize -= PrefAlign.value() - Off;
658 }
659 assert(isAligned(PrefAlign, ToRemoveSize) &&
660 "Should preserve selected alignment");
661 }
662
663 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
664 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
665
666 uint64_t NewSize = DeadSize - ToRemoveSize;
667 if (DeadIntrinsic->isAtomic()) {
668 // When shortening an atomic memory intrinsic, the newly shortened
669 // length must remain an integer multiple of the element size.
670 const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
671 if (0 != NewSize % ElementSize)
672 return false;
673 }
674
675 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
676 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
677 << "\n KILLER [" << ToRemoveStart << ", "
678 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
679
680 DeadIntrinsic->setLength(NewSize);
681 DeadIntrinsic->setDestAlignment(PrefAlign);
682
683 Value *OrigDest = DeadIntrinsic->getRawDest();
684 if (!IsOverwriteEnd) {
685 Value *Indices[1] = {
686 ConstantInt::get(DeadIntrinsic->getLength()->getType(), ToRemoveSize)};
688 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
689 DeadI->getIterator());
690 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
691 DeadIntrinsic->setDest(NewDestGEP);
692 adjustArgAttributes(DeadIntrinsic, 0, ToRemoveSize);
693 }
694
695 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
696 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
697 IsOverwriteEnd);
698
699 // Finally update start and size of dead access.
700 if (!IsOverwriteEnd)
701 DeadStart += ToRemoveSize;
702 DeadSize = NewSize;
703
704 return true;
705}
706
708 int64_t &DeadStart, uint64_t &DeadSize) {
709 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
710 return false;
711
712 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
713 int64_t KillingStart = OII->second;
714 uint64_t KillingSize = OII->first - KillingStart;
715
716 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
717
718 if (KillingStart > DeadStart &&
719 // Note: "KillingStart - KillingStart" is known to be positive due to
720 // preceding check.
721 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
722 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
723 // be non negative due to preceding checks.
724 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
725 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
726 true)) {
727 IntervalMap.erase(OII);
728 return true;
729 }
730 }
731 return false;
732}
733
736 int64_t &DeadStart, uint64_t &DeadSize) {
738 return false;
739
740 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
741 int64_t KillingStart = OII->second;
742 uint64_t KillingSize = OII->first - KillingStart;
743
744 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
745
746 if (KillingStart <= DeadStart &&
747 // Note: "DeadStart - KillingStart" is known to be non negative due to
748 // preceding check.
749 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
750 // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
751 // be positive due to preceding checks.
752 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
753 "Should have been handled as OW_Complete");
754 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
755 false)) {
756 IntervalMap.erase(OII);
757 return true;
758 }
759 }
760 return false;
761}
762
763static Constant *
765 int64_t KillingOffset, int64_t DeadOffset,
767 DominatorTree *DT) {
768
769 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
770 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
771 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
772 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
773 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
774 // If the store we find is:
775 // a) partially overwritten by the store to 'Loc'
776 // b) the killing store is fully contained in the dead one and
777 // c) they both have a constant value
778 // d) none of the two stores need padding
779 // Merge the two stores, replacing the dead store's value with a
780 // merge of both values.
781 // TODO: Deal with other constant types (vectors, etc), and probably
782 // some mem intrinsics (if needed)
783
784 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
785 APInt KillingValue =
786 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
787 unsigned KillingBits = KillingValue.getBitWidth();
788 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
789 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
790
791 // Offset of the smaller store inside the larger store
792 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
793 unsigned LShiftAmount =
794 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
795 : BitOffsetDiff;
796 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
797 LShiftAmount + KillingBits);
798 // Clear the bits we'll be replacing, then OR with the smaller
799 // store, shifted appropriately.
800 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
801 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
802 << "\n Killing: " << *KillingI
803 << "\n Merged Value: " << Merged << '\n');
804 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
805 }
806 return nullptr;
807}
808
809// Returns true if \p I is an intrinsic that does not read or write memory.
812 switch (II->getIntrinsicID()) {
813 case Intrinsic::lifetime_start:
814 case Intrinsic::lifetime_end:
815 case Intrinsic::invariant_end:
816 case Intrinsic::launder_invariant_group:
817 case Intrinsic::assume:
818 return true;
819 case Intrinsic::dbg_declare:
820 case Intrinsic::dbg_label:
821 case Intrinsic::dbg_value:
822 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
823 default:
824 return false;
825 }
826 }
827 return false;
828}
829
830// Check if we can ignore \p D for DSE.
831static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
832 Instruction *DI = D->getMemoryInst();
833 // Calls that only access inaccessible memory cannot read or write any memory
834 // locations we consider for elimination.
835 if (auto *CB = dyn_cast<CallBase>(DI))
836 if (CB->onlyAccessesInaccessibleMemory())
837 return true;
838
839 // We can eliminate stores to locations not visible to the caller across
840 // throwing instructions.
841 if (DI->mayThrow() && !DefVisibleToCaller)
842 return true;
843
844 // We can remove the dead stores, irrespective of the fence and its ordering
845 // (release/acquire/seq_cst). Fences only constraints the ordering of
846 // already visible stores, it does not make a store visible to other
847 // threads. So, skipping over a fence does not change a store from being
848 // dead.
849 if (isa<FenceInst>(DI))
850 return true;
851
852 // Skip intrinsics that do not really read or modify memory.
853 if (isNoopIntrinsic(DI))
854 return true;
855
856 return false;
857}
858
859namespace {
860
861// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
862// defined by `MemDef`.
863struct MemoryLocationWrapper {
864  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
865                        bool DefByInitializesAttr)
866      : MemLoc(MemLoc), MemDef(MemDef),
867        DefByInitializesAttr(DefByInitializesAttr) {
868    assert(MemLoc.Ptr && "MemLoc should be not null");
    // Cache the underlying object and the defining instruction so clients
    // do not recompute them for every query.
869    UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
870    DefInst = MemDef->getMemoryInst();
871  }
872
  // The memory location written by DefInst.
873  MemoryLocation MemLoc;
  // getUnderlyingObject(MemLoc.Ptr), cached at construction.
874  const Value *UnderlyingObject;
  // The MemorySSA def that writes MemLoc and its IR instruction.
875  MemoryDef *MemDef;
876  Instruction *DefInst;
  // True when this location was derived from an `initializes` attribute
  // (flag supplied by the caller) rather than from a direct write.
877  bool DefByInitializesAttr = false;
878};
879
880// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
881// defined by this MemoryDef.
882struct MemoryDefWrapper {
  // Wraps each (location, defined-by-initializes-attr) pair in a
  // MemoryLocationWrapper tied to this MemoryDef.
883  MemoryDefWrapper(MemoryDef *MemDef,
884                   ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
885    DefInst = MemDef->getMemoryInst();
886    for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
887      DefinedLocations.push_back(
888          MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
889  }
  // Instruction backing the wrapped MemoryDef.
890  Instruction *DefInst;
  // NOTE(review): this listing omits line 891 — the `DefinedLocations`
  // container member the constructor populates above; restore from the
  // original file.
892};
893
// Per-argument information used when intersecting `initializes` ranges
// across aliasing call arguments.
894struct ArgumentInitInfo {
  // Argument position — presumably the call-operand index; TODO confirm
  // against the code that populates this struct.
895  unsigned Idx;
  // Whether the pointee is dead or invisible to the caller on unwind. When
  // false, the intersection helper requires the call to be nounwind before
  // using the ranges.
896  bool IsDeadOrInvisibleOnUnwind;
  // Byte ranges claimed written by this argument's `initializes` attribute.
897  ConstantRangeList Inits;
898};
899} // namespace
900
  // NOTE(review): this listing omits lines 901-902 (the enclosing helper's
  // signature). The visible body reports whether the call has an argument
  // carrying the `initializes` attribute.
903  return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
904}
905
906// Return the intersected range list of the initializes attributes of "Args".
907// "Args" are call arguments that alias to each other.
908// If any argument in "Args" doesn't have dead_on_unwind attr and
909// "CallHasNoUnwindAttr" is false, return empty.
// NOTE(review): this listing omits lines 910-911 (return type and the first
// parameter); the body below uses front()/drop_front(), so "Args" is an
// ArrayRef-like sequence of ArgumentInitInfo — confirm in the original file.
912                                 bool CallHasNoUnwindAttr) {
913  if (Args.empty())
914    return {};
915
916  // To address unwind, the function should have nounwind attribute or the
917  // arguments have dead or invisible on unwind. Otherwise, return empty.
918  for (const auto &Arg : Args) {
919    if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
920      return {};
    // An argument without any initialized range contributes nothing, so the
    // intersection over all arguments is necessarily empty.
921    if (Arg.Inits.empty())
922      return {};
923  }
924
  // Fold the intersection pairwise over the remaining arguments.
925  ConstantRangeList IntersectedIntervals = Args.front().Inits;
926  for (auto &Arg : Args.drop_front())
927    IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);
928
929  return IntersectedIntervals;
930}
931
932namespace {
933
934struct DSEState {
  // Function being optimized and the analyses DSE consults.
935  Function &F;
936  AliasAnalysis &AA;
  // Escape analysis instance; wired into BatchAA below.
937  EarliestEscapeAnalysis EA;
938
939  /// The single BatchAA instance that is used to cache AA queries. It will
940  /// not be invalidated over the whole run. This is safe, because:
941  /// 1. Only memory writes are removed, so the alias cache for memory
942  ///    locations remains valid.
943  /// 2. No new instructions are added (only instructions removed), so cached
944  ///    information for a deleted value cannot be accessed by a re-used new
945  ///    value pointer.
946  BatchAAResults BatchAA;
947
  // Analyses and function-level facts used throughout the pass.
948  MemorySSA &MSSA;
949  DominatorTree &DT;
950  PostDominatorTree &PDT;
951  const TargetLibraryInfo &TLI;
952  const DataLayout &DL;
953  const LoopInfo &LI;
954
955  // Whether the function contains any irreducible control flow, useful for
956  // being accurately able to detect loops.
957  bool ContainsIrreducibleLoops;
958
959  // All MemoryDefs that potentially could kill other MemDefs.
  // NOTE(review): this listing omits line 960 — the `MemDefs` container
  // declaration referenced by the constructor; restore from the original.
961  // Any that should be skipped as they are already deleted
962  SmallPtrSet<MemoryAccess *, 4> SkipStores;
963  // Keep track whether a given object is captured before return or not.
964  DenseMap<const Value *, bool> CapturedBeforeReturn;
965  // Keep track of all of the objects that are invisible to the caller after
966  // the function returns.
967  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
  // Objects whose first N bytes are dead after return (N from the argument's
  // dead-on-return info); consulted by isInvisibleToCallerAfterRet.
968  DenseMap<const Value *, uint64_t> InvisibleToCallerAfterRetBounded;
969  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
970  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
971  // Post-order numbers for each basic block. Used to figure out if memory
972  // accesses are executed before another access.
973  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
974
975  /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
976  /// basic block.
977  MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
978  // Check if there are root nodes that are terminated by UnreachableInst.
979  // Those roots pessimize post-dominance queries. If there are such roots,
980  // fall back to CFG scan starting from all non-unreachable roots.
981  bool AnyUnreachableExit;
982
983  // Whether or not we should iterate on removing dead stores at the end of the
984  // function due to removing a store causing a previously captured pointer to
985  // no longer be captured.
986  bool ShouldIterateEndOfFunctionDSE;
987
988  /// Dead instructions to be removed at the end of DSE.
  // NOTE(review): this listing omits line 989 — the `ToRemove` container
  // declaration (see deleteDeadInstruction's doc below); restore from the
  // original.
990
991  // Class contains self-reference, make sure it's not copied/moved.
992  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
993           PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
994           const LoopInfo &LI);
995  DSEState(const DSEState &) = delete;
996  DSEState &operator=(const DSEState &) = delete;
997
  /// Tighten \p Size to a precise value for fortified libcalls with a
  /// constant length argument; otherwise return it unchanged.
998  LocationSize strengthenLocationSize(const Instruction *I,
999                                      LocationSize Size) const;
1000
1001  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
1002  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1003  /// location (by \p DeadI instruction).
1004  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
1005  /// \p DeadI, but they both write to the same underlying object. In that
1006  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
1007  /// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
1008  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1009  OverwriteResult isOverwrite(const Instruction *KillingI,
1010                              const Instruction *DeadI,
1011                              const MemoryLocation &KillingLoc,
1012                              const MemoryLocation &DeadLoc,
1013                              int64_t &KillingOff, int64_t &DeadOff);
1014
  /// Returns true if a store of \p StoreSize bytes through \p Ptr into
  /// object \p V cannot be observed by the caller after this function
  /// returns.
1015  bool isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1016                                   const LocationSize StoreSize);
1017
  /// Returns true if object \p V cannot be observed by the caller on an
  /// unwind path.
1018  bool isInvisibleToCallerOnUnwind(const Value *V);
1019
  /// Returns the single location written by \p I, if analyzable.
1020  std::optional<MemoryLocation> getLocForWrite(Instruction *I) const;
1021
1022  // Returns a list of <MemoryLocation, bool> pairs written by I.
1023  // The bool means whether the write is from Initializes attr.
  // NOTE(review): this listing omits line 1024 — the return type of
  // getLocForInst; restore from the original.
1025  getLocForInst(Instruction *I, bool ConsiderInitializesAttr);
1026
1027  /// Assuming this instruction has a dead analyzable write, can we delete
1028  /// this instruction?
1029  bool isRemovable(Instruction *I);
1030
1031  /// Returns true if \p UseInst completely overwrites \p DefLoc
1032  /// (stored by \p DefInst).
1033  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1034                           Instruction *UseInst);
1035
1036  /// Returns true if \p Def is not read before returning from the function.
1037  bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc);
1038
1039  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1040  /// pair with the MemoryLocation terminated by \p I and a boolean flag
1041  /// indicating whether \p I is a free-like call.
1042  std::optional<std::pair<MemoryLocation, bool>>
1043  getLocForTerminator(Instruction *I) const;
1044
1045  /// Returns true if \p I is a memory terminator instruction like
1046  /// llvm.lifetime.end or free.
1047  bool isMemTerminatorInst(Instruction *I) const;
1048
1049  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1050  /// instruction \p AccessI.
1051  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1052                       Instruction *MaybeTerm);
1053
1054  // Returns true if \p Use may read from \p DefLoc.
1055  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst);
1056
1057  /// Returns true if a dependency between \p Current and \p KillingDef is
1058  /// guaranteed to be loop invariant for the loops that they are in. Either
1059  /// because they are known to be in the same block, in the same loop level or
1060  /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1061  /// during execution of the containing function.
1062  bool isGuaranteedLoopIndependent(const Instruction *Current,
1063                                   const Instruction *KillingDef,
1064                                   const MemoryLocation &CurrentLoc);
1065
1066  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1067  /// loop. In particular, this guarantees that it only references a single
1068  /// MemoryLocation during execution of the containing function.
1069  bool isGuaranteedLoopInvariant(const Value *Ptr);
1070
1071  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1072  // with no read access between them or on any other path to a function exit
1073  // block if \p KillingLoc is not accessible after the function returns. If
1074  // there is no such MemoryDef, return std::nullopt. The returned value may not
1075  // (completely) overwrite \p KillingLoc. Currently we bail out when we
1076  // encounter an aliasing MemoryUse (read).
1077  std::optional<MemoryAccess *>
1078  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1079                  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1080                  unsigned &ScanLimit, unsigned &WalkerStepLimit,
1081                  bool IsMemTerm, unsigned &PartialLimit,
1082                  bool IsInitializesAttrMemLoc);
1083
1084  /// Delete dead memory defs and recursively add their operands to ToRemove if
1085  /// they became dead.
1086  void
1087  deleteDeadInstruction(Instruction *SI,
1088                        SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr);
1089
1090  // Check for any extra throws between \p KillingI and \p DeadI that block
1091  // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1092  // MemoryDef that may throw are handled during the walk from one def to the
1093  // next.
1094  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1095                       const Value *KillingUndObj);
1096
1097  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1098  // instructions act as barriers:
1099  // * A memory instruction that may throw and \p KillingI accesses a non-stack
1100  //   object.
1101  // * Atomic stores stronger that monotonic.
1102  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI);
1103
1104  /// Eliminate writes to objects that are not visible in the caller and are not
1105  /// accessed before returning from the function.
1106  bool eliminateDeadWritesAtEndOfFunction();
1107
1108  /// If we have a zero initializing memset following a call to malloc,
1109  /// try folding it into a call to calloc.
1110  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO);
1111
1112  // Check if there is a dominating condition, that implies that the value
1113  // being stored in a ptr is already present in the ptr.
1114  bool dominatingConditionImpliesValue(MemoryDef *Def);
1115
1116  /// \returns true if \p Def is a no-op store, either because it
1117  /// directly stores back a loaded value or stores zero to a calloced object.
1118  bool storeIsNoop(MemoryDef *Def, const Value *DefUO);
1119
1120  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL);
1121
1122  /// Eliminates writes to locations where the value that is being written
1123  /// is already stored at the same location.
1124  bool eliminateRedundantStoresOfExistingValues();
1125
1126  // Return the locations written by the initializes attribute.
1127  // Note that this function considers:
1128  // 1. Unwind edge: use "initializes" attribute only if the callee has
1129  //    "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
1130  //    or the argument is invisible to caller on unwind. That is, we don't
1131  //    perform incorrect DSE on unwind edges in the current function.
1132  // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
1133  //    the intersected range list of their "initializes" attributes.
1134  SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
1135
1136  // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
1137  // killed by `KillingLocWrapper.MemDef`. Return whether
1138  // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
1139  std::pair<bool, bool>
1140  eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
1141
1142  // Try to eliminate dead defs killed by `KillingDefWrapper` and return the
1143  // change state: whether make any change.
1144  bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
1145};
1146
1147} // end anonymous namespace
1148
// Append every user of \p Acc to the worklist, deduplicating via the
// visited set.
1149static void pushMemUses(MemoryAccess *Acc,
// NOTE(review): this listing omits lines 1150-1151 — the `WorkList` and
// `Visited` parameters used in the body; restore from the original.
1152  for (Use &U : Acc->uses()) {
1153    auto *MA = cast<MemoryAccess>(U.getUser());
    // Only enqueue each access once.
1154    if (Visited.insert(MA).second)
1155      WorkList.push_back(MA);
1156  }
1157}
1158
1159// Return true if "Arg" is function local and isn't captured before "CB".
1160static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
// NOTE(review): this listing omits line 1161 — presumably the
// EarliestEscapeAnalysis parameter referenced as `EA` below; confirm.
1162  const Value *UnderlyingObj = getUnderlyingObject(Arg);
1163  return isIdentifiedFunctionLocal(UnderlyingObj) &&
// NOTE(review): line 1164 is also missing from this listing — it wraps the
// capture query below (the closing ')' on the last line implies a call such
// as capturesNothing(...)); restore from the original.
1165             EA.getCapturesBefore(UnderlyingObj, CB, /*OrAt*/ true));
1166}
1167
// Constructor: walks the whole function once to collect per-block
// post-order numbers, throwing blocks, candidate MemoryDefs, and arguments
// whose stores are invisible to the caller after return.
1168DSEState::DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
// NOTE(review): this listing omits line 1169 — the DominatorTree /
// PostDominatorTree parameters initialized in the member list below.
1170                   const TargetLibraryInfo &TLI, const LoopInfo &LI)
1171    : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT), PDT(PDT),
1172      TLI(TLI), DL(F.getDataLayout()), LI(LI) {
1173  // Collect blocks with throwing instructions not modeled in MemorySSA and
1174  // alloc-like objects.
1175  unsigned PO = 0;
1176  for (BasicBlock *BB : post_order(&F)) {
1177    PostOrderNumbers[BB] = PO++;
1178    for (Instruction &I : *BB) {
1179      MemoryAccess *MA = MSSA.getMemoryAccess(&I);
      // A throwing instruction without a MemorySSA access would otherwise
      // be invisible to the def-walk, so record its block explicitly.
1180      if (I.mayThrow() && !MA)
1181        ThrowingBlocks.insert(I.getParent());
1182
1183      auto *MD = dyn_cast_or_null<MemoryDef>(MA);
1184      if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
1185          (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
// NOTE(review): this listing omits line 1186 — the final disjunct of this
// candidate filter; restore from the original file.
1187        MemDefs.push_back(MD);
1188    }
1189  }
1190
1191  // Treat byval, inalloca or dead on return arguments the same as Allocas,
1192  // stores to them are dead at the end of the function.
1193  for (Argument &AI : F.args()) {
1194    if (AI.hasPassPointeeByValueCopyAttr()) {
1195      InvisibleToCallerAfterRet.insert({&AI, true});
1196      continue;
1197    }
1198
1199    if (!AI.getType()->isPointerTy())
1200      continue;
1201
    // Dead-on-return arguments: fully-dead memory behaves like an alloca;
    // a bounded dead byte count goes into the bounded map instead.
1202    const DeadOnReturnInfo &Info = AI.getDeadOnReturnInfo();
1203    if (Info.coversAllReachableMemory())
1204      InvisibleToCallerAfterRet.insert({&AI, true});
1205    else if (uint64_t DeadBytes = Info.getNumberOfDeadBytes())
1206      InvisibleToCallerAfterRetBounded.insert({&AI, DeadBytes});
1207  }
1208
1209  // Collect whether there is any irreducible control flow in the function.
1210  ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
1211
  // Roots ending in unreachable pessimize post-dominance queries; record
  // whether any exist so later code can fall back to a CFG scan.
1212  AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
1213    return isa<UnreachableInst>(E->getTerminator());
1214  });
1215}
1216
1217LocationSize DSEState::strengthenLocationSize(const Instruction *I,
1218 LocationSize Size) const {
1219 if (auto *CB = dyn_cast<CallBase>(I)) {
1220 LibFunc F;
1221 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
1222 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
1223 // Use the precise location size specified by the 3rd argument
1224 // for determining KillingI overwrites DeadLoc if it is a memset_chk
1225 // instruction. memset_chk will write either the amount specified as 3rd
1226 // argument or the function will immediately abort and exit the program.
1227 // NOTE: AA may determine NoAlias if it can prove that the access size
1228 // is larger than the allocation size due to that being UB. To avoid
1229 // returning potentially invalid NoAlias results by AA, limit the use of
1230 // the precise location size to isOverwrite.
1231 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
1232 return LocationSize::precise(Len->getZExtValue());
1233 }
1234 }
1235 return Size;
1236}
1237
// Classify how the write of \p KillingI relates to the write of \p DeadI
// (OW_Complete / OW_MaybePartial / OW_None / OW_Unknown). KillingOff and
// DeadOff are set when both pointers decompose to the same base; see the
// declaration in DSEState for the full contract.
1238OverwriteResult DSEState::isOverwrite(const Instruction *KillingI,
1239                                      const Instruction *DeadI,
1240                                      const MemoryLocation &KillingLoc,
1241                                      const MemoryLocation &DeadLoc,
1242                                      int64_t &KillingOff, int64_t &DeadOff) {
1243  // AliasAnalysis does not always account for loops. Limit overwrite checks
1244  // to dependencies for which we can guarantee they are independent of any
1245  // loops they are in.
1246  if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
1247    return OW_Unknown;
1248
1249  LocationSize KillingLocSize =
1250      strengthenLocationSize(KillingI, KillingLoc.Size);
1251  const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
1252  const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
1253  const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
1254  const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
1255
1256  // Check whether the killing store overwrites the whole object, in which
1257  // case the size/offset of the dead store does not matter.
1258  if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
1259      isIdentifiedObject(KillingUndObj)) {
1260    std::optional<TypeSize> KillingUndObjSize =
1261        getPointerSize(KillingUndObj, DL, TLI, &F);
1262    if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
1263      return OW_Complete;
1264  }
1265
1266  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
1267  // get imprecise values here, though (except for unknown sizes).
1268  if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
    // In case no constant size is known, compare the IR values for the
    // number of bytes written and check if they match.
1271    const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
1272    const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
1273    if (KillingMemI && DeadMemI) {
1274      const Value *KillingV = KillingMemI->getLength();
1275      const Value *DeadV = DeadMemI->getLength();
1276      if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
1277        return OW_Complete;
1278    }
1279
1280    // Masked stores have imprecise locations, but we can reason about them
1281    // to some extent.
1282    return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
1283  }
1284
1285  const TypeSize KillingSize = KillingLocSize.getValue();
1286  const TypeSize DeadSize = DeadLoc.Size.getValue();
1287  // Bail on doing Size comparison which depends on AA for now
1288  // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
1289  const bool AnyScalable = DeadSize.isScalable() || KillingLocSize.isScalable();
1290
1291  if (AnyScalable)
1292    return OW_Unknown;
1293  // Query the alias information
1294  AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
1295
1296  // If the start pointers are the same, we just have to compare sizes to see if
1297  // the killing store was larger than the dead store.
1298  if (AAR == AliasResult::MustAlias) {
1299    // Make sure that the KillingSize size is >= the DeadSize size.
1300    if (KillingSize >= DeadSize)
1301      return OW_Complete;
1302  }
1303
1304  // If we hit a partial alias we may have a full overwrite
1305  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1306    int32_t Off = AAR.getOffset();
1307    if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1308      return OW_Complete;
1309  }
1310
1311  // If we can't resolve the same pointers to the same object, then we can't
1312  // analyze them at all.
1313  if (DeadUndObj != KillingUndObj) {
1314    // Non aliasing stores to different objects don't overlap. Note that
1315    // if the killing store is known to overwrite whole object (out of
1316    // bounds access overwrites whole object as well) then it is assumed to
1317    // completely overwrite any store to the same object even if they don't
1318    // actually alias (see next check).
1319    if (AAR == AliasResult::NoAlias)
1320      return OW_None;
1321    return OW_Unknown;
1322  }
1323
1324  // Okay, we have stores to two completely different pointers. Try to
1325  // decompose the pointer into a "base + constant_offset" form. If the base
1326  // pointers are equal, then we can reason about the two stores.
1327  DeadOff = 0;
1328  KillingOff = 0;
1329  const Value *DeadBasePtr =
1330      GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1331  const Value *KillingBasePtr =
1332      GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1333
1334  // If the base pointers still differ, we have two completely different
1335  // stores.
1336  if (DeadBasePtr != KillingBasePtr)
1337    return OW_Unknown;
1338
1339  // The killing access completely overlaps the dead store if and only if
1340  // both start and end of the dead one is "inside" the killing one:
1341  //    |<->|--dead--|<->|
1342  //    |-----killing------|
1343  // Accesses may overlap if and only if start of one of them is "inside"
1344  // another one:
1345  //    |<->|--dead--|<-------->|
1346  //    |-------killing--------|
1347  //           OR
1348  //    |-------dead-------|
1349  //    |<->|---killing---|<----->|
1350  //
1351  // We have to be careful here as *Off is signed while *.Size is unsigned.
1352
1353  // Check if the dead access starts "not before" the killing one.
1354  if (DeadOff >= KillingOff) {
1355    // If the dead access ends "not after" the killing access then the
1356    // dead one is completely overwritten by the killing one.
1357    if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1358      return OW_Complete;
1359    // If start of the dead access is "before" end of the killing access
1360    // then accesses overlap.
1361    else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1362      return OW_MaybePartial;
1363  }
1364  // If start of the killing access is "before" end of the dead access then
1365  // accesses overlap.
1366  else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1367    return OW_MaybePartial;
1368  }
1369
1370  // Can reach here only if accesses are known not to overlap.
1371  return OW_None;
1372}
1373
1374bool DSEState::isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1375 const LocationSize StoreSize) {
1376 if (isa<AllocaInst>(V))
1377 return true;
1378
1379 auto IBounded = InvisibleToCallerAfterRetBounded.find(V);
1380 if (IBounded != InvisibleToCallerAfterRetBounded.end()) {
1381 int64_t ValueOffset;
1382 [[maybe_unused]] const Value *BaseValue =
1383 GetPointerBaseWithConstantOffset(Ptr, ValueOffset, DL);
1384 // If we are not able to find a constant offset from the UO, we have to
1385 // pessimistically assume that the store writes to memory out of the
1386 // dead_on_return bounds.
1387 if (BaseValue != V)
1388 return false;
1389 // This store is only invisible after return if we are in bounds of the
1390 // range marked dead.
1391 if (StoreSize.hasValue() &&
1392 ValueOffset + StoreSize.getValue() <= IBounded->second &&
1393 ValueOffset >= 0)
1394 return true;
1395 }
1396 auto I = InvisibleToCallerAfterRet.insert({V, false});
1397 if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
1398 I.first->second = capturesNothing(PointerMayBeCaptured(
1399 V, /*ReturnCaptures=*/true, CaptureComponents::Provenance));
1400 return I.first->second;
1401}
1402
1403bool DSEState::isInvisibleToCallerOnUnwind(const Value *V) {
1404 bool RequiresNoCaptureBeforeUnwind;
1405 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1406 return false;
1407 if (!RequiresNoCaptureBeforeUnwind)
1408 return true;
1409
1410 auto I = CapturedBeforeReturn.insert({V, true});
1411 if (I.second)
1412 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1413 // with the killing MemoryDef. But we refrain from doing so for now to
1414 // limit compile-time and this does not cause any changes to the number
1415 // of stores removed on a large test set in practice.
1416 I.first->second = capturesAnything(PointerMayBeCaptured(
1417 V, /*ReturnCaptures=*/false, CaptureComponents::Provenance));
1418 return !I.first->second;
1419}
1420
// Return the single location written by \p I, or nullopt if \p I does not
// write memory in an analyzable way.
1421std::optional<MemoryLocation> DSEState::getLocForWrite(Instruction *I) const {
1422  if (!I->mayWriteToMemory())
1423    return std::nullopt;
1424
  // For calls, ask for the destination location (e.g. memcpy dest).
1425  if (auto *CB = dyn_cast<CallBase>(I))
1426    return MemoryLocation::getForDest(CB, TLI);
1427
// NOTE(review): this listing omits line 1428 — the fallback return for
// non-call instructions; restore from the original file.
1429}
1430
// Returns the list of <MemoryLocation, from-initializes-attr> pairs written
// by \p I (see the declaration in DSEState).
// NOTE(review): this listing omits line 1431 (the return type) and line
// 1433 (the declaration of the local `Locations` container); restore both
// from the original file.
1432DSEState::getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
  // Memory terminators contribute only their terminated location.
1434  if (isMemTerminatorInst(I)) {
1435    if (auto Loc = getLocForTerminator(I))
1436      Locations.push_back(std::make_pair(Loc->first, false));
1437    return Locations;
1438  }
1439
1440  if (auto Loc = getLocForWrite(I))
1441    Locations.push_back(std::make_pair(*Loc, false));
1442
  // Optionally add locations implied by `initializes` attributes; these are
  // tagged with true so callers can treat them more conservatively.
1443  if (ConsiderInitializesAttr) {
1444    for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1445      Locations.push_back(std::make_pair(MemLoc, true));
1446    }
1447  }
1448  return Locations;
1449}
1450
1451bool DSEState::isRemovable(Instruction *I) {
1452 assert(getLocForWrite(I) && "Must have analyzable write");
1453
1454 // Don't remove volatile/atomic stores.
1455 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1456 return SI->isUnordered();
1457
1458 if (auto *CB = dyn_cast<CallBase>(I)) {
1459 // Don't remove volatile memory intrinsics.
1460 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1461 return !MI->isVolatile();
1462
1463 // Never remove dead lifetime intrinsics, e.g. because they are followed
1464 // by a free.
1465 if (CB->isLifetimeStartOrEnd())
1466 return false;
1467
1468 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1469 !CB->isTerminator();
1470 }
1471
1472 return false;
1473}
1474
1475bool DSEState::isCompleteOverwrite(const MemoryLocation &DefLoc,
1476 Instruction *DefInst, Instruction *UseInst) {
1477 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1478 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1479 // MemoryDef.
1480 if (!UseInst->mayWriteToMemory())
1481 return false;
1482
1483 if (auto *CB = dyn_cast<CallBase>(UseInst))
1484 if (CB->onlyAccessesInaccessibleMemory())
1485 return false;
1486
1487 int64_t InstWriteOffset, DepWriteOffset;
1488 if (auto CC = getLocForWrite(UseInst))
1489 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1490 DepWriteOffset) == OW_Complete;
1491 return false;
1492}
1493
// Returns true if \p Def's written location \p DefLoc is never read on any
// path from \p Def to a function exit (walked via MemorySSA uses).
1494bool DSEState::isWriteAtEndOfFunction(MemoryDef *Def,
1495                                      const MemoryLocation &DefLoc) {
1496  LLVM_DEBUG(dbgs() << "  Check if def " << *Def << " ("
1497                    << *Def->getMemoryInst()
1498                    << ") is at the end the function \n");
// NOTE(review): this listing omits line 1499 — the declaration of the
// `WorkList` container used below; restore from the original file.
1500  SmallPtrSet<MemoryAccess *, 8> Visited;
1501
1502  pushMemUses(Def, WorkList, Visited);
  // Breadth-first walk of all transitive MemorySSA uses of Def.
1503  for (unsigned I = 0; I < WorkList.size(); I++) {
1504    if (WorkList.size() >= MemorySSAScanLimit) {
1505      LLVM_DEBUG(dbgs() << "  ... hit exploration limit.\n");
1506      return false;
1507    }
1508
1509    MemoryAccess *UseAccess = WorkList[I];
1510    if (isa<MemoryPhi>(UseAccess)) {
1511      // AliasAnalysis does not account for loops. Limit elimination to
1512      // candidates for which we can guarantee they always store to the same
1513      // memory location.
1514      if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
1515        return false;
1516
1517      pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
1518      continue;
1519    }
1520    // TODO: Checking for aliasing is expensive. Consider reducing the amount
1521    // of times this is called and/or caching it.
1522    Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1523    if (isReadClobber(DefLoc, UseInst)) {
1524      LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UseInst << ".\n");
1525      return false;
1526    }
1527
    // Defs may have their own uses that could read; keep walking.
1528    if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1529      pushMemUses(UseDef, WorkList, Visited);
1530  }
1531  return true;
1532}
1533
1534std::optional<std::pair<MemoryLocation, bool>>
1535DSEState::getLocForTerminator(Instruction *I) const {
1536 if (auto *CB = dyn_cast<CallBase>(I)) {
1537 if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1538 return {
1539 std::make_pair(MemoryLocation::getForArgument(CB, 0, &TLI), false)};
1540 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1541 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1542 }
1543
1544 return std::nullopt;
1545}
1546
1547bool DSEState::isMemTerminatorInst(Instruction *I) const {
1548 auto *CB = dyn_cast<CallBase>(I);
1549 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1550 getFreedOperand(CB, &TLI) != nullptr);
1551}
1552
1553bool DSEState::isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1554 Instruction *MaybeTerm) {
1555 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1556 getLocForTerminator(MaybeTerm);
1557
1558 if (!MaybeTermLoc)
1559 return false;
1560
1561 // If the terminator is a free-like call, all accesses to the underlying
1562 // object can be considered terminated.
1563 if (getUnderlyingObject(Loc.Ptr) !=
1564 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1565 return false;
1566
1567 auto TermLoc = MaybeTermLoc->first;
1568 if (MaybeTermLoc->second) {
1569 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1570 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1571 }
1572 int64_t InstWriteOffset = 0;
1573 int64_t DepWriteOffset = 0;
1574 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1575 DepWriteOffset) == OW_Complete;
1576}
1577
1578bool DSEState::isReadClobber(const MemoryLocation &DefLoc,
1579 Instruction *UseInst) {
1580 if (isNoopIntrinsic(UseInst))
1581 return false;
1582
1583 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1584 // treated as read clobber.
1585 if (auto SI = dyn_cast<StoreInst>(UseInst))
1586 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1587
1588 if (!UseInst->mayReadFromMemory())
1589 return false;
1590
1591 if (auto *CB = dyn_cast<CallBase>(UseInst))
1592 if (CB->onlyAccessesInaccessibleMemory())
1593 return false;
1594
1595 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1596}
1597
1598bool DSEState::isGuaranteedLoopIndependent(const Instruction *Current,
1599 const Instruction *KillingDef,
1600 const MemoryLocation &CurrentLoc) {
1601 // If the dependency is within the same block or loop level (being careful
1602 // of irreducible loops), we know that AA will return a valid result for the
1603 // memory dependency. (Both at the function level, outside of any loop,
1604 // would also be valid but we currently disable that to limit compile time).
1605 if (Current->getParent() == KillingDef->getParent())
1606 return true;
1607 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1608 if (!ContainsIrreducibleLoops && CurrentLI &&
1609 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1610 return true;
1611 // Otherwise check the memory location is invariant to any loops.
1612 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1613}
1614
1615bool DSEState::isGuaranteedLoopInvariant(const Value *Ptr) {
1616 Ptr = Ptr->stripPointerCasts();
1617 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1618 if (GEP->hasAllConstantIndices())
1619 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1620
1621 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1622 return I->getParent()->isEntryBlock() ||
1623 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1624 }
1625 return true;
1626}
1627
1628std::optional<MemoryAccess *> DSEState::getDomMemoryDef(
1629 MemoryDef *KillingDef, MemoryAccess *StartAccess,
1630 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1631 unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm,
1632 unsigned &PartialLimit, bool IsInitializesAttrMemLoc) {
1633 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1634 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1635 return std::nullopt;
1636 }
1637
1638 MemoryAccess *Current = StartAccess;
1639 Instruction *KillingI = KillingDef->getMemoryInst();
1640 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1641
1642 // Only optimize defining access of KillingDef when directly starting at its
1643 // defining access. The defining access also must only access KillingLoc. At
1644 // the moment we only support instructions with a single write location, so
1645 // it should be sufficient to disable optimizations for instructions that
1646 // also read from memory.
1647 bool CanOptimize = OptimizeMemorySSA &&
1648 KillingDef->getDefiningAccess() == StartAccess &&
1649 !KillingI->mayReadFromMemory();
1650
1651 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1652 std::optional<MemoryLocation> CurrentLoc;
1653 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1654 LLVM_DEBUG({
1655 dbgs() << " visiting " << *Current;
1656 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1657 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1658 << ")";
1659 dbgs() << "\n";
1660 });
1661
1662 // Reached TOP.
1663 if (MSSA.isLiveOnEntryDef(Current)) {
1664 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1665 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1666 // The first clobbering def is... none.
1667 KillingDef->setOptimized(Current);
1668 return std::nullopt;
1669 }
1670
1671 // Cost of a step. Accesses in the same block are more likely to be valid
1672 // candidates for elimination, hence consider them cheaper.
1673 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1676 if (WalkerStepLimit <= StepCost) {
1677 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1678 return std::nullopt;
1679 }
1680 WalkerStepLimit -= StepCost;
1681
1682 // Return for MemoryPhis. They cannot be eliminated directly and the
1683 // caller is responsible for traversing them.
1684 if (isa<MemoryPhi>(Current)) {
1685 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1686 return Current;
1687 }
1688
1689 // Below, check if CurrentDef is a valid candidate to be eliminated by
1690 // KillingDef. If it is not, check the next candidate.
1691 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1692 Instruction *CurrentI = CurrentDef->getMemoryInst();
1693
1694 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1695 CanOptimize = false;
1696 continue;
1697 }
1698
1699 // Before we try to remove anything, check for any extra throwing
1700 // instructions that block us from DSEing
1701 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1702 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1703 return std::nullopt;
1704 }
1705
1706 // Check for anything that looks like it will be a barrier to further
1707 // removal
1708 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1709 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1710 return std::nullopt;
1711 }
1712
1713 // If Current is known to be on path that reads DefLoc or is a read
1714 // clobber, bail out, as the path is not profitable. We skip this check
1715 // for intrinsic calls, because the code knows how to handle memcpy
1716 // intrinsics.
1717 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1718 return std::nullopt;
1719
1720 // Quick check if there are direct uses that are read-clobbers.
1721 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1722 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1723 return !MSSA.dominates(StartAccess, UseOrDef) &&
1724 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1725 return false;
1726 })) {
1727 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1728 return std::nullopt;
1729 }
1730
1731 // If Current does not have an analyzable write location or is not
1732 // removable, skip it.
1733 CurrentLoc = getLocForWrite(CurrentI);
1734 if (!CurrentLoc || !isRemovable(CurrentI)) {
1735 CanOptimize = false;
1736 continue;
1737 }
1738
1739 // AliasAnalysis does not account for loops. Limit elimination to
1740 // candidates for which we can guarantee they always store to the same
1741 // memory location and not located in different loops.
1742 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1743 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1744 CanOptimize = false;
1745 continue;
1746 }
1747
1748 if (IsMemTerm) {
1749 // If the killing def is a memory terminator (e.g. lifetime.end), check
1750 // the next candidate if the current Current does not write the same
1751 // underlying object as the terminator.
1752 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1753 CanOptimize = false;
1754 continue;
1755 }
1756 } else {
1757 int64_t KillingOffset = 0;
1758 int64_t DeadOffset = 0;
1759 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1760 KillingOffset, DeadOffset);
1761 if (CanOptimize) {
1762 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1763 // optimized access. Do not optimize if CurrentDef is already the
1764 // defining access of KillingDef.
1765 if (CurrentDef != KillingDef->getDefiningAccess() &&
1766 (OR == OW_Complete || OR == OW_MaybePartial))
1767 KillingDef->setOptimized(CurrentDef);
1768
1769 // Once a may-aliasing def is encountered do not set an optimized
1770 // access.
1771 if (OR != OW_None)
1772 CanOptimize = false;
1773 }
1774
1775 // If Current does not write to the same object as KillingDef, check
1776 // the next candidate.
1777 if (OR == OW_Unknown || OR == OW_None)
1778 continue;
1779 else if (OR == OW_MaybePartial) {
1780 // If KillingDef only partially overwrites Current, check the next
1781 // candidate if the partial step limit is exceeded. This aggressively
1782 // limits the number of candidates for partial store elimination,
1783 // which are less likely to be removable in the end.
1784 if (PartialLimit <= 1) {
1785 WalkerStepLimit -= 1;
1786 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with "
1787 "next access\n");
1788 continue;
1789 }
1790 PartialLimit -= 1;
1791 }
1792 }
1793 break;
1794 };
1795
1796 // Accesses to objects accessible after the function returns can only be
1797 // eliminated if the access is dead along all paths to the exit. Collect
1798 // the blocks with killing (=completely overwriting MemoryDefs) and check if
1799 // they cover all paths from MaybeDeadAccess to any function exit.
1800 SmallPtrSet<Instruction *, 16> KillingDefs;
1801 KillingDefs.insert(KillingDef->getMemoryInst());
1802 MemoryAccess *MaybeDeadAccess = Current;
1803 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1804 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1805 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1806 << *MaybeDeadI << ")\n");
1807
1809 SmallPtrSet<MemoryAccess *, 32> Visited;
1810 pushMemUses(MaybeDeadAccess, WorkList, Visited);
1811
1812 // Check if DeadDef may be read.
1813 for (unsigned I = 0; I < WorkList.size(); I++) {
1814 MemoryAccess *UseAccess = WorkList[I];
1815
1816 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1817 // Bail out if the number of accesses to check exceeds the scan limit.
1818 if (ScanLimit < (WorkList.size() - I)) {
1819 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1820 return std::nullopt;
1821 }
1822 --ScanLimit;
1823 NumDomMemDefChecks++;
1824
1825 if (isa<MemoryPhi>(UseAccess)) {
1826 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1827 return DT.properlyDominates(KI->getParent(), UseAccess->getBlock());
1828 })) {
1829 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1830 continue;
1831 }
1832 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1833 pushMemUses(UseAccess, WorkList, Visited);
1834 continue;
1835 }
1836
1837 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1838 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1839
1840 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1841 return DT.dominates(KI, UseInst);
1842 })) {
1843 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1844 continue;
1845 }
1846
1847 // A memory terminator kills all preceeding MemoryDefs and all succeeding
1848 // MemoryAccesses. We do not have to check it's users.
1849 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1850 LLVM_DEBUG(
1851 dbgs()
1852 << " ... skipping, memterminator invalidates following accesses\n");
1853 continue;
1854 }
1855
1856 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1857 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1858 pushMemUses(UseAccess, WorkList, Visited);
1859 continue;
1860 }
1861
1862 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1863 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1864 return std::nullopt;
1865 }
1866
1867 // Uses which may read the original MemoryDef mean we cannot eliminate the
1868 // original MD. Stop walk.
1869 // If KillingDef is a CallInst with "initializes" attribute, the reads in
1870 // the callee would be dominated by initializations, so it should be safe.
1871 bool IsKillingDefFromInitAttr = false;
1872 if (IsInitializesAttrMemLoc) {
1873 if (KillingI == UseInst &&
1874 KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
1875 IsKillingDefFromInitAttr = true;
1876 }
1877
1878 if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1879 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1880 return std::nullopt;
1881 }
1882
1883 // If this worklist walks back to the original memory access (and the
1884 // pointer is not guarenteed loop invariant) then we cannot assume that a
1885 // store kills itself.
1886 if (MaybeDeadAccess == UseAccess &&
1887 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1888 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1889 return std::nullopt;
1890 }
1891 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1892 // if it reads the memory location.
1893 // TODO: It would probably be better to check for self-reads before
1894 // calling the function.
1895 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1896 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1897 continue;
1898 }
1899
1900 // Check all uses for MemoryDefs, except for defs completely overwriting
1901 // the original location. Otherwise we have to check uses of *all*
1902 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1903 // miss cases like the following
1904 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1905 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1906 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1907 // (The Use points to the *first* Def it may alias)
1908 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1909 // stores [0,1]
1910 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1911 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1912 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1913 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1914 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1915 if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
1916 KillingLoc.Size)) {
1918 << " ... found killing def " << *UseInst << "\n");
1919 KillingDefs.insert(UseInst);
1920 }
1921 } else {
1923 << " ... found preceeding def " << *UseInst << "\n");
1924 return std::nullopt;
1925 }
1926 } else
1927 pushMemUses(UseDef, WorkList, Visited);
1928 }
1929 }
1930
1931 // For accesses to locations visible after the function returns, make sure
1932 // that the location is dead (=overwritten) along all paths from
1933 // MaybeDeadAccess to the exit.
1934 if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
1935 KillingLoc.Size)) {
1936 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1937 for (Instruction *KD : KillingDefs)
1938 KillingBlocks.insert(KD->getParent());
1939 assert(!KillingBlocks.empty() &&
1940 "Expected at least a single killing block");
1941
1942 // Find the common post-dominator of all killing blocks.
1943 BasicBlock *CommonPred = *KillingBlocks.begin();
1944 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1945 if (!CommonPred)
1946 break;
1947 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1948 }
1949
1950 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1951 // there is a path from MaybeDeadAccess to an exit not going through a
1952 // killing block.
1953 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1954 if (!AnyUnreachableExit)
1955 return std::nullopt;
1956
1957 // Fall back to CFG scan starting at all non-unreachable roots if not
1958 // all paths to the exit go through CommonPred.
1959 CommonPred = nullptr;
1960 }
1961
1962 // If CommonPred itself is in the set of killing blocks, we're done.
1963 if (KillingBlocks.count(CommonPred))
1964 return {MaybeDeadAccess};
1965
1966 SetVector<BasicBlock *> WorkList;
1967 // If CommonPred is null, there are multiple exits from the function.
1968 // They all have to be added to the worklist.
1969 if (CommonPred)
1970 WorkList.insert(CommonPred);
1971 else
1972 for (BasicBlock *R : PDT.roots()) {
1973 if (!isa<UnreachableInst>(R->getTerminator()))
1974 WorkList.insert(R);
1975 }
1976
1977 NumCFGTries++;
1978 // Check if all paths starting from an exit node go through one of the
1979 // killing blocks before reaching MaybeDeadAccess.
1980 for (unsigned I = 0; I < WorkList.size(); I++) {
1981 NumCFGChecks++;
1982 BasicBlock *Current = WorkList[I];
1983 if (KillingBlocks.count(Current))
1984 continue;
1985 if (Current == MaybeDeadAccess->getBlock())
1986 return std::nullopt;
1987
1988 // MaybeDeadAccess is reachable from the entry, so we don't have to
1989 // explore unreachable blocks further.
1990 if (!DT.isReachableFromEntry(Current))
1991 continue;
1992
1993 WorkList.insert_range(predecessors(Current));
1994
1995 if (WorkList.size() >= MemorySSAPathCheckLimit)
1996 return std::nullopt;
1997 }
1998 NumCFGSuccess++;
1999 }
2000
2001 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
2002 // potentially dead.
2003 return {MaybeDeadAccess};
2004}
2005
// Delete instruction \p SI and, transitively, any of its operands that become
// trivially dead, updating MemorySSA and DSE bookkeeping as we go. Deleted
// MemoryDefs are recorded in \p Deleted (if non-null) so callers can skip
// their now-stale MemoryAccess pointers.
void DSEState::deleteDeadInstruction(Instruction *SI,
                                     SmallPtrSetImpl<MemoryAccess *> *Deleted) {
  MemorySSAUpdater Updater(&MSSA);
  SmallVector<Instruction *, 32> NowDeadInsts;
  NowDeadInsts.push_back(SI);
  // Pre-decrement so the increment for SI inside the loop nets to zero; only
  // the additional transitively-deleted instructions change the statistic.
  --NumFastOther;

  while (!NowDeadInsts.empty()) {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();
    ++NumFastOther;

    // Try to preserve debug information attached to the dead instruction.
    salvageDebugInfo(*DeadInst);
    salvageKnowledge(DeadInst);

    // Remove the Instruction from MSSA.
    MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
    bool IsMemDef = MA && isa<MemoryDef>(MA);
    if (MA) {
      if (IsMemDef) {
        auto *MD = cast<MemoryDef>(MA);
        SkipStores.insert(MD);
        if (Deleted)
          Deleted->insert(MD);
        if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
          if (SI->getValueOperand()->getType()->isPointerTy()) {
            // Deleting a store of a pointer may change whether its underlying
            // object is considered captured/visible after return; drop the
            // cached answers and request another end-of-function iteration.
            const Value *UO = getUnderlyingObject(SI->getValueOperand());
            if (CapturedBeforeReturn.erase(UO))
              ShouldIterateEndOfFunctionDSE = true;
            InvisibleToCallerAfterRet.erase(UO);
            InvisibleToCallerAfterRetBounded.erase(UO);
          }
        }
      }

      Updater.removeMemoryAccess(MA);
    }

    // Drop any partial-overlap interval bookkeeping for this instruction.
    auto I = IOLs.find(DeadInst->getParent());
    if (I != IOLs.end())
      I->second.erase(DeadInst);
    // Remove its operands
    for (Use &O : DeadInst->operands())
      if (Instruction *OpI = dyn_cast<Instruction>(O)) {
        O.set(PoisonValue::get(O->getType()));
        if (isInstructionTriviallyDead(OpI, &TLI))
          NowDeadInsts.push_back(OpI);
      }

    EA.removeInstruction(DeadInst);
    // Remove memory defs directly if they don't produce results, but only
    // queue other dead instructions for later removal. They may have been
    // used as memory locations that have been cached by BatchAA. Removing
    // them here may lead to newly created instructions to be allocated at the
    // same address, yielding stale cache entries.
    if (IsMemDef && DeadInst->getType()->isVoidTy())
      DeadInst->eraseFromParent();
    else
      ToRemove.push_back(DeadInst);
  }
}
2067
2068bool DSEState::mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
2069 const Value *KillingUndObj) {
2070 // First see if we can ignore it by using the fact that KillingI is an
2071 // alloca/alloca like object that is not visible to the caller during
2072 // execution of the function.
2073 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
2074 return false;
2075
2076 if (KillingI->getParent() == DeadI->getParent())
2077 return ThrowingBlocks.count(KillingI->getParent());
2078 return !ThrowingBlocks.empty();
2079}
2080
2081bool DSEState::isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
2082 // If DeadI may throw it acts as a barrier, unless we are to an
2083 // alloca/alloca like object that does not escape.
2084 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
2085 return true;
2086
2087 // If DeadI is an atomic load/store stronger than monotonic, do not try to
2088 // eliminate/reorder it.
2089 if (DeadI->isAtomic()) {
2090 if (auto *LI = dyn_cast<LoadInst>(DeadI))
2091 return isStrongerThanMonotonic(LI->getOrdering());
2092 if (auto *SI = dyn_cast<StoreInst>(DeadI))
2093 return isStrongerThanMonotonic(SI->getOrdering());
2094 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
2095 return isStrongerThanMonotonic(ARMW->getOrdering());
2096 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
2097 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
2098 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
2099 llvm_unreachable("other instructions should be skipped in MemorySSA");
2100 }
2101 return false;
2102}
2103
2104bool DSEState::eliminateDeadWritesAtEndOfFunction() {
2105 bool MadeChange = false;
2106 LLVM_DEBUG(
2107 dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n");
2108 do {
2109 ShouldIterateEndOfFunctionDSE = false;
2110 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
2111 if (SkipStores.contains(Def))
2112 continue;
2113
2114 Instruction *DefI = Def->getMemoryInst();
2115 auto DefLoc = getLocForWrite(DefI);
2116 if (!DefLoc || !isRemovable(DefI)) {
2117 LLVM_DEBUG(dbgs() << " ... could not get location for write or "
2118 "instruction not removable.\n");
2119 continue;
2120 }
2121
2122 // NOTE: Currently eliminating writes at the end of a function is
2123 // limited to MemoryDefs with a single underlying object, to save
2124 // compile-time. In practice it appears the case with multiple
2125 // underlying objects is very uncommon. If it turns out to be important,
2126 // we can use getUnderlyingObjects here instead.
2127 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
2128 if (!isInvisibleToCallerAfterRet(UO, DefLoc->Ptr, DefLoc->Size))
2129 continue;
2130
2131 if (isWriteAtEndOfFunction(Def, *DefLoc)) {
2132 // See through pointer-to-pointer bitcasts
2133 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
2134 "of the function\n");
2136 ++NumFastStores;
2137 MadeChange = true;
2138 }
2139 }
2140 } while (ShouldIterateEndOfFunctionDSE);
2141 return MadeChange;
2142}
2143
2144bool DSEState::tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
2145 Instruction *DefI = Def->getMemoryInst();
2146 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2147 if (!MemSet)
2148 // TODO: Could handle zero store to small allocation as well.
2149 return false;
2150 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2151 if (!StoredConstant || !StoredConstant->isNullValue())
2152 return false;
2153
2154 if (!isRemovable(DefI))
2155 // The memset might be volatile..
2156 return false;
2157
2158 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
2159 F.hasFnAttribute(Attribute::SanitizeAddress) ||
2160 F.hasFnAttribute(Attribute::SanitizeHWAddress) || F.getName() == "calloc")
2161 return false;
2162 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
2163 if (!Malloc)
2164 return false;
2165 auto *InnerCallee = Malloc->getCalledFunction();
2166 if (!InnerCallee)
2167 return false;
2168 LibFunc Func = NotLibFunc;
2169 StringRef ZeroedVariantName;
2170 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
2171 Func != LibFunc_malloc) {
2172 Attribute Attr = Malloc->getFnAttr("alloc-variant-zeroed");
2173 if (!Attr.isValid())
2174 return false;
2175 ZeroedVariantName = Attr.getValueAsString();
2176 if (ZeroedVariantName.empty())
2177 return false;
2178 }
2179
2180 // Gracefully handle malloc with unexpected memory attributes.
2181 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
2182 if (!MallocDef)
2183 return false;
2184
2185 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
2186 // Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
2187 // of malloc block
2188 auto *MallocBB = Malloc->getParent(), *MemsetBB = Memset->getParent();
2189 if (MallocBB == MemsetBB)
2190 return true;
2191 auto *Ptr = Memset->getArgOperand(0);
2192 auto *TI = MallocBB->getTerminator();
2193 BasicBlock *TrueBB, *FalseBB;
2194 if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
2195 m_Zero()),
2196 TrueBB, FalseBB)))
2197 return false;
2198 if (MemsetBB != FalseBB)
2199 return false;
2200 return true;
2201 };
2202
2203 if (Malloc->getOperand(0) != MemSet->getLength())
2204 return false;
2205 if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Malloc, MemSet) ||
2206 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
2207 return false;
2208 IRBuilder<> IRB(Malloc);
2209 assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
2210 Value *Calloc = nullptr;
2211 if (!ZeroedVariantName.empty()) {
2212 LLVMContext &Ctx = Malloc->getContext();
2213 AttributeList Attrs = InnerCallee->getAttributes();
2214 AllocFnKind AllocKind =
2215 Attrs.getFnAttr(Attribute::AllocKind).getAllocKind() |
2216 AllocFnKind::Zeroed;
2217 AllocKind &= ~AllocFnKind::Uninitialized;
2218 Attrs =
2219 Attrs.addFnAttribute(Ctx, Attribute::getWithAllocKind(Ctx, AllocKind))
2220 .removeFnAttribute(Ctx, "alloc-variant-zeroed");
2221 FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
2222 ZeroedVariantName, InnerCallee->getFunctionType(), Attrs);
2223 cast<Function>(ZeroedVariant.getCallee())
2224 ->setCallingConv(Malloc->getCallingConv());
2226 Args.append(Malloc->arg_begin(), Malloc->arg_end());
2227 CallInst *CI = IRB.CreateCall(ZeroedVariant, Args, ZeroedVariantName);
2228 CI->setCallingConv(Malloc->getCallingConv());
2229 Calloc = CI;
2230 } else {
2231 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
2232 Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0),
2233 IRB, TLI, Malloc->getType()->getPointerAddressSpace());
2234 }
2235 if (!Calloc)
2236 return false;
2237
2238 MemorySSAUpdater Updater(&MSSA);
2239 auto *NewAccess = Updater.createMemoryAccessAfter(cast<Instruction>(Calloc),
2240 nullptr, MallocDef);
2241 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
2242 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
2243 Malloc->replaceAllUsesWith(Calloc);
2245 return true;
2246}
2247
2248bool DSEState::dominatingConditionImpliesValue(MemoryDef *Def) {
2249 auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
2250 BasicBlock *StoreBB = StoreI->getParent();
2251 Value *StorePtr = StoreI->getPointerOperand();
2252 Value *StoreVal = StoreI->getValueOperand();
2253
2254 DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
2255 if (!IDom)
2256 return false;
2257
2258 auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
2259 if (!BI || !BI->isConditional())
2260 return false;
2261
2262 // In case both blocks are the same, it is not possible to determine
2263 // if optimization is possible. (We would not want to optimize a store
2264 // in the FalseBB if condition is true and vice versa.)
2265 if (BI->getSuccessor(0) == BI->getSuccessor(1))
2266 return false;
2267
2268 Instruction *ICmpL;
2269 CmpPredicate Pred;
2270 if (!match(BI->getCondition(),
2271 m_c_ICmp(Pred,
2272 m_CombineAnd(m_Load(m_Specific(StorePtr)),
2273 m_Instruction(ICmpL)),
2274 m_Specific(StoreVal))) ||
2275 !ICmpInst::isEquality(Pred))
2276 return false;
2277
2278 // In case the else blocks also branches to the if block or the other way
2279 // around it is not possible to determine if the optimization is possible.
2280 if (Pred == ICmpInst::ICMP_EQ &&
2281 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
2282 StoreBB))
2283 return false;
2284
2285 if (Pred == ICmpInst::ICMP_NE &&
2286 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
2287 StoreBB))
2288 return false;
2289
2290 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
2291 MemoryAccess *ClobAcc =
2292 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
2293
2294 return MSSA.dominates(ClobAcc, LoadAcc);
2295}
2296
// Check if \p Def is a no-op store: the value written is provably already
// present at the destination. Three patterns are recognized: storing the
// allocation's known initial value with no intervening clobber, a store
// whose value is implied by a dominating equality branch, and a store that
// writes back a value loaded from the same pointer.
bool DSEState::storeIsNoop(MemoryDef *Def, const Value *DefUO) {
  Instruction *DefI = Def->getMemoryInst();
  StoreInst *Store = dyn_cast<StoreInst>(DefI);
  MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
  Constant *StoredConstant = nullptr;
  if (Store)
    StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
  else if (MemSet)
    StoredConstant = dyn_cast<Constant>(MemSet->getValue());
  else
    return false;

  if (!isRemovable(DefI))
    return false;

  if (StoredConstant) {
    // Storing the allocation's initial value (e.g. zero into zero-init
    // memory) is a no-op if nothing modified the location since allocation.
    Constant *InitC =
        getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
    // If the clobbering access is LiveOnEntry, no instructions between them
    // can modify the memory location.
    if (InitC && InitC == StoredConstant)
      return MSSA.isLiveOnEntryDef(
          MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
  }

  // The remaining patterns only apply to plain StoreInsts.
  if (!Store)
    return false;

  if (dominatingConditionImpliesValue(Def))
    return true;

  // Load/store round-trip: `store (load p), p` with no clobber in between.
  if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
    if (LoadI->getPointerOperand() == Store->getOperand(1)) {
      // Get the defining access for the load.
      auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
      // Fast path: the defining accesses are the same.
      if (LoadAccess == Def->getDefiningAccess())
        return true;

      // Look through phi accesses. Recursively scan all phi accesses by
      // adding them to a worklist. Bail when we run into a memory def that
      // does not match LoadAccess.
      SetVector<MemoryAccess *> ToCheck;
      MemoryAccess *Current =
          MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
      // We don't want to bail when we run into the store memory def. But,
      // the phi access may point to it. So, pretend like we've already
      // checked it.
      ToCheck.insert(Def);
      ToCheck.insert(Current);
      // Start at current (1) to simulate already having checked Def.
      for (unsigned I = 1; I < ToCheck.size(); ++I) {
        Current = ToCheck[I];
        if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
          // Check all the operands.
          for (auto &Use : PhiAccess->incoming_values())
            ToCheck.insert(cast<MemoryAccess>(&Use));
          continue;
        }

        // If we found a memory def, bail. This happens when we have an
        // unrelated write in between an otherwise noop store.
        assert(isa<MemoryDef>(Current) && "Only MemoryDefs should reach here.");
        // TODO: Skip no alias MemoryDefs that have no aliasing reads.
        // We are searching for the definition of the store's destination.
        // So, if that is the same definition as the load, then this is a
        // noop. Otherwise, fail.
        if (LoadAccess != Current)
          return false;
      }
      return true;
    }
  }

  return false;
}
2373
2374bool DSEState::removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2375 bool Changed = false;
2376 for (auto OI : IOL) {
2377 Instruction *DeadI = OI.first;
2378 MemoryLocation Loc = *getLocForWrite(DeadI);
2379 assert(isRemovable(DeadI) && "Expect only removable instruction");
2380
2381 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2382 int64_t DeadStart = 0;
2383 uint64_t DeadSize = Loc.Size.getValue();
2384 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2385 OverlapIntervalsTy &IntervalMap = OI.second;
2386 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2387 if (IntervalMap.empty())
2388 continue;
2389 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2390 }
2391 return Changed;
2392}
2393
2394bool DSEState::eliminateRedundantStoresOfExistingValues() {
2395 bool MadeChange = false;
2396 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2397 "already existing value\n");
2398 for (auto *Def : MemDefs) {
2399 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2400 continue;
2401
2402 Instruction *DefInst = Def->getMemoryInst();
2403 auto MaybeDefLoc = getLocForWrite(DefInst);
2404 if (!MaybeDefLoc || !isRemovable(DefInst))
2405 continue;
2406
2407 MemoryDef *UpperDef;
2408 // To conserve compile-time, we avoid walking to the next clobbering def.
2409 // Instead, we just try to get the optimized access, if it exists. DSE
2410 // will try to optimize defs during the earlier traversal.
2411 if (Def->isOptimized())
2412 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2413 else
2414 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2415 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2416 continue;
2417
2418 Instruction *UpperInst = UpperDef->getMemoryInst();
2419 auto IsRedundantStore = [&]() {
2420 // We don't care about differences in call attributes here.
2421 if (DefInst->isIdenticalToWhenDefined(UpperInst,
2422 /*IntersectAttrs=*/true))
2423 return true;
2424 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2425 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2426 // MemSetInst must have a write location.
2427 auto UpperLoc = getLocForWrite(UpperInst);
2428 if (!UpperLoc)
2429 return false;
2430 int64_t InstWriteOffset = 0;
2431 int64_t DepWriteOffset = 0;
2432 auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
2433 InstWriteOffset, DepWriteOffset);
2434 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2435 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2436 OR == OW_Complete;
2437 }
2438 }
2439 return false;
2440 };
2441
2442 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2443 continue;
2444 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2445 << '\n');
2446 deleteDeadInstruction(DefInst);
2447 NumRedundantStores++;
2448 MadeChange = true;
2449 }
2450 return MadeChange;
2451}
2452
2454DSEState::getInitializesArgMemLoc(const Instruction *I) {
2455 const CallBase *CB = dyn_cast<CallBase>(I);
2456 if (!CB)
2457 return {};
2458
2459 // Collect aliasing arguments and their initializes ranges.
2460 SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
2461 for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2462 Value *CurArg = CB->getArgOperand(Idx);
2463 if (!CurArg->getType()->isPointerTy())
2464 continue;
2465
2466 ConstantRangeList Inits;
2467 Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
2468 // initializes on byval arguments refers to the callee copy, not the
2469 // original memory the caller passed in.
2470 if (InitializesAttr.isValid() && !CB->isByValArgument(Idx))
2471 Inits = InitializesAttr.getValueAsConstantRangeList();
2472
2473 // Check whether "CurArg" could alias with global variables. We require
2474 // either it's function local and isn't captured before or the "CB" only
2475 // accesses arg or inaccessible mem.
2476 if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2477 !isFuncLocalAndNotCaptured(CurArg, CB, EA))
2478 Inits = ConstantRangeList();
2479
2480 // We don't perform incorrect DSE on unwind edges in the current function,
2481 // and use the "initializes" attribute to kill dead stores if:
2482 // - The call does not throw exceptions, "CB->doesNotThrow()".
2483 // - Or the callee parameter has "dead_on_unwind" attribute.
2484 // - Or the argument is invisible to caller on unwind, and there are no
2485 // unwind edges from this call in the current function (e.g. `CallInst`).
2486 bool IsDeadOrInvisibleOnUnwind =
2487 CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
2488 (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
2489 ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
2490 bool FoundAliasing = false;
2491 for (auto &[Arg, AliasList] : Arguments) {
2492 auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
2494 if (AAR == AliasResult::NoAlias) {
2495 continue;
2496 } else if (AAR == AliasResult::MustAlias) {
2497 FoundAliasing = true;
2498 AliasList.push_back(InitInfo);
2499 } else {
2500 // For PartialAlias and MayAlias, there is an offset or may be an
2501 // unknown offset between the arguments and we insert an empty init
2502 // range to discard the entire initializes info while intersecting.
2503 FoundAliasing = true;
2504 AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
2505 ConstantRangeList()});
2506 }
2507 }
2508 if (!FoundAliasing)
2509 Arguments[CurArg] = {InitInfo};
2510 }
2511
2513 for (const auto &[_, Args] : Arguments) {
2514 auto IntersectedRanges =
2516 if (IntersectedRanges.empty())
2517 continue;
2518
2519 for (const auto &Arg : Args) {
2520 for (const auto &Range : IntersectedRanges) {
2521 int64_t Start = Range.getLower().getSExtValue();
2522 int64_t End = Range.getUpper().getSExtValue();
2523 // For now, we only handle locations starting at offset 0.
2524 if (Start == 0)
2525 Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
2526 LocationSize::precise(End - Start),
2527 CB->getAAMetadata()));
2528 }
2529 }
2530 }
2531 return Locations;
2532}
2533
2534std::pair<bool, bool>
2535DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2536 bool Changed = false;
2537 bool DeletedKillingLoc = false;
2538 unsigned ScanLimit = MemorySSAScanLimit;
2539 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2540 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2541 // Worklist of MemoryAccesses that may be killed by
2542 // "KillingLocWrapper.MemDef".
2543 SmallSetVector<MemoryAccess *, 8> ToCheck;
2544 // Track MemoryAccesses that have been deleted in the loop below, so we can
2545 // skip them. Don't use SkipStores for this, which may contain reused
2546 // MemoryAccess addresses.
2547 SmallPtrSet<MemoryAccess *, 8> Deleted;
2548 [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2549 ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());
2550
2551 // Check if MemoryAccesses in the worklist are killed by
2552 // "KillingLocWrapper.MemDef".
2553 for (unsigned I = 0; I < ToCheck.size(); I++) {
2554 MemoryAccess *Current = ToCheck[I];
2555 if (Deleted.contains(Current))
2556 continue;
2557 std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2558 KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
2559 KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2560 isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
2561 KillingLocWrapper.DefByInitializesAttr);
2562
2563 if (!MaybeDeadAccess) {
2564 LLVM_DEBUG(dbgs() << " finished walk\n");
2565 continue;
2566 }
2567 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2568 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2569 if (isa<MemoryPhi>(DeadAccess)) {
2570 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2571 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2572 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2573 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2574 BasicBlock *PhiBlock = DeadAccess->getBlock();
2575
2576 // We only consider incoming MemoryAccesses that come before the
2577 // MemoryPhi. Otherwise we could discover candidates that do not
2578 // strictly dominate our starting def.
2579 if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
2580 ToCheck.insert(IncomingAccess);
2581 }
2582 continue;
2583 }
2584 // We cannot apply the initializes attribute to DeadAccess/DeadDef.
2585 // It would incorrectly consider a call instruction as redundant store
2586 // and remove this call instruction.
2587 // TODO: this conflates the existence of a MemoryLocation with being able
2588 // to delete the instruction. Fix isRemovable() to consider calls with
2589 // side effects that cannot be removed, e.g. calls with the initializes
2590 // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2591 MemoryDefWrapper DeadDefWrapper(
2592 cast<MemoryDef>(DeadAccess),
2593 getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
2594 /*ConsiderInitializesAttr=*/false));
2595 assert(DeadDefWrapper.DefinedLocations.size() == 1);
2596 MemoryLocationWrapper &DeadLocWrapper =
2597 DeadDefWrapper.DefinedLocations.front();
2598 LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
2599 ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
2600 NumGetDomMemoryDefPassed++;
2601
2602 if (!DebugCounter::shouldExecute(MemorySSACounter))
2603 continue;
2604 if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
2605 if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2606 continue;
2607 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2608 << *DeadLocWrapper.DefInst << "\n KILLER: "
2609 << *KillingLocWrapper.DefInst << '\n');
2610 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2611 ++NumFastStores;
2612 Changed = true;
2613 } else {
2614 // Check if DeadI overwrites KillingI.
2615 int64_t KillingOffset = 0;
2616 int64_t DeadOffset = 0;
2617 OverwriteResult OR =
2618 isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
2619 KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2620 KillingOffset, DeadOffset);
2621 if (OR == OW_MaybePartial) {
2622 auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
2623 OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2624 KillingOffset, DeadOffset,
2625 DeadLocWrapper.DefInst, IOL);
2626 }
2627 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2628 auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
2629 auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
2630 // We are re-using tryToMergePartialOverlappingStores, which requires
2631 // DeadSI to dominate KillingSI.
2632 // TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
2633 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2634 if (Constant *Merged = tryToMergePartialOverlappingStores(
2635 KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
2636 &DT)) {
2637
2638 // Update stored value of earlier store to merged constant.
2639 DeadSI->setOperand(0, Merged);
2640 ++NumModifiedStores;
2641 Changed = true;
2642 DeletedKillingLoc = true;
2643
2644 // Remove killing store and remove any outstanding overlap
2645 // intervals for the updated store.
2646 deleteDeadInstruction(KillingSI, &Deleted);
2647 auto I = IOLs.find(DeadSI->getParent());
2648 if (I != IOLs.end())
2649 I->second.erase(DeadSI);
2650 break;
2651 }
2652 }
2653 }
2654 if (OR == OW_Complete) {
2655 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2656 << *DeadLocWrapper.DefInst << "\n KILLER: "
2657 << *KillingLocWrapper.DefInst << '\n');
2658 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2659 ++NumFastStores;
2660 Changed = true;
2661 }
2662 }
2663 }
2664
2665 assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2666 "SkipStores and Deleted out of sync?");
2667
2668 return {Changed, DeletedKillingLoc};
2669}
2670
2671bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2672 if (KillingDefWrapper.DefinedLocations.empty()) {
2673 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2674 << *KillingDefWrapper.DefInst << "\n");
2675 return false;
2676 }
2677
2678 bool MadeChange = false;
2679 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2680 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2681 << *KillingLocWrapper.MemDef << " ("
2682 << *KillingLocWrapper.DefInst << ")\n");
2683 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2684 MadeChange |= Changed;
2685
2686 // Check if the store is a no-op.
2687 if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
2688 KillingLocWrapper.UnderlyingObject)) {
2689 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2690 << *KillingLocWrapper.DefInst << '\n');
2691 deleteDeadInstruction(KillingLocWrapper.DefInst);
2692 NumRedundantStores++;
2693 MadeChange = true;
2694 continue;
2695 }
2696 // Can we form a calloc from a memset/malloc pair?
2697 if (!DeletedKillingLoc &&
2698 tryFoldIntoCalloc(KillingLocWrapper.MemDef,
2699 KillingLocWrapper.UnderlyingObject)) {
2700 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2701 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2702 deleteDeadInstruction(KillingLocWrapper.DefInst);
2703 MadeChange = true;
2704 continue;
2705 }
2706 }
2707 return MadeChange;
2708}
2709
2712 const TargetLibraryInfo &TLI,
2713 const LoopInfo &LI) {
2714 bool MadeChange = false;
2715 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2716 // For each store:
2717 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2718 MemoryDef *KillingDef = State.MemDefs[I];
2719 if (State.SkipStores.count(KillingDef))
2720 continue;
2721
2722 MemoryDefWrapper KillingDefWrapper(
2723 KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
2725 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2726 }
2727
2729 for (auto &KV : State.IOLs)
2730 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2731
2732 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2733 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2734
2735 while (!State.ToRemove.empty()) {
2736 Instruction *DeadInst = State.ToRemove.pop_back_val();
2737 DeadInst->eraseFromParent();
2738 }
2739
2740 return MadeChange;
2741}
2742
2743//===----------------------------------------------------------------------===//
2744// DSE Pass
2745//===----------------------------------------------------------------------===//
2750 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2752 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2753
2754 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2755
2756#ifdef LLVM_ENABLE_STATS
2758 for (auto &I : instructions(F))
2759 NumRemainingStores += isa<StoreInst>(&I);
2760#endif
2761
2762 if (!Changed)
2763 return PreservedAnalyses::all();
2764
2768 PA.preserve<LoopAnalysis>();
2769 return PA;
2770}
2771
2772namespace {
2773
2774/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2775class DSELegacyPass : public FunctionPass {
2776public:
2777 static char ID; // Pass identification, replacement for typeid
2778
2779 DSELegacyPass() : FunctionPass(ID) {
2781 }
2782
2783 bool runOnFunction(Function &F) override {
2784 if (skipFunction(F))
2785 return false;
2786
2787 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2788 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2789 const TargetLibraryInfo &TLI =
2790 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2791 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2792 PostDominatorTree &PDT =
2793 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2794 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2795
2796 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2797
2798#ifdef LLVM_ENABLE_STATS
2800 for (auto &I : instructions(F))
2801 NumRemainingStores += isa<StoreInst>(&I);
2802#endif
2803
2804 return Changed;
2805 }
2806
2807 void getAnalysisUsage(AnalysisUsage &AU) const override {
2808 AU.setPreservesCFG();
2809 AU.addRequired<AAResultsWrapperPass>();
2810 AU.addRequired<TargetLibraryInfoWrapperPass>();
2811 AU.addPreserved<GlobalsAAWrapperPass>();
2812 AU.addRequired<DominatorTreeWrapperPass>();
2813 AU.addPreserved<DominatorTreeWrapperPass>();
2814 AU.addRequired<PostDominatorTreeWrapperPass>();
2815 AU.addRequired<MemorySSAWrapperPass>();
2816 AU.addPreserved<PostDominatorTreeWrapperPass>();
2817 AU.addPreserved<MemorySSAWrapperPass>();
2818 AU.addRequired<LoopInfoWrapperPass>();
2819 AU.addPreserved<LoopInfoWrapperPass>();
2820 AU.addRequired<AssumptionCacheTracker>();
2821 }
2822};
2823
2824} // end anonymous namespace
2825
2826char DSELegacyPass::ID = 0;
2827
2828INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2829 false)
2839INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2840 false)
2841
2843 return new DSELegacyPass();
2844}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
MapVector< Instruction *, OverlapIntervalsTy > InstOverlapIntervalsTy
static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller)
static cl::opt< bool > EnableInitializesImprovement("enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE"))
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
static bool isNoopIntrinsic(Instruction *I)
static ConstantRangeList getIntersectedInitRangeList(ArrayRef< ArgumentInitInfo > Args, bool CallHasNoUnwindAttr)
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
std::map< int64_t, int64_t > OverlapIntervalsTy
static void pushMemUses(MemoryAccess *Acc, SmallVectorImpl< MemoryAccess * > &WorkList, SmallPtrSetImpl< MemoryAccess * > &Visited)
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, EarliestEscapeAnalysis &EA)
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo, uint64_t PtrOffset)
Update the attributes given that a memory access is updated (the dereferenced pointer could be moved ...
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
static bool hasInitializesAttr(Instruction *I)
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI, const LoopInfo &LI)
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
early cse Early CSE w MemorySSA
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void deleteDeadInstruction(Instruction *I)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An immutable pass that tracks lazily created AssumptionCache objects.
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
LLVM_ABI ArrayRef< ConstantRange > getValueAsConstantRangeList() const
Return the attribute's value as a ConstantRange array.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Get the attribute of a given kind from a given arg.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
LLVM_ABI bool onlyAccessesInaccessibleMemOrArgMem() const
Determine if the function may only access memory that is either inaccessible from the IR or pointed t...
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
LLVM_ABI Value * getArgOperandWithAttribute(Attribute::AttrKind Kind) const
If one of the arguments has the specified attribute, returns its operand value.
unsigned arg_size() const
This class represents a list of constant ranges.
bool empty() const
Return true if this list contains no members.
LLVM_ABI ConstantRangeList intersectWith(const ConstantRangeList &CRL) const
Return the range list that results from the intersection of this ConstantRangeList with another Const...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
static DIAssignID * getDistinct(LLVMContext &Context)
DbgVariableFragmentInfo FragmentInfo
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(CounterInfo &Counter)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
iterator_range< root_iterator > roots()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
CaptureComponents getCapturesBefore(const Value *Object, const Instruction *I, bool OrAt) override
Return how Object may be captured before instruction I, considering only provenance captures.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
bool isEquality() const
Return true if this predicate is either EQ or NE.
LLVM_ABI bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
const_iterator begin() const
bool empty() const
empty - Return true when no intervals are mapped.
const_iterator end() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
bool hasValue() const
static LocationSize precise(uint64_t Value)
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
Value * getLength() const
Value * getValue() const
BasicBlock * getBlock() const
Definition MemorySSA.h:162
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
void setOptimized(MemoryAccess *MA)
Definition MemorySSA.h:392
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition MemorySSA.h:1053
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:993
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI MemorySSAWalker * getSkipSelfWalker()
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition MemorySSA.h:740
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition MemorySSA.h:260
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition MemorySSA.h:257
PHITransAddr - An address value which tracks and handles phi translation.
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
LLVM_ABI bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Value * getAddr() const
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
LLVM_ABI bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
iterator begin() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:708
iterator_range< use_iterator > uses()
Definition Value.h:380
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:203
LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgVariableRecord *DVRAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to (Dest + S...
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
@ Uninitialized
Definition Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
AllocFnKind
Definition Attributes.h:53
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1730
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< po_iterator< T > > post_order(const T &G)
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:406
LLVM_ABI bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
LLVM_ABI Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI, unsigned AddrSpace)
Emit a call to the calloc function.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition Alignment.h:186
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
LLVM_ABI bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function-level.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI FunctionPass * createDeadStoreEliminationPass()
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
auto predecessors(const MachineBasicBlock *BB)
bool capturesAnything(CaptureComponents CC)
Definition ModRef.h:336
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:332
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Various options to control the behavior of getObjectSize.
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.