DeadStoreElimination.cpp
1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
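//
// Illustrative example: in the IR below the first store is dead because the
// second store completely overwrites the same location and no instruction in
// between may read it, so DSE can delete the first store:
//
//   store i32 1, ptr %p   ; MaybeDeadAccess, removed
//   store i32 2, ptr %p   ; StartDef (killing store)
//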
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
54#include "llvm/IR/Argument.h"
55#include "llvm/IR/BasicBlock.h"
56#include "llvm/IR/Constant.h"
57#include "llvm/IR/Constants.h"
58#include "llvm/IR/DataLayout.h"
59#include "llvm/IR/DebugInfo.h"
60#include "llvm/IR/Dominators.h"
61#include "llvm/IR/Function.h"
62#include "llvm/IR/IRBuilder.h"
64#include "llvm/IR/InstrTypes.h"
65#include "llvm/IR/Instruction.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/PassManager.h"
71#include "llvm/IR/Value.h"
73#include "llvm/Pass.h"
76#include "llvm/Support/Debug.h"
84#include <algorithm>
85#include <cassert>
86#include <cstdint>
87#include <iterator>
88#include <map>
89#include <optional>
90#include <utility>
91
92using namespace llvm;
93using namespace PatternMatch;
94
95#define DEBUG_TYPE "dse"
96
97STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
98STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
99STATISTIC(NumFastStores, "Number of stores deleted");
100STATISTIC(NumFastOther, "Number of other instrs removed");
101STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
102STATISTIC(NumModifiedStores, "Number of stores modified");
103STATISTIC(NumCFGChecks, "Number of blocks checked when proving a store is dead along all paths");
104STATISTIC(NumCFGTries, "Number of times CFG-based dead store checks were attempted");
105STATISTIC(NumCFGSuccess, "Number of CFG-based dead store checks that succeeded");
106STATISTIC(NumGetDomMemoryDefPassed,
107 "Number of times a valid candidate is returned from getDomMemoryDef");
108STATISTIC(NumDomMemDefChecks,
109 "Number iterations check for reads in getDomMemoryDef");
110
111DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
112 "Controls which MemoryDefs are eliminated.");
113
114static cl::opt<bool>
115EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
116 cl::init(true), cl::Hidden,
117 cl::desc("Enable partial-overwrite tracking in DSE"));
118
119static cl::opt<bool>
120EnablePartialStoreMerging("enable-dse-partial-store-merging",
121 cl::init(true), cl::Hidden,
122 cl::desc("Enable partial store merging in DSE"));
123
124static cl::opt<unsigned>
125 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
126 cl::desc("The number of memory instructions to scan for "
127 "dead store elimination (default = 150)"));
129 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
130 cl::desc("The maximum number of steps while walking upwards to find "
131 "MemoryDefs that may be killed (default = 90)"));
132
134 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
135 cl::desc("The maximum number candidates that only partially overwrite the "
136 "killing MemoryDef to consider"
137 " (default = 5)"));
138
140 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
141 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
142 "other stores per basic block (default = 5000)"));
143
145 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
146 cl::desc(
147 "The cost of a step in the same basic block as the killing MemoryDef"
148 "(default = 1)"));
149
150static cl::opt<unsigned>
151 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
152 cl::Hidden,
153 cl::desc("The cost of a step in a different basic "
154 "block than the killing MemoryDef "
155 "(default = 5)"));
156
158 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
159 cl::desc("The maximum number of blocks to check when trying to prove that "
160 "all paths to an exit go through a killing block (default = 50)"));
161
162// This flag allows or disallows DSE to optimize MemorySSA during its
163// traversal. Note that DSE optimizing MemorySSA may impact other passes
164// downstream of the DSE invocation and can lead to issues not being
165// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
166// those cases, the flag can be used to check if DSE's MemorySSA optimizations
167// impact follow-up passes.
168static cl::opt<bool>
169 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
170 cl::desc("Allow DSE to optimize memory accesses."));
171
172//===----------------------------------------------------------------------===//
173// Helper functions
174//===----------------------------------------------------------------------===//
175using OverlapIntervalsTy = std::map<int64_t, int64_t>;
176using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
177
178/// Returns true if the end of this instruction can be safely shortened in
179/// length.
180static bool isShortenableAtTheEnd(Instruction *I) {
181 // Don't shorten stores for now
182 if (isa<StoreInst>(I))
183 return false;
184
185 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
186 switch (II->getIntrinsicID()) {
187 default: return false;
188 case Intrinsic::memset:
189 case Intrinsic::memcpy:
190 case Intrinsic::memcpy_element_unordered_atomic:
191 case Intrinsic::memset_element_unordered_atomic:
192 // Do shorten memory intrinsics.
193 // FIXME: Add memmove if it's also safe to transform.
194 return true;
195 }
196 }
197
199 // Don't shorten libcalls for now.
199
200 return false;
201}
202
203/// Returns true if the beginning of this instruction can be safely shortened
204/// in length.
205static bool isShortenableAtTheBeginning(Instruction *I) {
206 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
207 // easily done by offsetting the source address.
208 return isa<AnyMemSetInst>(I);
209}
210
211static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
212 const TargetLibraryInfo &TLI,
213 const Function *F) {
214 uint64_t Size;
215 ObjectSizeOpts Opts;
216 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
217
218 if (getObjectSize(V, Size, DL, &TLI, Opts))
219 return Size;
220 return MemoryLocation::UnknownSize;
221}
222
223namespace {
224
225enum OverwriteResult {
226 OW_Begin,
227 OW_Complete,
228 OW_End,
229 OW_PartialEarlierWithFullLater,
230 OW_MaybePartial,
231 OW_None,
232 OW_Unknown
233};
234
235} // end anonymous namespace
236
237/// Check if two instructions are masked stores that completely
238/// overwrite one another. More specifically, \p KillingI has to
239/// overwrite \p DeadI.
240static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
241 const Instruction *DeadI,
242 BatchAAResults &AA) {
243 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
244 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
245 if (KillingII == nullptr || DeadII == nullptr)
246 return OW_Unknown;
247 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
248 return OW_Unknown;
249 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
250 // Type size.
251 VectorType *KillingTy =
252 cast<VectorType>(KillingII->getArgOperand(0)->getType());
253 VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
254 if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
255 return OW_Unknown;
256 // Element count.
257 if (KillingTy->getElementCount() != DeadTy->getElementCount())
258 return OW_Unknown;
259 // Pointers.
260 Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
261 Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
262 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
263 return OW_Unknown;
264 // Masks.
265 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
266 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
267 return OW_Unknown;
268 return OW_Complete;
269 }
270 return OW_Unknown;
271}
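//
// Illustrative example: two @llvm.masked.store calls storing the same vector
// type through the same pointer with the same mask are classified as
// OW_Complete, e.g.
//
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %a, ptr %p, i32 4,
//                                         <4 x i1> %m)   ; dead
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %b, ptr %p, i32 4,
//                                         <4 x i1> %m)   ; killing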
272
273/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
274/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
275/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
276/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
277/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
278/// overwritten by a killing (smaller) store which doesn't write outside the big
279/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
280/// NOTE: This function must only be called if both \p KillingLoc and \p
281/// DeadLoc belong to the same underlying object with valid \p KillingOff and
282/// \p DeadOff.
283static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
284 const MemoryLocation &DeadLoc,
285 int64_t KillingOff, int64_t DeadOff,
286 Instruction *DeadI,
287 InstOverlapIntervalsTy &IOL) {
288 const uint64_t KillingSize = KillingLoc.Size.getValue();
289 const uint64_t DeadSize = DeadLoc.Size.getValue();
290 // We may now overlap, although the overlap is not complete. There might also
291 // be other incomplete overlaps, and together, they might cover the complete
292 // dead store.
293 // Note: The correctness of this logic depends on the fact that this function
294 // is never called with DepWrite when there are any intervening reads.
295 if (EnablePartialOverwriteTracking &&
296 KillingOff < int64_t(DeadOff + DeadSize) &&
297 int64_t(KillingOff + KillingSize) >= DeadOff) {
298
299 // Insert our part of the overlap into the map.
300 auto &IM = IOL[DeadI];
301 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
302 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
303 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
304 << ")\n");
305
306 // Make sure that we only insert non-overlapping intervals and combine
307 // adjacent intervals. The intervals are stored in the map with the ending
308 // offset as the key (in the half-open sense) and the starting offset as
309 // the value.
310 int64_t KillingIntStart = KillingOff;
311 int64_t KillingIntEnd = KillingOff + KillingSize;
312
313 // Find any intervals ending at, or after, KillingIntStart which start
314 // before KillingIntEnd.
315 auto ILI = IM.lower_bound(KillingIntStart);
316 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
317 // This existing interval is overlapped with the current store somewhere
318 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
319 // intervals and adjusting our start and end.
320 KillingIntStart = std::min(KillingIntStart, ILI->second);
321 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
322 ILI = IM.erase(ILI);
323
324 // Continue erasing and adjusting our end in case other previous
325 // intervals are also overlapped with the current store.
326 //
327 // |--- dead 1 ---| |--- dead 2 ---|
328 // |------- killing---------|
329 //
330 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
331 assert(ILI->second > KillingIntStart && "Unexpected interval");
332 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
333 ILI = IM.erase(ILI);
334 }
335 }
336
337 IM[KillingIntEnd] = KillingIntStart;
338
339 ILI = IM.begin();
340 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
341 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
342 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
343 << ") Composite KillingLoc [" << ILI->second << ", "
344 << ILI->first << ")\n");
345 ++NumCompletePartials;
346 return OW_Complete;
347 }
348 }
349
350 // Check for a dead store which writes to all the memory locations that
351 // the killing store writes to.
352 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
353 int64_t(DeadOff + DeadSize) > KillingOff &&
354 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
355 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead store [" << DeadOff
356 << ", " << int64_t(DeadOff + DeadSize)
357 << ") by a killing store [" << KillingOff << ", "
358 << int64_t(KillingOff + KillingSize) << ")\n");
359 // TODO: Maybe come up with a better name?
360 return OW_PartialEarlierWithFullLater;
361 }
362
363 // Another interesting case is if the killing store overwrites the end of the
364 // dead store.
365 //
366 // |--dead--|
367 // |-- killing --|
368 //
369 // In this case we may want to trim the size of the dead store to avoid
370 // generating stores to addresses which will definitely be overwritten by
371 // the killing store.
372 if (!EnablePartialOverwriteTracking &&
373 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
374 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
375 return OW_End;
376
377 // Finally, we also need to check if the killing store overwrites the
378 // beginning of the dead store.
379 //
380 // |--dead--|
381 // |-- killing --|
382 //
383 // In this case we may want to move the destination address and trim the
384 // size of the dead store to avoid generating stores to addresses which will
385 // definitely be overwritten by the killing store.
386 if (!EnablePartialOverwriteTracking &&
387 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
388 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
389 "Expect to be handled as OW_Complete");
390 return OW_Begin;
391 }
392 // Otherwise, they don't completely overlap.
393 return OW_Unknown;
394}
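//
// Illustrative example of the interval tracking in isPartialOverwrite: for a
// dead store covering [0, 16) that is first overwritten by a killing store of
// [0, 8) and later by another killing store of [8, 16), the two recorded
// intervals merge into the composite range [0, 16). That range covers the
// whole dead store, so the second query returns OW_Complete.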
395
396/// Returns true if the memory which is accessed by the second instruction is not
397/// modified between the first and the second instruction.
398/// Precondition: Second instruction must be dominated by the first
399/// instruction.
400static bool
401memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
402 BatchAAResults &AA, const DataLayout &DL,
403 DominatorTree *DT) {
404 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
405 // instructions which can modify the memory location accessed by SecondI.
406 //
407 // While doing the walk keep track of the address to check. It might be
408 // different in different basic blocks due to PHI translation.
409 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
410 SmallVector<BlockAddressPair, 16> WorkList;
411 // Keep track of the address we visited each block with. Bail out if we
412 // visit a block with different addresses.
413 DenseMap<BasicBlock *, Value *> Visited;
414
415 BasicBlock::iterator FirstBBI(FirstI);
416 ++FirstBBI;
417 BasicBlock::iterator SecondBBI(SecondI);
418 BasicBlock *FirstBB = FirstI->getParent();
419 BasicBlock *SecondBB = SecondI->getParent();
420 MemoryLocation MemLoc;
421 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
422 MemLoc = MemoryLocation::getForDest(MemSet);
423 else
424 MemLoc = MemoryLocation::get(SecondI);
425
426 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
427
428 // Start checking the SecondBB.
429 WorkList.push_back(
430 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
431 bool isFirstBlock = true;
432
433 // Check all blocks going backward until we reach the FirstBB.
434 while (!WorkList.empty()) {
435 BlockAddressPair Current = WorkList.pop_back_val();
436 BasicBlock *B = Current.first;
437 PHITransAddr &Addr = Current.second;
438 Value *Ptr = Addr.getAddr();
439
440 // Ignore instructions before FirstI if this is the FirstBB.
441 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
442
444 if (isFirstBlock) {
445 // Ignore instructions after SecondI if this is the first visit of SecondBB.
446 assert(B == SecondBB && "first block is not the store block");
447 EI = SecondBBI;
448 isFirstBlock = false;
449 } else {
450 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
451 // In this case we also have to look at instructions after SecondI.
452 EI = B->end();
453 }
454 for (; BI != EI; ++BI) {
455 Instruction *I = &*BI;
456 if (I->mayWriteToMemory() && I != SecondI)
457 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
458 return false;
459 }
460 if (B != FirstBB) {
461 assert(B != &FirstBB->getParent()->getEntryBlock() &&
462 "Should not hit the entry block because SI must be dominated by LI");
463 for (BasicBlock *Pred : predecessors(B)) {
464 PHITransAddr PredAddr = Addr;
465 if (PredAddr.needsPHITranslationFromBlock(B)) {
466 if (!PredAddr.isPotentiallyPHITranslatable())
467 return false;
468 if (!PredAddr.translateValue(B, Pred, DT, false))
469 return false;
470 }
471 Value *TranslatedPtr = PredAddr.getAddr();
472 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
473 if (!Inserted.second) {
474 // We already visited this block before. If it was with a different
475 // address - bail out!
476 if (TranslatedPtr != Inserted.first->second)
477 return false;
478 // ... otherwise just skip it.
479 continue;
480 }
481 WorkList.push_back(std::make_pair(Pred, PredAddr));
482 }
483 }
484 }
485 return true;
486}
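//
// Illustrative example: in
//
//   store i32 0, ptr %p    ; FirstI
//   store i32 1, ptr %q    ; does not alias %p
//   store i32 2, ptr %p    ; SecondI
//
// the backwards walk from SecondI finds no instruction that may write the
// location of SecondI, so memoryIsNotModifiedBetween returns true (assuming
// AA proves %p and %q do not alias).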
487
488static void shortenAssignment(Instruction *Inst, uint64_t OldOffsetInBits,
489 uint64_t OldSizeInBits, uint64_t NewSizeInBits,
490 bool IsOverwriteEnd) {
491 DIExpression::FragmentInfo DeadFragment;
492 DeadFragment.SizeInBits = OldSizeInBits - NewSizeInBits;
493 DeadFragment.OffsetInBits =
494 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
495
496 auto CreateDeadFragExpr = [Inst, DeadFragment]() {
497 // FIXME: This should be using the DIExpression in the Alloca's dbg.assign
498 // for the variable, since that could also contain a fragment?
499 return *DIExpression::createFragmentExpression(
500 DIExpression::get(Inst->getContext(), std::nullopt),
501 DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
502 };
503
504 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
505 // link to any instructions. Created in the loop below (once).
506 DIAssignID *LinkToNothing = nullptr;
507
508 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
509 // overlapping dbg.assign intrinsic.
510 for (auto *DAI : at::getAssignmentMarkers(Inst)) {
511 if (auto FragInfo = DAI->getExpression()->getFragmentInfo()) {
512 if (!DIExpression::fragmentsOverlap(*FragInfo, DeadFragment))
513 continue;
514 }
515
516 // Fragments overlap: insert a new dbg.assign for this dead part.
517 auto *NewAssign = cast<DbgAssignIntrinsic>(DAI->clone());
518 NewAssign->insertAfter(DAI);
519 if (!LinkToNothing)
520 LinkToNothing = DIAssignID::getDistinct(Inst->getContext());
521 NewAssign->setAssignId(LinkToNothing);
522 NewAssign->setExpression(CreateDeadFragExpr());
523 NewAssign->setKillAddress();
524 }
525}
526
527static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
528 uint64_t &DeadSize, int64_t KillingStart,
529 uint64_t KillingSize, bool IsOverwriteEnd) {
530 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
531 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
532
533 // We assume that memset/memcpy operates in chunks of the "largest" native
534 // type size and aligned on the same value. That means the optimal start and
535 // size of memset/memcpy should be a multiple of the preferred alignment of
536 // that type, i.e. there is no sense in trying to reduce the store size any
537 // further since any "extra" stores come for free anyway.
538 // On the other hand, maximum alignment we can achieve is limited by alignment
539 // of initial store.
540
541 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
542 // "largest" native type.
543 // Note: What is the proper way to get that value?
544 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
545 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
546
547 int64_t ToRemoveStart = 0;
548 uint64_t ToRemoveSize = 0;
549 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
550 // maintained on the remaining store.
551 if (IsOverwriteEnd) {
552 // Calculate the required adjustment for 'KillingStart' in order to keep the
553 // remaining store size aligned on 'PrefAlign'.
554 uint64_t Off =
555 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
556 ToRemoveStart = KillingStart + Off;
557 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
558 return false;
559 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
560 } else {
561 ToRemoveStart = DeadStart;
562 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
563 "Not overlapping accesses?");
564 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
565 // Calculate the required adjustment for 'ToRemoveSize' in order to keep the
566 // start of the remaining store aligned on 'PrefAlign'.
567 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
568 if (Off != 0) {
569 if (ToRemoveSize <= (PrefAlign.value() - Off))
570 return false;
571 ToRemoveSize -= PrefAlign.value() - Off;
572 }
573 assert(isAligned(PrefAlign, ToRemoveSize) &&
574 "Should preserve selected alignment");
575 }
576
577 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
578 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
579
580 uint64_t NewSize = DeadSize - ToRemoveSize;
581 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
582 // When shortening an atomic memory intrinsic, the newly shortened
583 // length must remain an integer multiple of the element size.
584 const uint32_t ElementSize = AMI->getElementSizeInBytes();
585 if (0 != NewSize % ElementSize)
586 return false;
587 }
588
589 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
590 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
591 << "\n KILLER [" << ToRemoveStart << ", "
592 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
593
594 Value *DeadWriteLength = DeadIntrinsic->getLength();
595 Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
596 DeadIntrinsic->setLength(TrimmedLength);
597 DeadIntrinsic->setDestAlignment(PrefAlign);
598
599 if (!IsOverwriteEnd) {
600 Value *OrigDest = DeadIntrinsic->getRawDest();
601 Type *Int8PtrTy =
602 Type::getInt8PtrTy(DeadIntrinsic->getContext(),
603 OrigDest->getType()->getPointerAddressSpace());
604 Value *Dest = OrigDest;
605 if (OrigDest->getType() != Int8PtrTy)
606 Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
607 Value *Indices[1] = {
608 ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
609 Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
610 Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
611 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
612 if (NewDestGEP->getType() != OrigDest->getType())
613 NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
614 "", DeadI);
615 DeadIntrinsic->setDest(NewDestGEP);
616 }
617
618 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
619 shortenAssignment(DeadI, DeadStart * 8, DeadSize * 8, NewSize * 8,
620 IsOverwriteEnd);
621
622 // Finally update start and size of dead access.
623 if (!IsOverwriteEnd)
624 DeadStart += ToRemoveSize;
625 DeadSize = NewSize;
626
627 return true;
628}
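//
// Illustrative example: a dead 'memset(p, 0, 32)' (DeadStart = 0,
// DeadSize = 32) whose bytes [24, 32) are overwritten by a killing store is
// shortened with IsOverwriteEnd == true to 'memset(p, 0, 24)', assuming the
// 24-byte remainder keeps the destination alignment; otherwise the removed
// region is reduced until the remaining size stays aligned.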
629
630static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
631 int64_t &DeadStart, uint64_t &DeadSize) {
632 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
633 return false;
634
635 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
636 int64_t KillingStart = OII->second;
637 uint64_t KillingSize = OII->first - KillingStart;
638
639 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
640
641 if (KillingStart > DeadStart &&
642 // Note: "KillingStart - DeadStart" is known to be positive due to
643 // preceding check.
644 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
645 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
646 // be non negative due to preceding checks.
647 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
648 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
649 true)) {
650 IntervalMap.erase(OII);
651 return true;
652 }
653 }
654 return false;
655}
656
657static bool tryToShortenBegin(Instruction *DeadI,
658 OverlapIntervalsTy &IntervalMap,
659 int64_t &DeadStart, uint64_t &DeadSize) {
660 if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
661 return false;
662
663 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
664 int64_t KillingStart = OII->second;
665 uint64_t KillingSize = OII->first - KillingStart;
666
667 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
668
669 if (KillingStart <= DeadStart &&
670 // Note: "DeadStart - KillingStart" is known to be non negative due to
671 // preceding check.
672 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
673 // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
674 // be positive due to preceding checks.
675 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
676 "Should have been handled as OW_Complete");
677 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
678 false)) {
679 IntervalMap.erase(OII);
680 return true;
681 }
682 }
683 return false;
684}
685
686static Constant *
687tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
688 int64_t KillingOffset, int64_t DeadOffset,
689 const DataLayout &DL, BatchAAResults &AA,
690 DominatorTree *DT) {
691
692 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
693 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
694 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
695 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
696 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
697 // If the store we find is:
698 // a) partially overwritten by the store to 'Loc'
699 // b) the killing store is fully contained in the dead one and
700 // c) they both have a constant value
701 // d) none of the two stores need padding
702 // Merge the two stores, replacing the dead store's value with a
703 // merge of both values.
704 // TODO: Deal with other constant types (vectors, etc), and probably
705 // some mem intrinsics (if needed)
706
707 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
708 APInt KillingValue =
709 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
710 unsigned KillingBits = KillingValue.getBitWidth();
711 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
712 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
713
714 // Offset of the smaller store inside the larger store
715 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
716 unsigned LShiftAmount =
717 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
718 : BitOffsetDiff;
719 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
720 LShiftAmount + KillingBits);
721 // Clear the bits we'll be replacing, then OR with the smaller
722 // store, shifted appropriately.
723 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
724 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
725 << "\n Killing: " << *KillingI
726 << "\n Merged Value: " << Merged << '\n');
727 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
728 }
729 return nullptr;
730}
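//
// Illustrative example of the merge above (little-endian, hypothetical IR):
//
//   store i32 0, ptr %p      ; dead store, DeadOffset = 0
//   store i8 -1, ptr %p1     ; killing store, %p1 = %p + 1, KillingOffset = 1
//
// BitOffsetDiff = (1 - 0) * 8 = 8 and LShiftAmount = 8, so the merged
// constant is (0 & ~Mask) | (0xFF << 8) = 0x0000FF00 and the caller rewrites
// the dead store to 'store i32 0xFF00, ptr %p'.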
731
732namespace {
733// Returns true if \p I is an intrinsic that does not read or write memory.
734bool isNoopIntrinsic(Instruction *I) {
735 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
736 switch (II->getIntrinsicID()) {
737 case Intrinsic::lifetime_start:
738 case Intrinsic::lifetime_end:
739 case Intrinsic::invariant_end:
740 case Intrinsic::launder_invariant_group:
741 case Intrinsic::assume:
742 return true;
743 case Intrinsic::dbg_declare:
744 case Intrinsic::dbg_label:
745 case Intrinsic::dbg_value:
746 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
747 default:
748 return false;
749 }
750 }
751 return false;
752}
753
754// Check if we can ignore \p D for DSE.
755bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
756 Instruction *DI = D->getMemoryInst();
757 // Calls that only access inaccessible memory cannot read or write any memory
758 // locations we consider for elimination.
759 if (auto *CB = dyn_cast<CallBase>(DI))
760 if (CB->onlyAccessesInaccessibleMemory())
761 return true;
762
763 // We can eliminate stores to locations not visible to the caller across
764 // throwing instructions.
765 if (DI->mayThrow() && !DefVisibleToCaller)
766 return true;
767
768 // We can remove the dead stores, irrespective of the fence and its ordering
769 // (release/acquire/seq_cst). Fences only constrain the ordering of
770 // already visible stores; they do not make a store visible to other
771 // threads. So, skipping over a fence does not change a store from being
772 // dead.
773 if (isa<FenceInst>(DI))
774 return true;
775
776 // Skip intrinsics that do not really read or modify memory.
777 if (isNoopIntrinsic(DI))
778 return true;
779
780 return false;
781}
782
783struct DSEState {
784 Function &F;
785 AliasAnalysis &AA;
786 EarliestEscapeInfo EI;
787
788 /// The single BatchAA instance that is used to cache AA queries. It will
789 /// not be invalidated over the whole run. This is safe, because:
790 /// 1. Only memory writes are removed, so the alias cache for memory
791 /// locations remains valid.
792 /// 2. No new instructions are added (only instructions removed), so cached
793 /// information for a deleted value cannot be accessed by a re-used new
794 /// value pointer.
795 BatchAAResults BatchAA;
796
797 MemorySSA &MSSA;
798 DominatorTree &DT;
799 PostDominatorTree &PDT;
800 const TargetLibraryInfo &TLI;
801 const DataLayout &DL;
802 const LoopInfo &LI;
803
804 // Whether the function contains any irreducible control flow, useful for
805 // accurately detecting loops.
806 bool ContainsIrreducibleLoops;
807
808 // All MemoryDefs that potentially could kill other MemDefs.
809 SmallVector<MemoryDef *, 64> MemDefs;
810 // Any that should be skipped as they are already deleted
811 SmallPtrSet<MemoryAccess *, 4> SkipStores;
812 // Keep track whether a given object is captured before return or not.
813 DenseMap<const Value *, bool> CapturedBeforeReturn;
814 // Keep track of all of the objects that are invisible to the caller after
815 // the function returns.
816 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
817 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
818 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
819 // Post-order numbers for each basic block. Used to figure out if memory
820 // accesses are executed before another access.
821 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
822 // Values that are only used with assumes. Used to refine pointer escape
823 // analysis.
824 SmallPtrSet<const Value *, 16> EphValues;
825
826 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
827 /// basic block.
828 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
829 // Check if there are root nodes that are terminated by UnreachableInst.
830 // Those roots pessimize post-dominance queries. If there are such roots,
831 // fall back to CFG scan starting from all non-unreachable roots.
832 bool AnyUnreachableExit;
833
834 // Whether or not we should iterate on removing dead stores at the end of the
835 // function due to removing a store causing a previously captured pointer to
836 // no longer be captured.
837 bool ShouldIterateEndOfFunctionDSE;
838
839 // Class contains self-reference, make sure it's not copied/moved.
840 DSEState(const DSEState &) = delete;
841 DSEState &operator=(const DSEState &) = delete;
842
843 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
844 PostDominatorTree &PDT, AssumptionCache &AC,
845 const TargetLibraryInfo &TLI, const LoopInfo &LI)
846 : F(F), AA(AA), EI(DT, LI, EphValues), BatchAA(AA, &EI), MSSA(MSSA),
847 DT(DT), PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
848 // Collect blocks with throwing instructions not modeled in MemorySSA and
849 // alloc-like objects.
850 unsigned PO = 0;
851 for (BasicBlock *BB : post_order(&F)) {
852 PostOrderNumbers[BB] = PO++;
853 for (Instruction &I : *BB) {
854 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
855 if (I.mayThrow() && !MA)
856 ThrowingBlocks.insert(I.getParent());
857
858 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
859 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
860 (getLocForWrite(&I) || isMemTerminatorInst(&I)))
861 MemDefs.push_back(MD);
862 }
863 }
864
865 // Treat byval or inalloca arguments the same as Allocas: stores to them are
866 // dead at the end of the function.
867 for (Argument &AI : F.args())
868 if (AI.hasPassPointeeByValueCopyAttr())
869 InvisibleToCallerAfterRet.insert({&AI, true});
870
871 // Collect whether there is any irreducible control flow in the function.
872 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
873
874 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
875 return isa<UnreachableInst>(E->getTerminator());
876 });
877
878 CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
879 }
880
881 LocationSize strengthenLocationSize(const Instruction *I,
882 LocationSize Size) const {
883 if (auto *CB = dyn_cast<CallBase>(I)) {
884 LibFunc F;
885 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
886 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
887 // Use the precise location size specified by the 3rd argument
888 // for determining KillingI overwrites DeadLoc if it is a memset_chk
889 // instruction. memset_chk will either write the amount specified as the
890 // 3rd argument or immediately abort and exit the program.
891 // NOTE: AA may determine NoAlias if it can prove that the access size
892 // is larger than the allocation size due to that being UB. To avoid
893 // returning potentially invalid NoAlias results by AA, limit the use of
894 // the precise location size to isOverwrite.
895 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
896 return LocationSize::precise(Len->getZExtValue());
897 }
898 }
899 return Size;
900 }
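  // Illustrative example: for a call such as
  //   call ptr @__memset_chk(ptr %p, i32 0, i64 16, i64 %n)
  // the location size is strengthened to a precise 16 bytes, because
  // memset_chk either writes exactly the requested length or aborts.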
901
902 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
903 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
904 /// location (by \p DeadI instruction).
905 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
906 /// \p DeadI, but they both write to the same underlying object. In that
907 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
908 /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite
909 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
910 OverwriteResult isOverwrite(const Instruction *KillingI,
911 const Instruction *DeadI,
912 const MemoryLocation &KillingLoc,
913 const MemoryLocation &DeadLoc,
914 int64_t &KillingOff, int64_t &DeadOff) {
915 // AliasAnalysis does not always account for loops. Limit overwrite checks
916 // to dependencies for which we can guarantee they are independent of any
917 // loops they are in.
918 if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
919 return OW_Unknown;
920
921 LocationSize KillingLocSize =
922 strengthenLocationSize(KillingI, KillingLoc.Size);
923 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
924 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
925 const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
926 const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
927
928 // Check whether the killing store overwrites the whole object, in which
929 // case the size/offset of the dead store does not matter.
930 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise()) {
931 uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
932 if (KillingUndObjSize != MemoryLocation::UnknownSize &&
933 KillingUndObjSize == KillingLocSize.getValue())
934 return OW_Complete;
935 }
936
937 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
938 // get imprecise values here, though (except for unknown sizes).
939 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
940 // In case no constant size is known, try to use the IR values for the
941 // number of bytes written and check if they match.
942 const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
943 const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
944 if (KillingMemI && DeadMemI) {
945 const Value *KillingV = KillingMemI->getLength();
946 const Value *DeadV = DeadMemI->getLength();
947 if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
948 return OW_Complete;
949 }
950
951 // Masked stores have imprecise locations, but we can reason about them
952 // to some extent.
953 return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
954 }
955
956 const uint64_t KillingSize = KillingLocSize.getValue();
957 const uint64_t DeadSize = DeadLoc.Size.getValue();
958
959 // Query the alias information
960 AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
961
962 // If the start pointers are the same, we just have to compare sizes to see if
963 // the killing store was larger than the dead store.
964 if (AAR == AliasResult::MustAlias) {
965 // Make sure that the KillingSize size is >= the DeadSize size.
966 if (KillingSize >= DeadSize)
967 return OW_Complete;
968 }
969
970 // If we hit a partial alias we may have a full overwrite
971 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
972 int32_t Off = AAR.getOffset();
973 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
974 return OW_Complete;
975 }
976
977 // If we can't resolve the same pointers to the same object, then we can't
978 // analyze them at all.
979 if (DeadUndObj != KillingUndObj) {
980 // Non aliasing stores to different objects don't overlap. Note that
981 // if the killing store is known to overwrite whole object (out of
982 // bounds access overwrites whole object as well) then it is assumed to
983 // completely overwrite any store to the same object even if they don't
984 // actually alias (see next check).
985 if (AAR == AliasResult::NoAlias)
986 return OW_None;
987 return OW_Unknown;
988 }
989
990 // Okay, we have stores to two completely different pointers. Try to
991 // decompose the pointer into a "base + constant_offset" form. If the base
992 // pointers are equal, then we can reason about the two stores.
993 DeadOff = 0;
994 KillingOff = 0;
995 const Value *DeadBasePtr =
996 GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
997 const Value *KillingBasePtr =
998 GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
999
1000 // If the base pointers still differ, we have two completely different
1001 // stores.
1002 if (DeadBasePtr != KillingBasePtr)
1003 return OW_Unknown;
1004
1005 // The killing access completely overlaps the dead store if and only if
1006 // both start and end of the dead one is "inside" the killing one:
1007 // |<->|--dead--|<->|
1008 // |-----killing------|
1009 // Accesses may overlap if and only if start of one of them is "inside"
1010 // another one:
1011 // |<->|--dead--|<-------->|
1012 // |-------killing--------|
1013 // OR
1014 // |-------dead-------|
1015 // |<->|---killing---|<----->|
1016 //
1017 // We have to be careful here as *Off is signed while *.Size is unsigned.
1018
1019 // Check if the dead access starts "not before" the killing one.
1020 if (DeadOff >= KillingOff) {
1021 // If the dead access ends "not after" the killing access then the
1022 // dead one is completely overwritten by the killing one.
1023 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1024 return OW_Complete;
1025 // If start of the dead access is "before" end of the killing access
1026 // then accesses overlap.
1027 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1028 return OW_MaybePartial;
1029 }
1030 // If start of the killing access is "before" end of the dead access then
1031 // accesses overlap.
1032 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1033 return OW_MaybePartial;
1034 }
1035
1036 // Can reach here only if accesses are known not to overlap.
1037 return OW_None;
1038 }
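  // Illustrative example of the offset reasoning above: with a common base
  // pointer, a dead store of 4 bytes at offset 4 and a killing store of 8
  // bytes at offset 0 satisfy
  //   (DeadOff - KillingOff) + DeadSize = 4 + 4 <= 8 = KillingSize,
  // so isOverwrite returns OW_Complete; with a killing size of only 6 bytes
  // the accesses merely overlap and OW_MaybePartial is returned instead.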
1039
1040 bool isInvisibleToCallerAfterRet(const Value *V) {
1041 if (isa<AllocaInst>(V))
1042 return true;
1043 auto I = InvisibleToCallerAfterRet.insert({V, false});
1044 if (I.second) {
1045 if (!isInvisibleToCallerOnUnwind(V)) {
1046 I.first->second = false;
1047 } else if (isNoAliasCall(V)) {
1048 I.first->second = !PointerMayBeCaptured(V, true, false, EphValues);
1049 }
1050 }
1051 return I.first->second;
1052 }
1053
1054 bool isInvisibleToCallerOnUnwind(const Value *V) {
1055 bool RequiresNoCaptureBeforeUnwind;
1056 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1057 return false;
1058 if (!RequiresNoCaptureBeforeUnwind)
1059 return true;
1060
1061 auto I = CapturedBeforeReturn.insert({V, true});
1062 if (I.second)
1063 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1064 // with the killing MemoryDef. But we refrain from doing so for now to
1065 // limit compile-time and this does not cause any changes to the number
1066 // of stores removed on a large test set in practice.
1067 I.first->second = PointerMayBeCaptured(V, false, true, EphValues);
1068 return !I.first->second;
1069 }
1070
1071 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1072 if (!I->mayWriteToMemory())
1073 return std::nullopt;
1074
1075 if (auto *CB = dyn_cast<CallBase>(I))
1076 return MemoryLocation::getForDest(CB, TLI);
1077
1078 return MemoryLocation::getOrNone(I);
1079 }
1080
1081 /// Assuming this instruction has a dead analyzable write, can we delete
1082 /// this instruction?
1083 bool isRemovable(Instruction *I) {
1084 assert(getLocForWrite(I) && "Must have analyzable write");
1085
1086 // Don't remove volatile/atomic stores.
1087 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1088 return SI->isUnordered();
1089
1090 if (auto *CB = dyn_cast<CallBase>(I)) {
1091 // Don't remove volatile memory intrinsics.
1092 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1093 return !MI->isVolatile();
1094
1095 // Never remove dead lifetime intrinsics, e.g. because they are followed
1096 // by a free.
1097 if (CB->isLifetimeStartOrEnd())
1098 return false;
1099
1100 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1101 !CB->isTerminator();
1102 }
1103
1104 return false;
1105 }
1106
1107 /// Returns true if \p UseInst completely overwrites \p DefLoc
1108 /// (stored by \p DefInst).
1109 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1110 Instruction *UseInst) {
1111 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1112 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1113 // MemoryDef.
1114 if (!UseInst->mayWriteToMemory())
1115 return false;
1116
1117 if (auto *CB = dyn_cast<CallBase>(UseInst))
1118 if (CB->onlyAccessesInaccessibleMemory())
1119 return false;
1120
1121 int64_t InstWriteOffset, DepWriteOffset;
1122 if (auto CC = getLocForWrite(UseInst))
1123 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1124 DepWriteOffset) == OW_Complete;
1125 return false;
1126 }
1127
1128 /// Returns true if \p Def is not read before returning from the function.
1129 bool isWriteAtEndOfFunction(MemoryDef *Def) {
1130 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1131 << *Def->getMemoryInst()
1132 << ") is at the end the function \n");
1133
1134 auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
1135 if (!MaybeLoc) {
1136 LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1137 return false;
1138 }
1139
1140 SmallVector<MemoryAccess *, 4> WorkList;
1141 SmallPtrSet<MemoryAccess *, 8> Visited;
1142 auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1143 if (!Visited.insert(Acc).second)
1144 return;
1145 for (Use &U : Acc->uses())
1146 WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1147 };
1148 PushMemUses(Def);
1149 for (unsigned I = 0; I < WorkList.size(); I++) {
1150 if (WorkList.size() >= MemorySSAScanLimit) {
1151 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1152 return false;
1153 }
1154
1155 MemoryAccess *UseAccess = WorkList[I];
1156 if (isa<MemoryPhi>(UseAccess)) {
1157 // AliasAnalysis does not account for loops. Limit elimination to
1158 // candidates for which we can guarantee they always store to the same
1159 // memory location.
1160 if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
1161 return false;
1162
1163 PushMemUses(cast<MemoryPhi>(UseAccess));
1164 continue;
1165 }
1166 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1167 // of times this is called and/or caching it.
1168 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1169 if (isReadClobber(*MaybeLoc, UseInst)) {
1170 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1171 return false;
1172 }
1173
1174 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1175 PushMemUses(UseDef);
1176 }
1177 return true;
1178 }
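  // Illustrative example: a store to a local alloca whose MemorySSA uses are
  // only other stores, with no load or escaping read reachable before the
  // function returns, is treated as a write at the end of the function and
  // may be deleted by the caller.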
1179
1180 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1181 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1182 /// indicating whether \p I is a free-like call.
1183 std::optional<std::pair<MemoryLocation, bool>>
1184 getLocForTerminator(Instruction *I) const {
1185 uint64_t Len;
1186 Value *Ptr;
1187 if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1188 m_Value(Ptr))))
1189 return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1190
1191 if (auto *CB = dyn_cast<CallBase>(I)) {
1192 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1193 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1194 }
1195
1196 return std::nullopt;
1197 }
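  // Illustrative example: for
  //   call void @llvm.lifetime.end.p0(i64 16, ptr %a)
  // this returns the 16-byte location of %a paired with 'false', while for a
  // call to free(%p) it returns the 'after' location of %p paired with
  // 'true'.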
1198
1199 /// Returns true if \p I is a memory terminator instruction like
1200 /// llvm.lifetime.end or free.
1201 bool isMemTerminatorInst(Instruction *I) const {
1202 auto *CB = dyn_cast<CallBase>(I);
1203 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1204 getFreedOperand(CB, &TLI) != nullptr);
1205 }
1206
1207 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1208 /// instruction \p AccessI.
1209 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1210 Instruction *MaybeTerm) {
1211 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1212 getLocForTerminator(MaybeTerm);
1213
1214 if (!MaybeTermLoc)
1215 return false;
1216
1217 // If the terminator is a free-like call, all accesses to the underlying
1218 // object can be considered terminated.
1219 if (getUnderlyingObject(Loc.Ptr) !=
1220 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1221 return false;
1222
1223 auto TermLoc = MaybeTermLoc->first;
1224 if (MaybeTermLoc->second) {
1225 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1226 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1227 }
1228 int64_t InstWriteOffset = 0;
1229 int64_t DepWriteOffset = 0;
1230 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1231 DepWriteOffset) == OW_Complete;
1232 }
1233
1234 // Returns true if \p Use may read from \p DefLoc.
1235 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1236 if (isNoopIntrinsic(UseInst))
1237 return false;
1238
1239 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1240 // treated as read clobber.
1241 if (auto SI = dyn_cast<StoreInst>(UseInst))
1242 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1243
1244 if (!UseInst->mayReadFromMemory())
1245 return false;
1246
1247 if (auto *CB = dyn_cast<CallBase>(UseInst))
1248 if (CB->onlyAccessesInaccessibleMemory())
1249 return false;
1250
1251 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1252 }
1253
1254 /// Returns true if a dependency between \p Current and \p KillingDef is
1255 /// guaranteed to be loop invariant for the loops that they are in. Either
1256 /// because they are known to be in the same block, in the same loop level or
1257 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1258 /// during execution of the containing function.
1259 bool isGuaranteedLoopIndependent(const Instruction *Current,
1260 const Instruction *KillingDef,
1261 const MemoryLocation &CurrentLoc) {
1262 // If the dependency is within the same block or loop level (being careful
1263 // of irreducible loops), we know that AA will return a valid result for the
1264 // memory dependency. (Both at the function level, outside of any loop,
1265 // would also be valid but we currently disable that to limit compile time).
1266 if (Current->getParent() == KillingDef->getParent())
1267 return true;
1268 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1269 if (!ContainsIrreducibleLoops && CurrentLI &&
1270 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1271 return true;
1272 // Otherwise check the memory location is invariant to any loops.
1273 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1274 }
1275
1276 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1277 /// loop. In particular, this guarantees that it only references a single
1278 /// MemoryLocation during execution of the containing function.
1279 bool isGuaranteedLoopInvariant(const Value *Ptr) {
1280 Ptr = Ptr->stripPointerCasts();
1281 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1282 if (GEP->hasAllConstantIndices())
1283 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1284
1285 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1286 return I->getParent()->isEntryBlock() ||
1287 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1288 }
1289 return true;
1290 }
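  // Illustrative example: an alloca in the entry block, or a GEP on it with
  // all-constant indices, is guaranteed loop invariant, whereas a pointer
  // such as
  //   %p = getelementptr i32, ptr %base, i64 %iv
  // computed inside a loop is not, because it may refer to a different
  // MemoryLocation on every iteration.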
1291
1292 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1293 // with no read access between them or on any other path to a function exit
1294 // block if \p KillingLoc is not accessible after the function returns. If
1295 // there is no such MemoryDef, return std::nullopt. The returned value may not
1296 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1297 // encounter an aliasing MemoryUse (read).
1298 std::optional<MemoryAccess *>
1299 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1300 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1301 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1302 bool IsMemTerm, unsigned &PartialLimit) {
1303 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1304 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1305 return std::nullopt;
1306 }
1307
1308 MemoryAccess *Current = StartAccess;
1309 Instruction *KillingI = KillingDef->getMemoryInst();
1310 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1311
1312 // Only optimize defining access of KillingDef when directly starting at its
1313 // defining access. The defining access also must only access KillingLoc. At
1314 // the moment we only support instructions with a single write location, so
1315 // it should be sufficient to disable optimizations for instructions that
1316 // also read from memory.
1317 bool CanOptimize = OptimizeMemorySSA &&
1318 KillingDef->getDefiningAccess() == StartAccess &&
1319 !KillingI->mayReadFromMemory();
1320
1321 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1322 std::optional<MemoryLocation> CurrentLoc;
1323 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1324 LLVM_DEBUG({
1325 dbgs() << " visiting " << *Current;
1326 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1327 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1328 << ")";
1329 dbgs() << "\n";
1330 });
1331
1332 // Reached TOP.
1333 if (MSSA.isLiveOnEntryDef(Current)) {
1334 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1335 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1336 // The first clobbering def is... none.
1337 KillingDef->setOptimized(Current);
1338 return std::nullopt;
1339 }
1340
1341 // Cost of a step. Accesses in the same block are more likely to be valid
1342 // candidates for elimination, hence consider them cheaper.
1343 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1344 ? MemorySSASameBBStepCost
1345 : MemorySSAOtherBBStepCost;
1346 if (WalkerStepLimit <= StepCost) {
1347 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1348 return std::nullopt;
1349 }
1350 WalkerStepLimit -= StepCost;
1351
1352 // Return for MemoryPhis. They cannot be eliminated directly and the
1353 // caller is responsible for traversing them.
1354 if (isa<MemoryPhi>(Current)) {
1355 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1356 return Current;
1357 }
1358
1359 // Below, check if CurrentDef is a valid candidate to be eliminated by
1360 // KillingDef. If it is not, check the next candidate.
1361 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1362 Instruction *CurrentI = CurrentDef->getMemoryInst();
1363
1364 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1365 CanOptimize = false;
1366 continue;
1367 }
1368
1369 // Before we try to remove anything, check for any extra throwing
1370 // instructions that block us from DSEing
1371 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1372 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1373 return std::nullopt;
1374 }
1375
1376 // Check for anything that looks like it will be a barrier to further
1377 // removal
1378 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1379 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1380 return std::nullopt;
1381 }
1382
1383 // If Current is known to be on path that reads DefLoc or is a read
1384 // clobber, bail out, as the path is not profitable. We skip this check
1385 // for intrinsic calls, because the code knows how to handle memcpy
1386 // intrinsics.
1387 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1388 return std::nullopt;
1389
1390 // Quick check if there are direct uses that are read-clobbers.
1391 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1392 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1393 return !MSSA.dominates(StartAccess, UseOrDef) &&
1394 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1395 return false;
1396 })) {
1397 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1398 return std::nullopt;
1399 }
1400
1401 // If Current does not have an analyzable write location or is not
1402 // removable, skip it.
1403 CurrentLoc = getLocForWrite(CurrentI);
1404 if (!CurrentLoc || !isRemovable(CurrentI)) {
1405 CanOptimize = false;
1406 continue;
1407 }
1408
1409 // AliasAnalysis does not account for loops. Limit elimination to
1410 // candidates for which we can guarantee they always store to the same
1411 // memory location and are not located in different loops.
1412 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1413 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1414 CanOptimize = false;
1415 continue;
1416 }
1417
1418 if (IsMemTerm) {
1419 // If the killing def is a memory terminator (e.g. lifetime.end), check
1420 // the next candidate if Current does not write to the same
1421 // underlying object as the terminator.
1422 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1423 CanOptimize = false;
1424 continue;
1425 }
1426 } else {
1427 int64_t KillingOffset = 0;
1428 int64_t DeadOffset = 0;
1429 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1430 KillingOffset, DeadOffset);
1431 if (CanOptimize) {
1432 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1433 // optimized access. Do not optimize if CurrentDef is already the
1434 // defining access of KillingDef.
1435 if (CurrentDef != KillingDef->getDefiningAccess() &&
1436 (OR == OW_Complete || OR == OW_MaybePartial))
1437 KillingDef->setOptimized(CurrentDef);
1438
1439 // Once a may-aliasing def is encountered do not set an optimized
1440 // access.
1441 if (OR != OW_None)
1442 CanOptimize = false;
1443 }
1444
1445 // If Current does not write to the same object as KillingDef, check
1446 // the next candidate.
1447 if (OR == OW_Unknown || OR == OW_None)
1448 continue;
1449 else if (OR == OW_MaybePartial) {
1450 // If KillingDef only partially overwrites Current, check the next
1451 // candidate if the partial step limit is exceeded. This aggressively
1452 // limits the number of candidates for partial store elimination,
1453 // which are less likely to be removable in the end.
1454 if (PartialLimit <= 1) {
1455 WalkerStepLimit -= 1;
1456 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1457 continue;
1458 }
1459 PartialLimit -= 1;
1460 }
1461 }
1462 break;
1463 };
1464
1465 // Accesses to objects accessible after the function returns can only be
1466 // eliminated if the access is dead along all paths to the exit. Collect
1467 // the blocks with killing (=completely overwriting) MemoryDefs and check
1468 // if they cover all paths from MaybeDeadAccess to any function exit.
1469 SmallPtrSet<Instruction *, 16> KillingDefs;
1470 KillingDefs.insert(KillingDef->getMemoryInst());
1471 MemoryAccess *MaybeDeadAccess = Current;
1472 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1473 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1474 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1475 << *MaybeDeadI << ")\n");
1476
1477 SmallSetVector<MemoryAccess *, 32> WorkList;
1478 auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1479 for (Use &U : Acc->uses())
1480 WorkList.insert(cast<MemoryAccess>(U.getUser()));
1481 };
1482 PushMemUses(MaybeDeadAccess);
1483
1484 // Check if DeadDef may be read.
1485 for (unsigned I = 0; I < WorkList.size(); I++) {
1486 MemoryAccess *UseAccess = WorkList[I];
1487
1488 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1489 // Bail out if the number of accesses to check exceeds the scan limit.
1490 if (ScanLimit < (WorkList.size() - I)) {
1491 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1492 return std::nullopt;
1493 }
1494 --ScanLimit;
1495 NumDomMemDefChecks++;
1496
1497 if (isa<MemoryPhi>(UseAccess)) {
1498 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1499 return DT.properlyDominates(KI->getParent(),
1500 UseAccess->getBlock());
1501 })) {
1502 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1503 continue;
1504 }
1505 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1506 PushMemUses(UseAccess);
1507 continue;
1508 }
1509
1510 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1511 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1512
1513 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1514 return DT.dominates(KI, UseInst);
1515 })) {
1516 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1517 continue;
1518 }
1519
1520 // A memory terminator kills all preceding MemoryDefs and all succeeding
1521 // MemoryAccesses. We do not have to check its users.
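// For example (hypothetical IR), a lifetime end intrinsic such as
//
//   call void @llvm.lifetime.end.p0(i64 4, ptr %a)
//
// ends the lifetime of %a: stores to %a before it are dead, and accesses to
// %a after it cannot observe them.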
1522 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1523 LLVM_DEBUG(
1524 dbgs()
1525 << " ... skipping, memterminator invalidates following accesses\n");
1526 continue;
1527 }
1528
1529 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1530 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1531 PushMemUses(UseAccess);
1532 continue;
1533 }
1534
1535 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1536 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1537 return std::nullopt;
1538 }
1539
1540 // Uses which may read the original MemoryDef mean we cannot eliminate the
1541 // original MD. Stop walk.
1542 if (isReadClobber(MaybeDeadLoc, UseInst)) {
1543 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1544 return std::nullopt;
1545 }
1546
1547 // If this worklist walks back to the original memory access (and the
1548 // pointer is not guaranteed loop invariant) then we cannot assume that a
1549 // store kills itself.
1550 if (MaybeDeadAccess == UseAccess &&
1551 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1552 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1553 return std::nullopt;
1554 }
1555 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1556 // if it reads the memory location.
1557 // TODO: It would probably be better to check for self-reads before
1558 // calling the function.
1559 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1560 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1561 continue;
1562 }
1563
1564 // Check the uses of all MemoryDefs we discover, including non-aliasing
1565 // ones, except for defs completely overwriting the original location:
1566 // uses of a complete overwrite cannot read the original value. If we only
1567 // followed aliasing defs, we might miss cases like the following
1568 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1569 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1570 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1571 // (The Use points to the *first* Def it may alias)
1572 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1573 // stores [0,1]
1574 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1575 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1576 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1577 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1578 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1579 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1580 LLVM_DEBUG(dbgs()
1581 << " ... found killing def " << *UseInst << "\n");
1582 KillingDefs.insert(UseInst);
1583 }
1584 } else {
1585 LLVM_DEBUG(dbgs()
1586 << " ... found preceding def " << *UseInst << "\n");
1587 return std::nullopt;
1588 }
1589 } else
1590 PushMemUses(UseDef);
1591 }
1592 }
1593
1594 // For accesses to locations visible after the function returns, make sure
1595 // that the location is dead (=overwritten) along all paths from
1596 // MaybeDeadAccess to the exit.
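// A sketch of the CFG shape this guards against (hypothetical IR):
//
//   entry:
//     store i32 0, ptr @G            ; MaybeDeadAccess
//     br i1 %c, label %then, label %exit
//   then:
//     store i32 1, ptr @G            ; killing store, only on this path
//     br label %exit
//   exit:
//     ret void
//
// @G stays visible after the function returns and the edge entry->exit
// bypasses the killing block, so the first store must be kept.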
1597 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1598 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1599 for (Instruction *KD : KillingDefs)
1600 KillingBlocks.insert(KD->getParent());
1601 assert(!KillingBlocks.empty() &&
1602 "Expected at least a single killing block");
1603
1604 // Find the common post-dominator of all killing blocks.
1605 BasicBlock *CommonPred = *KillingBlocks.begin();
1606 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1607 if (!CommonPred)
1608 break;
1609 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1610 }
1611
1612 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1613 // there is a path from MaybeDeadAccess to an exit not going through a
1614 // killing block.
1615 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1616 if (!AnyUnreachableExit)
1617 return std::nullopt;
1618
1619 // Fall back to CFG scan starting at all non-unreachable roots if not
1620 // all paths to the exit go through CommonPred.
1621 CommonPred = nullptr;
1622 }
1623
1624 // If CommonPred itself is in the set of killing blocks, we're done.
1625 if (KillingBlocks.count(CommonPred))
1626 return {MaybeDeadAccess};
1627
1628 SetVector<BasicBlock *> WorkList;
1629 // If CommonPred is null, there are multiple exits from the function.
1630 // They all have to be added to the worklist.
1631 if (CommonPred)
1632 WorkList.insert(CommonPred);
1633 else
1634 for (BasicBlock *R : PDT.roots()) {
1635 if (!isa<UnreachableInst>(R->getTerminator()))
1636 WorkList.insert(R);
1637 }
1638
1639 NumCFGTries++;
1640 // Check if all paths starting from an exit node go through one of the
1641 // killing blocks before reaching MaybeDeadAccess.
1642 for (unsigned I = 0; I < WorkList.size(); I++) {
1643 NumCFGChecks++;
1644 BasicBlock *Current = WorkList[I];
1645 if (KillingBlocks.count(Current))
1646 continue;
1647 if (Current == MaybeDeadAccess->getBlock())
1648 return std::nullopt;
1649
1650 // MaybeDeadAccess is reachable from the entry, so we don't have to
1651 // explore unreachable blocks further.
1652 if (!DT.isReachableFromEntry(Current))
1653 continue;
1654
1655 for (BasicBlock *Pred : predecessors(Current))
1656 WorkList.insert(Pred);
1657
1658 if (WorkList.size() >= MemorySSAPathCheckLimit)
1659 return std::nullopt;
1660 }
1661 NumCFGSuccess++;
1662 }
1663
1664 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1665 // potentially dead.
1666 return {MaybeDeadAccess};
1667 }
1668
1669 // Delete dead memory defs
1670 void deleteDeadInstruction(Instruction *SI) {
1671 MemorySSAUpdater Updater(&MSSA);
1672 SmallVector<Instruction *, 32> NowDeadInsts;
1673 NowDeadInsts.push_back(SI);
1674 --NumFastOther;
1675
1676 while (!NowDeadInsts.empty()) {
1677 Instruction *DeadInst = NowDeadInsts.pop_back_val();
1678 ++NumFastOther;
1679
1680 // Try to preserve debug information attached to the dead instruction.
1681 salvageDebugInfo(*DeadInst);
1682 salvageKnowledge(DeadInst);
1683
1684 // Remove the Instruction from MSSA.
1685 if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1686 if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1687 SkipStores.insert(MD);
1688 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1689 if (SI->getValueOperand()->getType()->isPointerTy()) {
1690 const Value *UO = getUnderlyingObject(SI->getValueOperand());
1691 if (CapturedBeforeReturn.erase(UO))
1692 ShouldIterateEndOfFunctionDSE = true;
1693 InvisibleToCallerAfterRet.erase(UO);
1694 }
1695 }
1696 }
1697
1698 Updater.removeMemoryAccess(MA);
1699 }
1700
1701 auto I = IOLs.find(DeadInst->getParent());
1702 if (I != IOLs.end())
1703 I->second.erase(DeadInst);
1704 // Remove its operands
1705 for (Use &O : DeadInst->operands())
1706 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1707 O = nullptr;
1708 if (isInstructionTriviallyDead(OpI, &TLI))
1709 NowDeadInsts.push_back(OpI);
1710 }
1711
1712 EI.removeInstruction(DeadInst);
1713 DeadInst->eraseFromParent();
1714 }
1715 }
1716
1717 // Check for any extra throws between \p KillingI and \p DeadI that block
1718 // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
1719 // MemoryDefs that may throw are handled during the walk from one def to
1720 // the next.
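// For example (hypothetical IR), with @G visible to the caller:
//
//   store i32 0, ptr @G                     ; DeadI
//   call void @may_throw() memory(read)     ; may throw, but not a MemoryDef
//   store i32 1, ptr @G                     ; KillingI
//
// If the call unwinds, the caller can still observe the first store, so it
// must not be removed.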
1721 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1722 const Value *KillingUndObj) {
1723 // First see if we can ignore it by using the fact that the killing
1724 // store is to an alloca/alloca-like object that is not visible to the
1725 // caller during execution of the function.
1726 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1727 return false;
1728
1729 if (KillingI->getParent() == DeadI->getParent())
1730 return ThrowingBlocks.count(KillingI->getParent());
1731 return !ThrowingBlocks.empty();
1732 }
1733
1734 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1735 // instructions act as barriers:
1736 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1737 // object.
1738 // * Atomic stores stronger than monotonic.
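// For example (hypothetical IR), walking upwards from the killing store
// stops at a release store encountered in between:
//
//   store i32 0, ptr %p                            ; earlier candidate
//   store atomic i32 1, ptr %q release, align 4    ; barrier
//   store i32 2, ptr %p                            ; killing store
//
// DSE conservatively refuses to eliminate stores across ordered atomic
// accesses, so the earlier store is left alone.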
1739 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1740 // If DeadI may throw it acts as a barrier, unless the killing store is
1741 // to an alloca/alloca-like object that does not escape.
1742 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1743 return true;
1744
1745 // If DeadI is an atomic load/store stronger than monotonic, do not try to
1746 // eliminate/reorder it.
1747 if (DeadI->isAtomic()) {
1748 if (auto *LI = dyn_cast<LoadInst>(DeadI))
1749 return isStrongerThanMonotonic(LI->getOrdering());
1750 if (auto *SI = dyn_cast<StoreInst>(DeadI))
1751 return isStrongerThanMonotonic(SI->getOrdering());
1752 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1753 return isStrongerThanMonotonic(ARMW->getOrdering());
1754 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1755 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1756 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1757 llvm_unreachable("other instructions should be skipped in MemorySSA");
1758 }
1759 return false;
1760 }
1761
1762 /// Eliminate writes to objects that are not visible in the caller and are not
1763 /// accessed before returning from the function.
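/// For example (hypothetical IR), assuming %a does not escape:
///
///   %a = alloca i32
///   ...
///   store i32 42, ptr %a   ; not read again before ret
///   ret void
///
/// The store cannot be observed once the function has returned, so it can be
/// removed.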
1764 bool eliminateDeadWritesAtEndOfFunction() {
1765 bool MadeChange = false;
1766 LLVM_DEBUG(
1767 dbgs()
1768 << "Trying to eliminate MemoryDefs at the end of the function\n");
1769 do {
1770 ShouldIterateEndOfFunctionDSE = false;
1771 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1772 if (SkipStores.contains(Def))
1773 continue;
1774
1775 Instruction *DefI = Def->getMemoryInst();
1776 auto DefLoc = getLocForWrite(DefI);
1777 if (!DefLoc || !isRemovable(DefI))
1778 continue;
1779
1780 // NOTE: Currently eliminating writes at the end of a function is
1781 // limited to MemoryDefs with a single underlying object, to save
1782 // compile-time. In practice it appears the case with multiple
1783 // underlying objects is very uncommon. If it turns out to be important,
1784 // we can use getUnderlyingObjects here instead.
1785 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1786 if (!isInvisibleToCallerAfterRet(UO))
1787 continue;
1788
1789 if (isWriteAtEndOfFunction(Def)) {
1790 // See through pointer-to-pointer bitcasts
1791 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1792 "of the function\n");
1794 ++NumFastStores;
1795 MadeChange = true;
1796 }
1797 }
1798 } while (ShouldIterateEndOfFunctionDSE);
1799 return MadeChange;
1800 }
1801
1802 /// If we have a zero initializing memset following a call to malloc,
1803 /// try folding it into a call to calloc.
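/// For example (hypothetical IR):
///
///   %p = call ptr @malloc(i64 %n)
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
///
/// can become
///
///   %p = call ptr @calloc(i64 1, i64 %n)
///
/// provided the memset is reached whenever the allocation succeeds and the
/// memory is not modified in between.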
1804 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1805 Instruction *DefI = Def->getMemoryInst();
1806 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1807 if (!MemSet)
1808 // TODO: Could handle zero store to small allocation as well.
1809 return false;
1810 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1811 if (!StoredConstant || !StoredConstant->isNullValue())
1812 return false;
1813
1814 if (!isRemovable(DefI))
1815 // The memset might be volatile.
1816 return false;
1817
1818 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1819 F.hasFnAttribute(Attribute::SanitizeAddress) ||
1820 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1821 F.getName() == "calloc")
1822 return false;
1823 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1824 if (!Malloc)
1825 return false;
1826 auto *InnerCallee = Malloc->getCalledFunction();
1827 if (!InnerCallee)
1828 return false;
1829 LibFunc Func;
1830 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1831 Func != LibFunc_malloc)
1832 return false;
1833
1834 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1835 // Check for a "br (icmp ptr, null), truebb, falsebb" pattern at the end
1836 // of the malloc block.
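// That is, a null check guarding the memset (hypothetical IR):
//
//   %p = call ptr @malloc(i64 %n)
//   %cmp = icmp eq ptr %p, null
//   br i1 %cmp, label %fail, label %memset.bb
// memset.bb:
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
//
// Here the memset block must be the false successor of the equality
// compare against null.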
1837 auto *MallocBB = Malloc->getParent(),
1838 *MemsetBB = Memset->getParent();
1839 if (MallocBB == MemsetBB)
1840 return true;
1841 auto *Ptr = Memset->getArgOperand(0);
1842 auto *TI = MallocBB->getTerminator();
1843 ICmpInst::Predicate Pred;
1844 BasicBlock *TrueBB, *FalseBB;
1845 if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
1846 FalseBB)))
1847 return false;
1848 if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
1849 return false;
1850 return true;
1851 };
1852
1853 if (Malloc->getOperand(0) != MemSet->getLength())
1854 return false;
1855 if (!shouldCreateCalloc(Malloc, MemSet) ||
1856 !DT.dominates(Malloc, MemSet) ||
1857 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
1858 return false;
1859 IRBuilder<> IRB(Malloc);
1860 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
1861 auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
1862 Malloc->getArgOperand(0), IRB, TLI);
1863 if (!Calloc)
1864 return false;
1865 MemorySSAUpdater Updater(&MSSA);
1866 auto *LastDef =
1867 cast<MemoryDef>(Updater.getMemorySSA()->getMemoryAccess(Malloc));
1868 auto *NewAccess =
1869 Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), LastDef,
1870 LastDef);
1871 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1872 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1873 Updater.removeMemoryAccess(Malloc);
1874 Malloc->replaceAllUsesWith(Calloc);
1875 Malloc->eraseFromParent();
1876 return true;
1877 }
1878
1879 /// \returns true if \p Def is a no-op store, either because it
1880 /// directly stores back a loaded value or stores zero to a calloced object.
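/// For example (hypothetical IR):
///
///   %v = load i32, ptr %p
///   store i32 %v, ptr %p    ; stores back the value just loaded
///
/// or, for a zero store into freshly calloc'ed memory:
///
///   %p = call ptr @calloc(i64 1, i64 8)
///   store i64 0, ptr %p     ; memory already holds zero
///
/// Both stores leave memory unchanged, provided nothing writes the location
/// in between.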
1881 bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1882 Instruction *DefI = Def->getMemoryInst();
1883 StoreInst *Store = dyn_cast<StoreInst>(DefI);
1884 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1885 Constant *StoredConstant = nullptr;
1886 if (Store)
1887 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1888 else if (MemSet)
1889 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1890 else
1891 return false;
1892
1893 if (!isRemovable(DefI))
1894 return false;
1895
1896 if (StoredConstant) {
1897 Constant *InitC =
1898 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
1899 // If the clobbering access is LiveOnEntry, no instructions between them
1900 // can modify the memory location.
1901 if (InitC && InitC == StoredConstant)
1902 return MSSA.isLiveOnEntryDef(
1903 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
1904 }
1905
1906 if (!Store)
1907 return false;
1908
1909 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
1910 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
1911 // Get the defining access for the load.
1912 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
1913 // Fast path: the defining accesses are the same.
1914 if (LoadAccess == Def->getDefiningAccess())
1915 return true;
1916
1917 // Look through phi accesses. Recursively scan all phi accesses by
1918 // adding them to a worklist. Bail when we run into a memory def that
1919 // does not match LoadAccess.
1920 SmallSetVector<MemoryAccess *, 8> ToCheck;
1921 MemoryAccess *Current =
1922 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
1923 // We don't want to bail when we run into the store memory def. But,
1924 // the phi access may point to it. So, pretend like we've already
1925 // checked it.
1926 ToCheck.insert(Def);
1927 ToCheck.insert(Current);
1928 // Start at current (1) to simulate already having checked Def.
1929 for (unsigned I = 1; I < ToCheck.size(); ++I) {
1930 Current = ToCheck[I];
1931 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
1932 // Check all the operands.
1933 for (auto &Use : PhiAccess->incoming_values())
1934 ToCheck.insert(cast<MemoryAccess>(&Use));
1935 continue;
1936 }
1937
1938 // If we found a memory def, bail. This happens when there is an
1939 // unrelated write in between what would otherwise be a no-op store.
1940 assert(isa<MemoryDef>(Current) &&
1941 "Only MemoryDefs should reach here.");
1942 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
1943 // We are searching for the definition of the store's destination.
1944 // So, if that is the same definition as the load, then this is a
1945 // noop. Otherwise, fail.
1946 if (LoadAccess != Current)
1947 return false;
1948 }
1949 return true;
1950 }
1951 }
1952
1953 return false;
1954 }
1955
1956 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
1957 bool Changed = false;
1958 for (auto OI : IOL) {
1959 Instruction *DeadI = OI.first;
1960 MemoryLocation Loc = *getLocForWrite(DeadI);
1961 assert(isRemovable(DeadI) && "Expect only removable instruction");
1962
1963 const Value *Ptr = Loc.Ptr->stripPointerCasts();
1964 int64_t DeadStart = 0;
1965 uint64_t DeadSize = Loc.Size.getValue();
1966 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
1967 OverlapIntervalsTy &IntervalMap = OI.second;
1968 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
1969 if (IntervalMap.empty())
1970 continue;
1971 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
1972 }
1973 return Changed;
1974 }
1975
1976 /// Eliminates writes to locations where the value that is being written
1977 /// is already stored at the same location.
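/// For example (hypothetical IR):
///
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
///   ...
///   store i32 0, ptr %p     ; bytes at %p already hold this value
///
/// If the memset is the store's defining (or optimized) access, the store
/// writes a value that is already in memory and can be removed.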
1978 bool eliminateRedundantStoresOfExistingValues() {
1979 bool MadeChange = false;
1980 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
1981 "already existing value\n");
1982 for (auto *Def : MemDefs) {
1983 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
1984 continue;
1985
1986 Instruction *DefInst = Def->getMemoryInst();
1987 auto MaybeDefLoc = getLocForWrite(DefInst);
1988 if (!MaybeDefLoc || !isRemovable(DefInst))
1989 continue;
1990
1991 MemoryDef *UpperDef;
1992 // To conserve compile-time, we avoid walking to the next clobbering def.
1993 // Instead, we just try to get the optimized access, if it exists. DSE
1994 // will try to optimize defs during the earlier traversal.
1995 if (Def->isOptimized())
1996 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
1997 else
1998 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
1999 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2000 continue;
2001
2002 Instruction *UpperInst = UpperDef->getMemoryInst();
2003 auto IsRedundantStore = [&]() {
2004 if (DefInst->isIdenticalTo(UpperInst))
2005 return true;
2006 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2007 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2008 // MemSetInst must have a write location.
2009 MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
2010 int64_t InstWriteOffset = 0;
2011 int64_t DepWriteOffset = 0;
2012 auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
2013 InstWriteOffset, DepWriteOffset);
2014 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2015 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2016 OR == OW_Complete;
2017 }
2018 }
2019 return false;
2020 };
2021
2022 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2023 continue;
2024 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2025 << '\n');
2026 deleteDeadInstruction(DefInst);
2027 NumRedundantStores++;
2028 MadeChange = true;
2029 }
2030 return MadeChange;
2031 }
2032};
2033
2034static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2035 DominatorTree &DT, PostDominatorTree &PDT,
2036 AssumptionCache &AC,
2037 const TargetLibraryInfo &TLI,
2038 const LoopInfo &LI) {
2039 bool MadeChange = false;
2040
2041 MSSA.ensureOptimizedUses();
2042 DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI);
2043 // For each store:
2044 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2045 MemoryDef *KillingDef = State.MemDefs[I];
2046 if (State.SkipStores.count(KillingDef))
2047 continue;
2048 Instruction *KillingI = KillingDef->getMemoryInst();
2049
2050 std::optional<MemoryLocation> MaybeKillingLoc;
2051 if (State.isMemTerminatorInst(KillingI)) {
2052 if (auto KillingLoc = State.getLocForTerminator(KillingI))
2053 MaybeKillingLoc = KillingLoc->first;
2054 } else {
2055 MaybeKillingLoc = State.getLocForWrite(KillingI);
2056 }
2057
2058 if (!MaybeKillingLoc) {
2059 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2060 << *KillingI << "\n");
2061 continue;
2062 }
2063 MemoryLocation KillingLoc = *MaybeKillingLoc;
2064 assert(KillingLoc.Ptr && "KillingLoc should not be null");
2065 const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
2066 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2067 << *KillingDef << " (" << *KillingI << ")\n");
2068
2069 unsigned ScanLimit = MemorySSAScanLimit;
2070 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2071 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2072 // Worklist of MemoryAccesses that may be killed by KillingDef.
2073 SmallSetVector<MemoryAccess *, 8> ToCheck;
2074 ToCheck.insert(KillingDef->getDefiningAccess());
2075
2076 bool Shortend = false;
2077 bool IsMemTerm = State.isMemTerminatorInst(KillingI);
2078 // Check if MemoryAccesses in the worklist are killed by KillingDef.
2079 for (unsigned I = 0; I < ToCheck.size(); I++) {
2080 MemoryAccess *Current = ToCheck[I];
2081 if (State.SkipStores.count(Current))
2082 continue;
2083
2084 std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
2085 KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
2086 WalkerStepLimit, IsMemTerm, PartialLimit);
2087
2088 if (!MaybeDeadAccess) {
2089 LLVM_DEBUG(dbgs() << " finished walk\n");
2090 continue;
2091 }
2092
2093 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2094 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2095 if (isa<MemoryPhi>(DeadAccess)) {
2096 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2097 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2098 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2099 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2100 BasicBlock *PhiBlock = DeadAccess->getBlock();
2101
2102 // We only consider incoming MemoryAccesses that come before the
2103 // MemoryPhi. Otherwise we could discover candidates that do not
2104 // strictly dominate our starting def.
2105 if (State.PostOrderNumbers[IncomingBlock] >
2106 State.PostOrderNumbers[PhiBlock])
2107 ToCheck.insert(IncomingAccess);
2108 }
2109 continue;
2110 }
2111 auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
2112 Instruction *DeadI = DeadDefAccess->getMemoryInst();
2113 LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2114 ToCheck.insert(DeadDefAccess->getDefiningAccess());
2115 NumGetDomMemoryDefPassed++;
2116
2117 if (!DebugCounter::shouldExecute(MemorySSACounter))
2118 continue;
2119
2120 MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
2121
2122 if (IsMemTerm) {
2123 const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2124 if (KillingUndObj != DeadUndObj)
2125 continue;
2126 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2127 << "\n KILLER: " << *KillingI << '\n');
2128 State.deleteDeadInstruction(DeadI);
2129 ++NumFastStores;
2130 MadeChange = true;
2131 } else {
2132 // Check if KillingI overwrites DeadI.
2133 int64_t KillingOffset = 0;
2134 int64_t DeadOffset = 0;
2135 OverwriteResult OR = State.isOverwrite(
2136 KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2137 if (OR == OW_MaybePartial) {
2138 auto Iter = State.IOLs.insert(
2139 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2140 DeadI->getParent(), InstOverlapIntervalsTy()));
2141 auto &IOL = Iter.first->second;
2142 OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2143 DeadOffset, DeadI, IOL);
2144 }
2145
2146 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2147 auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2148 auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2149 // We are re-using tryToMergePartialOverlappingStores, which requires
2150 // DeadSI to dominate KillingSI.
2151 // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2152 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2153 if (Constant *Merged = tryToMergePartialOverlappingStores(
2154 KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2155 State.BatchAA, &DT)) {
2156
2157 // Update stored value of earlier store to merged constant.
2158 DeadSI->setOperand(0, Merged);
2159 ++NumModifiedStores;
2160 MadeChange = true;
2161
2162 Shortend = true;
2163 // Remove killing store and remove any outstanding overlap
2164 // intervals for the updated store.
2165 State.deleteDeadInstruction(KillingSI);
2166 auto I = State.IOLs.find(DeadSI->getParent());
2167 if (I != State.IOLs.end())
2168 I->second.erase(DeadSI);
2169 break;
2170 }
2171 }
2172 }
2173
2174 if (OR == OW_Complete) {
2175 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2176 << "\n KILLER: " << *KillingI << '\n');
2177 State.deleteDeadInstruction(DeadI);
2178 ++NumFastStores;
2179 MadeChange = true;
2180 }
2181 }
2182 }
2183
2184 // Check if the store is a no-op.
2185 if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
2186 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2187 << '\n');
2188 State.deleteDeadInstruction(KillingI);
2189 NumRedundantStores++;
2190 MadeChange = true;
2191 continue;
2192 }
2193
2194 // Can we form a calloc from a memset/malloc pair?
2195 if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
2196 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2197 << " DEAD: " << *KillingI << '\n');
2198 State.deleteDeadInstruction(KillingI);
2199 MadeChange = true;
2200 continue;
2201 }
2202 }
2203
2204 if (EnablePartialOverwriteTracking)
2205 for (auto &KV : State.IOLs)
2206 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2207
2208 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2209 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2210 return MadeChange;
2211}
2212} // end anonymous namespace
2213
2214//===----------------------------------------------------------------------===//
2215// DSE Pass
2216//===----------------------------------------------------------------------===//
2217 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2218 AliasAnalysis &AA = AM.getResult<AAManager>(F);
2219 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2220 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2221 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2222 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2223 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
2224 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2225
2226 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
2227
2228#ifdef LLVM_ENABLE_STATS
2229 if (AreStatisticsEnabled())
2230 for (auto &I : instructions(F))
2231 NumRemainingStores += isa<StoreInst>(&I);
2232#endif
2233
2234 if (!Changed)
2235 return PreservedAnalyses::all();
2236
2237 PreservedAnalyses PA;
2238 PA.preserveSet<CFGAnalyses>();
2239 PA.preserve<MemorySSAAnalysis>();
2240 PA.preserve<LoopAnalysis>();
2241 return PA;
2242}