1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
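//
// For illustration, a minimal example (assumed IR, not taken from this file):
// starting at the second store (StartDef) and walking upwards finds the first
// store as a MemoryDef that is completely overwritten and never read in
// between, so DSE deletes it:
//
//   store i32 1, ptr %p        ; dead: overwritten below, no intervening read
//   store i32 2, ptr %p        ; killing store (StartDef)
//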
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/BasicBlock.h"
54#include "llvm/IR/Constant.h"
56#include "llvm/IR/Constants.h"
57#include "llvm/IR/DataLayout.h"
58#include "llvm/IR/DebugInfo.h"
59#include "llvm/IR/Dominators.h"
60#include "llvm/IR/Function.h"
61#include "llvm/IR/IRBuilder.h"
63#include "llvm/IR/InstrTypes.h"
64#include "llvm/IR/Instruction.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/PassManager.h"
70#include "llvm/IR/Value.h"
73#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <cstdint>
83#include <map>
84#include <optional>
85#include <utility>
86
87using namespace llvm;
88using namespace PatternMatch;
89
90#define DEBUG_TYPE "dse"
91
92STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
93STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
94STATISTIC(NumFastStores, "Number of stores deleted");
95STATISTIC(NumFastOther, "Number of other instrs removed");
96STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
97STATISTIC(NumModifiedStores, "Number of stores modified");
98STATISTIC(NumCFGChecks, "Number of stores checked against the CFG");
99STATISTIC(NumCFGTries, "Number of CFG-based elimination attempts");
100STATISTIC(NumCFGSuccess, "Number of successful CFG-based eliminations");
101STATISTIC(NumGetDomMemoryDefPassed,
102 "Number of times a valid candidate is returned from getDomMemoryDef");
103STATISTIC(NumDomMemDefChecks,
104 "Number iterations check for reads in getDomMemoryDef");
105
106DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
107 "Controls which MemoryDefs are eliminated.");
108
109static cl::opt<bool>
110EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
111 cl::init(true), cl::Hidden,
112 cl::desc("Enable partial-overwrite tracking in DSE"));
113
114static cl::opt<bool>
115EnablePartialStoreMerging("enable-dse-partial-store-merging",
116 cl::init(true), cl::Hidden,
117 cl::desc("Enable partial store merging in DSE"));
118
119static cl::opt<unsigned>
120 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
121 cl::desc("The number of memory instructions to scan for "
122 "dead store elimination (default = 150)"));
124 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
125 cl::desc("The maximum number of steps while walking upwards to find "
126 "MemoryDefs that may be killed (default = 90)"));
127
129 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
130 cl::desc("The maximum number candidates that only partially overwrite the "
131 "killing MemoryDef to consider"
132 " (default = 5)"));
133
135 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
136 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
137 "other stores per basic block (default = 5000)"));
138
140 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
141 cl::desc(
142 "The cost of a step in the same basic block as the killing MemoryDef"
143 "(default = 1)"));
144
145static cl::opt<unsigned>
146 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
147 cl::Hidden,
148 cl::desc("The cost of a step in a different basic "
149 "block than the killing MemoryDef "
150 "(default = 5)"));
151
153 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
154 cl::desc("The maximum number of blocks to check when trying to prove that "
155 "all paths to an exit go through a killing block (default = 50)"));
156
157// This flag allows or disallows DSE to optimize MemorySSA during its
158// traversal. Note that DSE optimizing MemorySSA may impact other passes
159// downstream of the DSE invocation and can lead to issues not being
160// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
161// those cases, the flag can be used to check if DSE's MemorySSA optimizations
162// impact follow-up passes.
163static cl::opt<bool>
164 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
165 cl::desc("Allow DSE to optimize memory accesses."));
166
167// TODO: turn on and remove this flag.
169 "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden,
170 cl::desc("Enable the initializes attr improvement in DSE"));
171
172//===----------------------------------------------------------------------===//
173// Helper functions
174//===----------------------------------------------------------------------===//
175using OverlapIntervalsTy = std::map<int64_t, int64_t>;
176using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
177
178/// Returns true if the end of this instruction can be safely shortened in
179/// length.
180static bool isShortenableAtTheEnd(Instruction *I) {
181 // Don't shorten stores for now
182 if (isa<StoreInst>(I))
183 return false;
184
185 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
186 switch (II->getIntrinsicID()) {
187 default: return false;
188 case Intrinsic::memset:
189 case Intrinsic::memcpy:
190 case Intrinsic::memcpy_element_unordered_atomic:
191 case Intrinsic::memset_element_unordered_atomic:
192 // Do shorten memory intrinsics.
193 // FIXME: Add memmove if it's also safe to transform.
194 return true;
195 }
196 }
197
198 // Don't shorten libcalls calls for now.
199
200 return false;
201}
202
203/// Returns true if the beginning of this instruction can be safely shortened
204/// in length.
205static bool isShortenableAtTheBeginning(Instruction *I) {
206 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
207 // easily done by offsetting the source address.
208 return isa<AnyMemSetInst>(I);
209}
210
211static std::optional<TypeSize> getPointerSize(const Value *V,
212 const DataLayout &DL,
213 const TargetLibraryInfo &TLI,
214 const Function *F) {
215 uint64_t Size;
216 ObjectSizeOpts Opts;
217 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
218
219 if (getObjectSize(V, Size, DL, &TLI, Opts))
220 return TypeSize::getFixed(Size);
221 return std::nullopt;
222}
223
224namespace {
225
226enum OverwriteResult {
227 OW_Begin,
228 OW_Complete,
229 OW_End,
230 OW_PartialEarlierWithFullLater,
231 OW_MaybePartial,
232 OW_None,
233 OW_Unknown
234};
235
236} // end anonymous namespace
237
238/// Check if two instructions are masked stores that completely
239/// overwrite one another. More specifically, \p KillingI has to
240/// overwrite \p DeadI.
241static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
242 const Instruction *DeadI,
243 BatchAAResults &AA) {
244 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
245 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
246 if (KillingII == nullptr || DeadII == nullptr)
247 return OW_Unknown;
248 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
249 return OW_Unknown;
250 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
251 // Type size.
252 VectorType *KillingTy =
253 cast<VectorType>(KillingII->getArgOperand(0)->getType());
254 VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
255 if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
256 return OW_Unknown;
257 // Element count.
258 if (KillingTy->getElementCount() != DeadTy->getElementCount())
259 return OW_Unknown;
260 // Pointers.
261 Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
262 Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
263 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
264 return OW_Unknown;
265 // Masks.
266 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
267 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
268 return OW_Unknown;
269 return OW_Complete;
270 }
271 return OW_Unknown;
272}
273
274/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
275/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
276/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
277/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
278/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
279/// overwritten by a killing (smaller) store which doesn't write outside the big
280/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
281/// NOTE: This function must only be called if both \p KillingLoc and \p
282/// DeadLoc belong to the same underlying object with valid \p KillingOff and
283/// \p DeadOff.
284static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
285 const MemoryLocation &DeadLoc,
286 int64_t KillingOff, int64_t DeadOff,
287 Instruction *DeadI,
288 InstOverlapIntervalsTy &IOL) {
289 const uint64_t KillingSize = KillingLoc.Size.getValue();
290 const uint64_t DeadSize = DeadLoc.Size.getValue();
291 // We may now overlap, although the overlap is not complete. There might also
292 // be other incomplete overlaps, and together, they might cover the complete
293 // dead store.
294 // Note: The correctness of this logic depends on the fact that this function
295 // is never called with DepWrite when there are any intervening reads.
296 if (EnablePartialOverwriteTracking &&
297 KillingOff < int64_t(DeadOff + DeadSize) &&
298 int64_t(KillingOff + KillingSize) >= DeadOff) {
299
300 // Insert our part of the overlap into the map.
301 auto &IM = IOL[DeadI];
302 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
303 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
304 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
305 << ")\n");
306
307 // Make sure that we only insert non-overlapping intervals and combine
308 // adjacent intervals. The intervals are stored in the map with the ending
309 // offset as the key (in the half-open sense) and the starting offset as
310 // the value.
311 int64_t KillingIntStart = KillingOff;
312 int64_t KillingIntEnd = KillingOff + KillingSize;
313
314 // Find any intervals ending at, or after, KillingIntStart which start
315 // before KillingIntEnd.
316 auto ILI = IM.lower_bound(KillingIntStart);
317 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
318 // This existing interval is overlapped with the current store somewhere
319 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
320 // intervals and adjusting our start and end.
321 KillingIntStart = std::min(KillingIntStart, ILI->second);
322 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
323 ILI = IM.erase(ILI);
324
325 // Continue erasing and adjusting our end in case other previous
326 // intervals are also overlapped with the current store.
327 //
328 // |--- dead 1 ---| |--- dead 2 ---|
329 // |------- killing---------|
330 //
331 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
332 assert(ILI->second > KillingIntStart && "Unexpected interval");
333 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
334 ILI = IM.erase(ILI);
335 }
336 }
337
338 IM[KillingIntEnd] = KillingIntStart;
339
340 ILI = IM.begin();
341 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
342 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
343 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
344 << ") Composite KillingLoc [" << ILI->second << ", "
345 << ILI->first << ")\n");
346 ++NumCompletePartials;
347 return OW_Complete;
348 }
349 }
350
351 // Check for a dead store which writes to all the memory locations that
352 // the killing store writes to.
353 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
354 int64_t(DeadOff + DeadSize) > KillingOff &&
355 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
356 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
357 << ", " << int64_t(DeadOff + DeadSize)
358 << ") by a killing store [" << KillingOff << ", "
359 << int64_t(KillingOff + KillingSize) << ")\n");
360 // TODO: Maybe come up with a better name?
361 return OW_PartialEarlierWithFullLater;
362 }
363
364 // Another interesting case is if the killing store overwrites the end of the
365 // dead store.
366 //
367 // |--dead--|
368 // |-- killing --|
369 //
370 // In this case we may want to trim the size of dead store to avoid
371 // generating stores to addresses which will definitely be overwritten by the
372 // killing store.
373 if (!EnablePartialOverwriteTracking &&
374 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
375 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
376 return OW_End;
377
378 // Finally, we also need to check if the killing store overwrites the
379 // beginning of the dead store.
380 //
381 // |--dead--|
382 // |-- killing --|
383 //
384 // In this case we may want to move the destination address and trim the size
385 // of dead store to avoid generating stores to addresses which will definitely
386 // be overwritten by the killing store.
387 if (!EnablePartialOverwriteTracking &&
388 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
389 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
390 "Expect to be handled as OW_Complete");
391 return OW_Begin;
392 }
393 // Otherwise, they don't completely overlap.
394 return OW_Unknown;
395}
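//
// Illustrative example (assumed IR, not from this file) of the interval
// bookkeeping above: the two i32 stores together cover the dead 8-byte
// memset. Processing the store to %p records the interval {4 -> 0}; the
// store to %q then merges to {8 -> 0}, which spans [0, 8) and yields
// OW_Complete for the memset:
//
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 8, i1 false)  ; dead
//   %q = getelementptr i8, ptr %p, i64 4
//   store i32 1, ptr %p                                           ; killing
//   store i32 2, ptr %q                                           ; killing
//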
396
397/// Returns true if the memory which is accessed by the second instruction is not
398/// modified between the first and the second instruction.
399/// Precondition: Second instruction must be dominated by the first
400/// instruction.
401static bool
402memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
403 BatchAAResults &AA, const DataLayout &DL,
404 DominatorTree *DT) {
405 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
406 // instructions which can modify the memory location accessed by SecondI.
407 //
408 // While doing the walk keep track of the address to check. It might be
409 // different in different basic blocks due to PHI translation.
410 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
411 SmallVector<BlockAddressPair, 16> WorkList;
412 // Keep track of the address we visited each block with. Bail out if we
413 // visit a block with different addresses.
414 DenseMap<BasicBlock *, Value *> Visited;
415
416 BasicBlock::iterator FirstBBI(FirstI);
417 ++FirstBBI;
418 BasicBlock::iterator SecondBBI(SecondI);
419 BasicBlock *FirstBB = FirstI->getParent();
420 BasicBlock *SecondBB = SecondI->getParent();
421 MemoryLocation MemLoc;
422 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
423 MemLoc = MemoryLocation::getForDest(MemSet);
424 else
425 MemLoc = MemoryLocation::get(SecondI);
426
427 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
428
429 // Start checking the SecondBB.
430 WorkList.push_back(
431 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
432 bool isFirstBlock = true;
433
434 // Check all blocks going backward until we reach the FirstBB.
435 while (!WorkList.empty()) {
436 BlockAddressPair Current = WorkList.pop_back_val();
437 BasicBlock *B = Current.first;
438 PHITransAddr &Addr = Current.second;
439 Value *Ptr = Addr.getAddr();
440
441 // Ignore instructions before FirstI if this is the FirstBB.
442 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
443
444 BasicBlock::iterator EI;
445 if (isFirstBlock) {
446 // Ignore instructions after SecondI if this is the first visit of SecondBB.
447 assert(B == SecondBB && "first block is not the store block");
448 EI = SecondBBI;
449 isFirstBlock = false;
450 } else {
451 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
452 // In this case we also have to look at instructions after SecondI.
453 EI = B->end();
454 }
455 for (; BI != EI; ++BI) {
456 Instruction *I = &*BI;
457 if (I->mayWriteToMemory() && I != SecondI)
458 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
459 return false;
460 }
461 if (B != FirstBB) {
462 assert(B != &FirstBB->getParent()->getEntryBlock() &&
463 "Should not hit the entry block because SI must be dominated by LI");
464 for (BasicBlock *Pred : predecessors(B)) {
465 PHITransAddr PredAddr = Addr;
466 if (PredAddr.needsPHITranslationFromBlock(B)) {
467 if (!PredAddr.isPotentiallyPHITranslatable())
468 return false;
469 if (!PredAddr.translateValue(B, Pred, DT, false))
470 return false;
471 }
472 Value *TranslatedPtr = PredAddr.getAddr();
473 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
474 if (!Inserted.second) {
475 // We already visited this block before. If it was with a different
476 // address - bail out!
477 if (TranslatedPtr != Inserted.first->second)
478 return false;
479 // ... otherwise just skip it.
480 continue;
481 }
482 WorkList.push_back(std::make_pair(Pred, PredAddr));
483 }
484 }
485 }
486 return true;
487}
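//
// Illustrative sketch (assumed IR, not from this file): when the backwards
// walk from SecondI crosses a block boundary, the queried address may need
// PHI translation. Here the address %a in %merge translates to %p or %q in
// the respective predecessor, and each predecessor is scanned with its
// translated pointer:
//
//   merge:
//     %a = phi ptr [ %p, %left ], [ %q, %right ]
//     store i32 1, ptr %a                          ; SecondI
//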
488
489static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
490 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
491 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
492 const DataLayout &DL = Inst->getDataLayout();
493 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
494 uint64_t DeadSliceOffsetInBits =
495 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
496 auto SetDeadFragExpr = [](auto *Assign,
497 DIExpression::FragmentInfo DeadFragment) {
498 // createFragmentExpression expects an offset relative to the existing
499 // fragment offset if there is one.
500 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
501 Assign->getExpression()
502 ->getFragmentInfo()
503 .value_or(DIExpression::FragmentInfo(0, 0))
504 .OffsetInBits;
505 if (auto NewExpr = DIExpression::createFragmentExpression(
506 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
507 Assign->setExpression(*NewExpr);
508 return;
509 }
510 // Failed to create a fragment expression for this so discard the value,
511 // making this a kill location.
512 auto *Expr = *DIExpression::createFragmentExpression(
513 DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
514 DeadFragment.SizeInBits);
515 Assign->setExpression(Expr);
516 Assign->setKillLocation();
517 };
518
519 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
520 // link to any instructions. Created in the loop below (once).
521 DIAssignID *LinkToNothing = nullptr;
522 LLVMContext &Ctx = Inst->getContext();
523 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
524 if (!LinkToNothing)
525 LinkToNothing = DIAssignID::getDistinct(Ctx);
526 return LinkToNothing;
527 };
528
529 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
530 // overlapping dbg.assign intrinsic. The loop invalidates the iterators
531 // returned by getAssignmentMarkers so save a copy of the markers to iterate
532 // over.
533 auto LinkedRange = at::getAssignmentMarkers(Inst);
534 SmallVector<DbgVariableRecord *> LinkedDVRAssigns =
535 at::getDVRAssignmentMarkers(Inst);
536 SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
537 LinkedRange.end());
538 auto InsertAssignForOverlap = [&](auto *Assign) {
539 std::optional<DIExpression::FragmentInfo> NewFragment;
540 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
541 DeadSliceSizeInBits, Assign,
542 NewFragment) ||
543 !NewFragment) {
544 // We couldn't calculate the intersecting fragment for some reason. Be
545 // cautious and unlink the whole assignment from the store.
546 Assign->setKillAddress();
547 Assign->setAssignId(GetDeadLink());
548 return;
549 }
550 // No intersect.
551 if (NewFragment->SizeInBits == 0)
552 return;
553
554 // Fragments overlap: insert a new dbg.assign for this dead part.
555 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
556 NewAssign->insertAfter(Assign);
557 NewAssign->setAssignId(GetDeadLink());
558 if (NewFragment)
559 SetDeadFragExpr(NewAssign, *NewFragment);
560 NewAssign->setKillAddress();
561 };
562 for_each(Linked, InsertAssignForOverlap);
563 for_each(LinkedDVRAssigns, InsertAssignForOverlap);
564}
565
566static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
567 uint64_t &DeadSize, int64_t KillingStart,
568 uint64_t KillingSize, bool IsOverwriteEnd) {
569 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
570 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
571
572 // We assume that memset/memcpy operates in chunks of the "largest" native
573 // type size and aligned on the same value. That means the optimal start and
574 // size of memset/memcpy should be a multiple of the preferred alignment of
575 // that type, so there is no sense in trying to reduce the store size any
576 // further, since any "extra" stores come for free anyway.
577 // On the other hand, maximum alignment we can achieve is limited by alignment
578 // of initial store.
579
580 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
581 // "largest" native type.
582 // Note: What is the proper way to get that value?
583 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
584 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
585
586 int64_t ToRemoveStart = 0;
587 uint64_t ToRemoveSize = 0;
588 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
589 // maintained on the remaining store.
590 if (IsOverwriteEnd) {
591 // Calculate required adjustment for 'KillingStart' in order to keep
592 // remaining store size aligned on 'PrefAlign'.
593 uint64_t Off =
594 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
595 ToRemoveStart = KillingStart + Off;
596 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
597 return false;
598 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
599 } else {
600 ToRemoveStart = DeadStart;
601 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
602 "Not overlapping accesses?");
603 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
604 // Calculate required adjustment for 'ToRemoveSize' in order to keep
605 // start of the remaining store aligned on 'PrefAlign'.
606 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
607 if (Off != 0) {
608 if (ToRemoveSize <= (PrefAlign.value() - Off))
609 return false;
610 ToRemoveSize -= PrefAlign.value() - Off;
611 }
612 assert(isAligned(PrefAlign, ToRemoveSize) &&
613 "Should preserve selected alignment");
614 }
615
616 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
617 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
618
619 uint64_t NewSize = DeadSize - ToRemoveSize;
620 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
621 // When shortening an atomic memory intrinsic, the newly shortened
622 // length must remain an integer multiple of the element size.
623 const uint32_t ElementSize = AMI->getElementSizeInBytes();
624 if (0 != NewSize % ElementSize)
625 return false;
626 }
627
628 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
629 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
630 << "\n KILLER [" << ToRemoveStart << ", "
631 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
632
633 Value *DeadWriteLength = DeadIntrinsic->getLength();
634 Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
635 DeadIntrinsic->setLength(TrimmedLength);
636 DeadIntrinsic->setDestAlignment(PrefAlign);
637
638 Value *OrigDest = DeadIntrinsic->getRawDest();
639 if (!IsOverwriteEnd) {
640 Value *Indices[1] = {
641 ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
642 Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
643 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
644 DeadI->getIterator());
645 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
646 DeadIntrinsic->setDest(NewDestGEP);
647 }
648
649 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
650 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
651 IsOverwriteEnd);
652
653 // Finally update start and size of dead access.
654 if (!IsOverwriteEnd)
655 DeadStart += ToRemoveSize;
656 DeadSize = NewSize;
657
658 return true;
659}
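//
// Illustrative example (assumed IR, not from this file): the killing store
// below overwrites the last 8 bytes of the 16-byte memset, so tryToShorten
// trims the memset length from 16 to 8 while keeping the remaining region
// aligned on the memset's destination alignment:
//
//   call void @llvm.memset.p0.i64(ptr align 8 %p, i8 0, i64 16, i1 false)
//   %q = getelementptr i8, ptr %p, i64 8
//   store i64 -1, ptr %q          ; kills [8, 16) of the memset
//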
660
661static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
662 int64_t &DeadStart, uint64_t &DeadSize) {
663 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
664 return false;
665
666 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
667 int64_t KillingStart = OII->second;
668 uint64_t KillingSize = OII->first - KillingStart;
669
670 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
671
672 if (KillingStart > DeadStart &&
673 // Note: "KillingStart - DeadStart" is known to be positive due to
674 // preceding check.
675 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
676 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
677 // be non negative due to preceding checks.
678 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
679 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
680 true)) {
681 IntervalMap.erase(OII);
682 return true;
683 }
684 }
685 return false;
686}
687
688static bool tryToShortenBegin(Instruction *DeadI,
689 OverlapIntervalsTy &IntervalMap,
690 int64_t &DeadStart, uint64_t &DeadSize) {
691 if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
692 return false;
693
694 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
695 int64_t KillingStart = OII->second;
696 uint64_t KillingSize = OII->first - KillingStart;
697
698 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
699
700 if (KillingStart <= DeadStart &&
701 // Note: "DeadStart - KillingStart" is known to be non negative due to
702 // preceding check.
703 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
704 // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
705 // be positive due to preceding checks.
706 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
707 "Should have been handled as OW_Complete");
708 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
709 false)) {
710 IntervalMap.erase(OII);
711 return true;
712 }
713 }
714 return false;
715}
716
717static Constant *
718tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
719 int64_t KillingOffset, int64_t DeadOffset,
720 const DataLayout &DL, BatchAAResults &AA,
721 DominatorTree *DT) {
722
723 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
724 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
725 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
726 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
727 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
728 // If the store we find is:
729 // a) partially overwritten by the store to 'Loc'
730 // b) the killing store is fully contained in the dead one and
731 // c) they both have a constant value
732 // d) none of the two stores need padding
733 // Merge the two stores, replacing the dead store's value with a
734 // merge of both values.
735 // TODO: Deal with other constant types (vectors, etc), and probably
736 // some mem intrinsics (if needed)
737
738 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
739 APInt KillingValue =
740 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
741 unsigned KillingBits = KillingValue.getBitWidth();
742 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
743 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
744
745 // Offset of the smaller store inside the larger store
746 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
747 unsigned LShiftAmount =
748 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
749 : BitOffsetDiff;
750 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
751 LShiftAmount + KillingBits);
752 // Clear the bits we'll be replacing, then OR with the smaller
753 // store, shifted appropriately.
754 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
755 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
756 << "\n Killing: " << *KillingI
757 << "\n Merged Value: " << Merged << '\n');
758 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
759 }
760 return nullptr;
761}
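//
// Illustrative example (assumed little-endian layout; IR not from this file):
// the i8 store below is fully contained in the dead i32 store and both values
// are constants, so the two constants are merged and the dead store is
// rewritten instead of deleted:
//
//   store i32 u0x11223344, ptr %p     ; dead (bytes 44 33 22 11 in memory)
//   %q = getelementptr i8, ptr %p, i64 1
//   store i8 u0x55, ptr %q            ; killing, byte offset 1 inside the i32
//
//   ; LShiftAmount = 8, mask covers bits [8, 16), merged value = 0x11225544,
//   ; which replaces the value operand of the dead i32 store.
//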
762
763namespace {
764// Returns true if \p I is an intrinsic that does not read or write memory.
765bool isNoopIntrinsic(Instruction *I) {
766 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
767 switch (II->getIntrinsicID()) {
768 case Intrinsic::lifetime_start:
769 case Intrinsic::lifetime_end:
770 case Intrinsic::invariant_end:
771 case Intrinsic::launder_invariant_group:
772 case Intrinsic::assume:
773 return true;
774 case Intrinsic::dbg_declare:
775 case Intrinsic::dbg_label:
776 case Intrinsic::dbg_value:
777 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
778 default:
779 return false;
780 }
781 }
782 return false;
783}
784
785// Check if we can ignore \p D for DSE.
786bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
787 Instruction *DI = D->getMemoryInst();
788 // Calls that only access inaccessible memory cannot read or write any memory
789 // locations we consider for elimination.
790 if (auto *CB = dyn_cast<CallBase>(DI))
791 if (CB->onlyAccessesInaccessibleMemory())
792 return true;
793
794 // We can eliminate stores to locations not visible to the caller across
795 // throwing instructions.
796 if (DI->mayThrow() && !DefVisibleToCaller)
797 return true;
798
799 // We can remove the dead stores, irrespective of the fence and its ordering
800 // (release/acquire/seq_cst). Fences only constrain the ordering of
801 // already visible stores; they do not make a store visible to other
802 // threads. So, skipping over a fence does not change a store from being
803 // dead.
804 if (isa<FenceInst>(DI))
805 return true;
806
807 // Skip intrinsics that do not really read or modify memory.
808 if (isNoopIntrinsic(DI))
809 return true;
810
811 return false;
812}
813
814// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
815// defined by `MemDef`.
816struct MemoryLocationWrapper {
817 MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
818 bool DefByInitializesAttr)
819 : MemLoc(MemLoc), MemDef(MemDef),
820 DefByInitializesAttr(DefByInitializesAttr) {
821 assert(MemLoc.Ptr && "MemLoc should not be null");
822 UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
823 DefInst = MemDef->getMemoryInst();
824 }
825
826 MemoryLocation MemLoc;
827 const Value *UnderlyingObject;
828 MemoryDef *MemDef;
829 Instruction *DefInst;
830 bool DefByInitializesAttr = false;
831};
832
833// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
834// defined by this MemoryDef.
835struct MemoryDefWrapper {
836 MemoryDefWrapper(MemoryDef *MemDef,
837 ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
838 DefInst = MemDef->getMemoryInst();
839 for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
840 DefinedLocations.push_back(
841 MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
842 }
843 Instruction *DefInst;
844 SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
845};
846
847bool hasInitializesAttr(Instruction *I) {
848 CallBase *CB = dyn_cast<CallBase>(I);
849 return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
850}
851
852struct ArgumentInitInfo {
853 unsigned Idx;
854 bool IsDeadOrInvisibleOnUnwind;
855 ConstantRangeList Inits;
856};
857
858// Return the intersected range list of the initializes attributes of "Args".
859// "Args" are call arguments that alias to each other.
860// If any argument in "Args" doesn't have dead_on_unwind attr and
861// "CallHasNoUnwindAttr" is false, return empty.
862ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
863 bool CallHasNoUnwindAttr) {
864 if (Args.empty())
865 return {};
866
867 // To address unwind, the function should have the nounwind attribute or the
868 // arguments must be dead or invisible on unwind. Otherwise, return empty.
869 for (const auto &Arg : Args) {
870 if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
871 return {};
872 if (Arg.Inits.empty())
873 return {};
874 }
875
876 ConstantRangeList IntersectedIntervals = Args.front().Inits;
877 for (auto &Arg : Args.drop_front())
878 IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);
879
880 return IntersectedIntervals;
881}
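//
// Illustrative example (assumed attribute values): for a nounwind call whose
// two aliasing arguments carry initializes((0, 16)) and initializes((8, 24)),
// the intersected range list is [8, 16) -- only the bytes that every aliasing
// argument is guaranteed to initialize may be used to kill earlier stores.
//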
882
883struct DSEState {
884 Function &F;
885 AliasAnalysis &AA;
886 EarliestEscapeInfo EA;
887
888 /// The single BatchAA instance that is used to cache AA queries. It will
889 /// not be invalidated over the whole run. This is safe, because:
890 /// 1. Only memory writes are removed, so the alias cache for memory
891 /// locations remains valid.
892 /// 2. No new instructions are added (only instructions removed), so cached
893 /// information for a deleted value cannot be accessed by a re-used new
894 /// value pointer.
895 BatchAAResults BatchAA;
896
897 MemorySSA &MSSA;
898 DominatorTree &DT;
899 PostDominatorTree &PDT;
900 const TargetLibraryInfo &TLI;
901 const DataLayout &DL;
902 const LoopInfo &LI;
903
904 // Whether the function contains any irreducible control flow, useful for
905 // being accurately able to detect loops.
906 bool ContainsIrreducibleLoops;
907
908 // All MemoryDefs that potentially could kill other MemDefs.
909 SmallVector<MemoryDef *, 64> MemDefs;
910 // Any that should be skipped as they are already deleted
911 SmallPtrSet<MemoryAccess *, 4> SkipStores;
912 // Keep track whether a given object is captured before return or not.
913 DenseMap<const Value *, bool> CapturedBeforeReturn;
914 // Keep track of all of the objects that are invisible to the caller after
915 // the function returns.
916 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
917 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
918 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
919 // Post-order numbers for each basic block. Used to figure out if memory
920 // accesses are executed before another access.
921 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
922
923 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
924 /// basic block.
925 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
926 // Check if there are root nodes that are terminated by UnreachableInst.
927 // Those roots pessimize post-dominance queries. If there are such roots,
928 // fall back to CFG scan starting from all non-unreachable roots.
929 bool AnyUnreachableExit;
930
931 // Whether or not we should iterate on removing dead stores at the end of the
932 // function due to removing a store causing a previously captured pointer to
933 // no longer be captured.
934 bool ShouldIterateEndOfFunctionDSE;
935
936 /// Dead instructions to be removed at the end of DSE.
937 SmallVector<Instruction *> ToRemove;
938
939 // Class contains self-reference, make sure it's not copied/moved.
939 // Class contains self-reference, make sure it's not copied/moved.
940 DSEState(const DSEState &) = delete;
941 DSEState &operator=(const DSEState &) = delete;
942
943 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
944 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
945 const LoopInfo &LI)
946 : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
947 PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
948 // Collect blocks with throwing instructions not modeled in MemorySSA and
949 // alloc-like objects.
950 unsigned PO = 0;
951 for (BasicBlock *BB : post_order(&F)) {
952 PostOrderNumbers[BB] = PO++;
953 for (Instruction &I : *BB) {
954 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
955 if (I.mayThrow() && !MA)
956 ThrowingBlocks.insert(I.getParent());
957
958 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
959 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
960 (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
961 (EnableInitializesImprovement && hasInitializesAttr(&I))))
962 MemDefs.push_back(MD);
963 }
964 }
965
966 // Treat byval or inalloca arguments the same as Allocas, stores to them are
967 // dead at the end of the function.
968 for (Argument &AI : F.args())
969 if (AI.hasPassPointeeByValueCopyAttr())
970 InvisibleToCallerAfterRet.insert({&AI, true});
971
972 // Collect whether there is any irreducible control flow in the function.
973 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
974
975 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
976 return isa<UnreachableInst>(E->getTerminator());
977 });
978 }
979
980 static void pushMemUses(MemoryAccess *Acc,
981 SmallVectorImpl<MemoryAccess *> &WorkList,
982 SmallPtrSetImpl<MemoryAccess *> &Visited) {
983 for (Use &U : Acc->uses()) {
984 auto *MA = cast<MemoryAccess>(U.getUser());
985 if (Visited.insert(MA).second)
986 WorkList.push_back(MA);
987 }
988 };
989
990 LocationSize strengthenLocationSize(const Instruction *I,
991 LocationSize Size) const {
992 if (auto *CB = dyn_cast<CallBase>(I)) {
993 LibFunc F;
994 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
995 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
996 // Use the precise location size specified by the 3rd argument
997 // for determining whether KillingI overwrites DeadLoc if it is a memset_chk
998 // instruction. memset_chk will write either the amount specified as 3rd
999 // argument or the function will immediately abort and exit the program.
1000 // NOTE: AA may determine NoAlias if it can prove that the access size
1001 // is larger than the allocation size due to that being UB. To avoid
1002 // returning potentially invalid NoAlias results by AA, limit the use of
1003 // the precise location size to isOverwrite.
1004 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
1005 return LocationSize::precise(Len->getZExtValue());
1006 }
1007 }
1008 return Size;
1009 }
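//
// Illustrative example (assumed IR, not from this file): the destination
// location of the call below has an imprecise size, but the constant third
// argument lets isOverwrite treat it as a precise 32-byte write, because
// either all 32 bytes are written or the program aborts before writing:
//
//   call ptr @__memset_chk(ptr %p, i32 0, i64 32, i64 %objsize)
//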
1010
1011 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
1012 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1013 /// location (by \p DeadI instruction).
1014 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
1015 /// \p DeadI, but they both write to the same underlying object. In that
1016 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
1017 /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
1018 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1019 OverwriteResult isOverwrite(const Instruction *KillingI,
1020 const Instruction *DeadI,
1021 const MemoryLocation &KillingLoc,
1022 const MemoryLocation &DeadLoc,
1023 int64_t &KillingOff, int64_t &DeadOff) {
1024 // AliasAnalysis does not always account for loops. Limit overwrite checks
1025 // to dependencies for which we can guarantee they are independent of any
1026 // loops they are in.
1027 if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
1028 return OW_Unknown;
1029
1030 LocationSize KillingLocSize =
1031 strengthenLocationSize(KillingI, KillingLoc.Size);
1032 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
1033 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
1034 const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
1035 const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
1036
1037 // Check whether the killing store overwrites the whole object, in which
1038 // case the size/offset of the dead store does not matter.
1039 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
1040 isIdentifiedObject(KillingUndObj)) {
1041 std::optional<TypeSize> KillingUndObjSize =
1042 getPointerSize(KillingUndObj, DL, TLI, &F);
1043 if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
1044 return OW_Complete;
1045 }
1046
1047 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
1048 // get imprecise values here, though (except for unknown sizes).
1049 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
1050 // In case no constant size is known, try to use the IR values for the number
1051 // of bytes written and check if they match.
1052 const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
1053 const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
1054 if (KillingMemI && DeadMemI) {
1055 const Value *KillingV = KillingMemI->getLength();
1056 const Value *DeadV = DeadMemI->getLength();
1057 if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
1058 return OW_Complete;
1059 }
1060
1061 // Masked stores have imprecise locations, but we can reason about them
1062 // to some extent.
1063 return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
1064 }
1065
1066 const TypeSize KillingSize = KillingLocSize.getValue();
1067 const TypeSize DeadSize = DeadLoc.Size.getValue();
1068 // Bail on doing Size comparison which depends on AA for now
1069 // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
1070 const bool AnyScalable =
1071 DeadSize.isScalable() || KillingLocSize.isScalable();
1072
1073 if (AnyScalable)
1074 return OW_Unknown;
1075 // Query the alias information
1076 AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
1077
1078 // If the start pointers are the same, we just have to compare sizes to see if
1079 // the killing store was larger than the dead store.
1080 if (AAR == AliasResult::MustAlias) {
1081 // Make sure that KillingSize is >= DeadSize.
1082 if (KillingSize >= DeadSize)
1083 return OW_Complete;
1084 }
1085
1086 // If we hit a partial alias we may have a full overwrite
1087 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1088 int32_t Off = AAR.getOffset();
1089 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1090 return OW_Complete;
1091 }
1092
1093 // If we can't resolve the same pointers to the same object, then we can't
1094 // analyze them at all.
1095 if (DeadUndObj != KillingUndObj) {
1096 // Non aliasing stores to different objects don't overlap. Note that
1097 // if the killing store is known to overwrite whole object (out of
1098 // bounds access overwrites whole object as well) then it is assumed to
1099 // completely overwrite any store to the same object even if they don't
1100 // actually alias (see next check).
1101 if (AAR == AliasResult::NoAlias)
1102 return OW_None;
1103 return OW_Unknown;
1104 }
1105
1106 // Okay, we have two different pointers into the same underlying object. Try
1107 // to decompose each pointer into a "base + constant_offset" form. If the
1108 // base pointers are equal, then we can reason about the two stores.
1109 DeadOff = 0;
1110 KillingOff = 0;
1111 const Value *DeadBasePtr =
1112 GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1113 const Value *KillingBasePtr =
1114 GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1115
1116 // If the base pointers still differ, we have two completely different
1117 // stores.
1118 if (DeadBasePtr != KillingBasePtr)
1119 return OW_Unknown;
1120
1121 // The killing access completely overlaps the dead store if and only if
1122 // both start and end of the dead one is "inside" the killing one:
1123 // |<->|--dead--|<->|
1124 // |-----killing------|
1125 // Accesses may overlap if and only if start of one of them is "inside"
1126 // another one:
1127 // |<->|--dead--|<-------->|
1128 // |-------killing--------|
1129 // OR
1130 // |-------dead-------|
1131 // |<->|---killing---|<----->|
1132 //
1133 // We have to be careful here as *Off is signed while *.Size is unsigned.
1134
1135 // Check if the dead access starts "not before" the killing one.
1136 if (DeadOff >= KillingOff) {
1137 // If the dead access ends "not after" the killing access then the
1138 // dead one is completely overwritten by the killing one.
1139 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1140 return OW_Complete;
1141 // If start of the dead access is "before" end of the killing access
1142 // then accesses overlap.
1143 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1144 return OW_MaybePartial;
1145 }
1146 // If start of the killing access is "before" end of the dead access then
1147 // accesses overlap.
1148 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1149 return OW_MaybePartial;
1150 }
1151
1152 // Can reach here only if accesses are known not to overlap.
1153 return OW_None;
1154 }
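//
// Illustrative example (assumed offsets and sizes): with a shared base
// pointer, DeadOff = 4, DeadSize = 4, KillingOff = 0 and KillingSize = 8
// gives DeadOff >= KillingOff and (DeadOff - KillingOff) + DeadSize = 8 <=
// KillingSize, so the dead store is reported as OW_Complete; shrinking
// KillingSize to 6 would instead report OW_MaybePartial.
//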
1155
1156 bool isInvisibleToCallerAfterRet(const Value *V) {
1157 if (isa<AllocaInst>(V))
1158 return true;
1159 auto I = InvisibleToCallerAfterRet.insert({V, false});
1160 if (I.second) {
1161 if (!isInvisibleToCallerOnUnwind(V)) {
1162 I.first->second = false;
1163 } else if (isNoAliasCall(V)) {
1164 I.first->second = !PointerMayBeCaptured(V, true, false);
1165 }
1166 }
1167 return I.first->second;
1168 }
1169
1170 bool isInvisibleToCallerOnUnwind(const Value *V) {
1171 bool RequiresNoCaptureBeforeUnwind;
1172 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1173 return false;
1174 if (!RequiresNoCaptureBeforeUnwind)
1175 return true;
1176
1177 auto I = CapturedBeforeReturn.insert({V, true});
1178 if (I.second)
1179 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1180 // with the killing MemoryDef. But we refrain from doing so for now to
1181 // limit compile-time and this does not cause any changes to the number
1182 // of stores removed on a large test set in practice.
1183 I.first->second = PointerMayBeCaptured(V, false, true);
1184 return !I.first->second;
1185 }
1186
1187 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1188 if (!I->mayWriteToMemory())
1189 return std::nullopt;
1190
1191 if (auto *CB = dyn_cast<CallBase>(I))
1192 return MemoryLocation::getForDest(CB, TLI);
1193
1193
1194 return MemoryLocation::getOrNone(I);
1195 }
1196
1197 // Returns a list of <MemoryLocation, bool> pairs written by I.
1198 // The bool means whether the write is from Initializes attr.
1199 SmallVector<std::pair<MemoryLocation, bool>, 1>
1200 getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1201 SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
1202 if (isMemTerminatorInst(I)) {
1203 if (auto Loc = getLocForTerminator(I))
1204 Locations.push_back(std::make_pair(Loc->first, false));
1205 return Locations;
1206 }
1207
1208 if (auto Loc = getLocForWrite(I))
1209 Locations.push_back(std::make_pair(*Loc, false));
1210
1211 if (ConsiderInitializesAttr) {
1212 for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1213 Locations.push_back(std::make_pair(MemLoc, true));
1214 }
1215 }
1216 return Locations;
1217 }
1218
1219 /// Assuming this instruction has a dead analyzable write, can we delete
1220 /// this instruction?
1221 bool isRemovable(Instruction *I) {
1222 assert(getLocForWrite(I) && "Must have analyzable write");
1223
1224 // Don't remove volatile/atomic stores.
1225 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1226 return SI->isUnordered();
1227
1228 if (auto *CB = dyn_cast<CallBase>(I)) {
1229 // Don't remove volatile memory intrinsics.
1230 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1231 return !MI->isVolatile();
1232
1233 // Never remove dead lifetime intrinsics, e.g. because they are followed
1234 // by a free.
1235 if (CB->isLifetimeStartOrEnd())
1236 return false;
1237
1238 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1239 !CB->isTerminator();
1240 }
1241
1242 return false;
1243 }
1244
1245 /// Returns true if \p UseInst completely overwrites \p DefLoc
1246 /// (stored by \p DefInst).
1247 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1248 Instruction *UseInst) {
1249 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1250 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1251 // MemoryDef.
1252 if (!UseInst->mayWriteToMemory())
1253 return false;
1254
1255 if (auto *CB = dyn_cast<CallBase>(UseInst))
1256 if (CB->onlyAccessesInaccessibleMemory())
1257 return false;
1258
1259 int64_t InstWriteOffset, DepWriteOffset;
1260 if (auto CC = getLocForWrite(UseInst))
1261 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1262 DepWriteOffset) == OW_Complete;
1263 return false;
1264 }
1265
1266 /// Returns true if \p Def is not read before returning from the function.
1267 bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) {
1268 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1269 << *Def->getMemoryInst()
1270 << ") is at the end the function \n");
1273
1274 pushMemUses(Def, WorkList, Visited);
1275 for (unsigned I = 0; I < WorkList.size(); I++) {
1276 if (WorkList.size() >= MemorySSAScanLimit) {
1277 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1278 return false;
1279 }
1280
1281 MemoryAccess *UseAccess = WorkList[I];
1282 if (isa<MemoryPhi>(UseAccess)) {
1283 // AliasAnalysis does not account for loops. Limit elimination to
1284 // candidates for which we can guarantee they always store to the same
1285 // memory location.
1286 if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
1287 return false;
1288
1289 pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
1290 continue;
1291 }
1292 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1293 // of times this is called and/or caching it.
1294 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1295 if (isReadClobber(DefLoc, UseInst)) {
1296 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1297 return false;
1298 }
1299
1300 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1301 pushMemUses(UseDef, WorkList, Visited);
1302 }
1303 return true;
1304 }
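//
// Illustrative example (assumed IR, not from this file): the store below is a
// write at the end of the function because walking the uses of its MemoryDef
// reaches no read of %buf before the return; since %buf is an alloca and thus
// invisible to the caller after the return, the store can be removed:
//
//   %buf = alloca [16 x i8]
//   store i32 0, ptr %buf
//   ret void
//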
1305
1306 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1307 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1308 /// indicating whether \p I is a free-like call.
1309 std::optional<std::pair<MemoryLocation, bool>>
1310 getLocForTerminator(Instruction *I) const {
1311 uint64_t Len;
1312 Value *Ptr;
1313 if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1314 m_Value(Ptr))))
1315 return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1316
1317 if (auto *CB = dyn_cast<CallBase>(I)) {
1318 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1319 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1320 }
1321
1322 return std::nullopt;
1323 }
1324
1325 /// Returns true if \p I is a memory terminator instruction like
1326 /// llvm.lifetime.end or free.
1327 bool isMemTerminatorInst(Instruction *I) const {
1328 auto *CB = dyn_cast<CallBase>(I);
1329 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1330 getFreedOperand(CB, &TLI) != nullptr);
1331 }
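//
// Illustrative example (assumed IR, not from this file): both calls below are
// memory terminators; the lifetime.end terminates exactly the 16 bytes
// starting at %p, while the call to free terminates every access to the
// underlying allocation of %q:
//
//   call void @llvm.lifetime.end.p0(i64 16, ptr %p)
//   call void @free(ptr %q)
//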
1332
1333 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1334 /// instruction \p AccessI.
1335 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1336 Instruction *MaybeTerm) {
1337 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1338 getLocForTerminator(MaybeTerm);
1339
1340 if (!MaybeTermLoc)
1341 return false;
1342
1343 // If the terminator is a free-like call, all accesses to the underlying
1344 // object can be considered terminated.
1345 if (getUnderlyingObject(Loc.Ptr) !=
1346 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1347 return false;
1348
1349 auto TermLoc = MaybeTermLoc->first;
1350 if (MaybeTermLoc->second) {
1351 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1352 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1353 }
1354 int64_t InstWriteOffset = 0;
1355 int64_t DepWriteOffset = 0;
1356 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1357 DepWriteOffset) == OW_Complete;
1358 }
1359
1360 // Returns true if \p Use may read from \p DefLoc.
1361 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1362 if (isNoopIntrinsic(UseInst))
1363 return false;
1364
1365 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1366 // treated as read clobber.
1367 if (auto SI = dyn_cast<StoreInst>(UseInst))
1368 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1369
1370 if (!UseInst->mayReadFromMemory())
1371 return false;
1372
1373 if (auto *CB = dyn_cast<CallBase>(UseInst))
1374 if (CB->onlyAccessesInaccessibleMemory())
1375 return false;
1376
1377 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1378 }
1379
1380 /// Returns true if a dependency between \p Current and \p KillingDef is
1381 /// guaranteed to be loop invariant for the loops that they are in. Either
1382 /// because they are known to be in the same block, in the same loop level or
1383 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1384 /// during execution of the containing function.
1385 bool isGuaranteedLoopIndependent(const Instruction *Current,
1386 const Instruction *KillingDef,
1387 const MemoryLocation &CurrentLoc) {
1388 // If the dependency is within the same block or loop level (being careful
1389 // of irreducible loops), we know that AA will return a valid result for the
1390 // memory dependency. (Both at the function level, outside of any loop,
1391 // would also be valid but we currently disable that to limit compile time).
1392 if (Current->getParent() == KillingDef->getParent())
1393 return true;
1394 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1395 if (!ContainsIrreducibleLoops && CurrentLI &&
1396 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1397 return true;
1398 // Otherwise check the memory location is invariant to any loops.
1399 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1400 }
1401
1402 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1403 /// loop. In particular, this guarantees that it only references a single
1404 /// MemoryLocation during execution of the containing function.
1405 bool isGuaranteedLoopInvariant(const Value *Ptr) {
1406 Ptr = Ptr->stripPointerCasts();
1407 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1408 if (GEP->hasAllConstantIndices())
1409 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1410
1411 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1412 return I->getParent()->isEntryBlock() ||
1413 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1414 }
1415 return true;
1416 }
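//
// Illustrative example (assumed IR, not from this file): %slot below is
// guaranteed loop invariant (a GEP with constant indices off a pointer
// defined in the entry block), whereas %elt is not, because its index %i
// varies per loop iteration:
//
//   %slot = getelementptr i8, ptr %base, i64 8      ; %base defined in entry
//   %elt  = getelementptr i32, ptr %base, i64 %i    ; inside the loop
//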
1417
1418 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1419 // with no read access between them or on any other path to a function exit
1420 // block if \p KillingLoc is not accessible after the function returns. If
1421 // there is no such MemoryDef, return std::nullopt. The returned value may not
1422 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1423 // encounter an aliasing MemoryUse (read).
1424 std::optional<MemoryAccess *>
1425 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1426 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1427 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1428 bool IsMemTerm, unsigned &PartialLimit,
1429 bool IsInitializesAttrMemLoc) {
1430 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1431 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1432 return std::nullopt;
1433 }
1434
1435 MemoryAccess *Current = StartAccess;
1436 Instruction *KillingI = KillingDef->getMemoryInst();
1437 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1438
1439 // Only optimize defining access of KillingDef when directly starting at its
1440 // defining access. The defining access also must only access KillingLoc. At
1441 // the moment we only support instructions with a single write location, so
1442 // it should be sufficient to disable optimizations for instructions that
1443 // also read from memory.
1444 bool CanOptimize = OptimizeMemorySSA &&
1445 KillingDef->getDefiningAccess() == StartAccess &&
1446 !KillingI->mayReadFromMemory();
1447
1448 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1449 std::optional<MemoryLocation> CurrentLoc;
1450 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1451 LLVM_DEBUG({
1452 dbgs() << " visiting " << *Current;
1453 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1454 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1455 << ")";
1456 dbgs() << "\n";
1457 });
1458
1459 // Reached TOP.
1460 if (MSSA.isLiveOnEntryDef(Current)) {
1461 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1462 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1463 // The first clobbering def is... none.
1464 KillingDef->setOptimized(Current);
1465 return std::nullopt;
1466 }
1467
1468 // Cost of a step. Accesses in the same block are more likely to be valid
1469 // candidates for elimination, hence consider them cheaper.
1470 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1471 ? MemorySSASameBBStepCost
1472 : MemorySSAOtherBBStepCost;
1473 if (WalkerStepLimit <= StepCost) {
1474 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1475 return std::nullopt;
1476 }
1477 WalkerStepLimit -= StepCost;
1478
1479 // Return for MemoryPhis. They cannot be eliminated directly and the
1480 // caller is responsible for traversing them.
1481 if (isa<MemoryPhi>(Current)) {
1482 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1483 return Current;
1484 }
1485
1486 // Below, check if CurrentDef is a valid candidate to be eliminated by
1487 // KillingDef. If it is not, check the next candidate.
1488 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1489 Instruction *CurrentI = CurrentDef->getMemoryInst();
1490
1491 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1492 CanOptimize = false;
1493 continue;
1494 }
1495
1496 // Before we try to remove anything, check for any extra throwing
1497 // instructions that block us from DSEing
1498 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1499 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1500 return std::nullopt;
1501 }
1502
1503 // Check for anything that looks like it will be a barrier to further
1504 // removal
1505 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1506 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1507 return std::nullopt;
1508 }
1509
1510 // If Current is known to be on path that reads DefLoc or is a read
1511 // clobber, bail out, as the path is not profitable. We skip this check
1512 // for intrinsic calls, because the code knows how to handle memcpy
1513 // intrinsics.
1514 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1515 return std::nullopt;
1516
1517 // Quick check if there are direct uses that are read-clobbers.
1518 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1519 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1520 return !MSSA.dominates(StartAccess, UseOrDef) &&
1521 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1522 return false;
1523 })) {
1524 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1525 return std::nullopt;
1526 }
1527
1528 // If Current does not have an analyzable write location or is not
1529 // removable, skip it.
1530 CurrentLoc = getLocForWrite(CurrentI);
1531 if (!CurrentLoc || !isRemovable(CurrentI)) {
1532 CanOptimize = false;
1533 continue;
1534 }
1535
1536 // AliasAnalysis does not account for loops. Limit elimination to
1537 // candidates for which we can guarantee they always store to the same
1538 // memory location and are not located in different loops.
1539 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1540 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1541 CanOptimize = false;
1542 continue;
1543 }
1544
1545 if (IsMemTerm) {
1546 // If the killing def is a memory terminator (e.g. lifetime.end), check
1547 // the next candidate if the current Current does not write the same
1548 // underlying object as the terminator.
1549 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1550 CanOptimize = false;
1551 continue;
1552 }
1553 } else {
1554 int64_t KillingOffset = 0;
1555 int64_t DeadOffset = 0;
1556 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1557 KillingOffset, DeadOffset);
1558 if (CanOptimize) {
1559 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1560 // optimized access. Do not optimize if CurrentDef is already the
1561 // defining access of KillingDef.
1562 if (CurrentDef != KillingDef->getDefiningAccess() &&
1563 (OR == OW_Complete || OR == OW_MaybePartial))
1564 KillingDef->setOptimized(CurrentDef);
1565
1566 // Once a may-aliasing def is encountered do not set an optimized
1567 // access.
1568 if (OR != OW_None)
1569 CanOptimize = false;
1570 }
1571
1572 // If Current does not write to the same object as KillingDef, check
1573 // the next candidate.
1574 if (OR == OW_Unknown || OR == OW_None)
1575 continue;
1576 else if (OR == OW_MaybePartial) {
1577 // If KillingDef only partially overwrites Current, check the next
1578 // candidate if the partial step limit is exceeded. This aggressively
1579 // limits the number of candidates for partial store elimination,
1580 // which are less likely to be removable in the end.
1581 if (PartialLimit <= 1) {
1582 WalkerStepLimit -= 1;
1583 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1584 continue;
1585 }
1586 PartialLimit -= 1;
1587 }
1588 }
1589 break;
1590 };
1591
1592 // Accesses to objects accessible after the function returns can only be
1593 // eliminated if the access is dead along all paths to the exit. Collect
1594 // the blocks with killing (=completely overwriting) MemoryDefs and check if
1595 // they cover all paths from MaybeDeadAccess to any function exit.
1596 SmallPtrSet<Instruction *, 16> KillingDefs;
1597 KillingDefs.insert(KillingDef->getMemoryInst());
1598 MemoryAccess *MaybeDeadAccess = Current;
1599 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1600 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1601 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1602 << *MaybeDeadI << ")\n");
1603
1604 SmallVector<MemoryAccess *, 32> WorkList;
1605 SmallPtrSet<MemoryAccess *, 32> Visited;
1606 pushMemUses(MaybeDeadAccess, WorkList, Visited);
1607
1608 // Check if DeadDef may be read.
1609 for (unsigned I = 0; I < WorkList.size(); I++) {
1610 MemoryAccess *UseAccess = WorkList[I];
1611
1612 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1613 // Bail out if the number of accesses to check exceeds the scan limit.
1614 if (ScanLimit < (WorkList.size() - I)) {
1615 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1616 return std::nullopt;
1617 }
1618 --ScanLimit;
1619 NumDomMemDefChecks++;
1620
1621 if (isa<MemoryPhi>(UseAccess)) {
1622 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1623 return DT.properlyDominates(KI->getParent(),
1624 UseAccess->getBlock());
1625 })) {
1626 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1627 continue;
1628 }
1629 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1630 pushMemUses(UseAccess, WorkList, Visited);
1631 continue;
1632 }
1633
1634 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1635 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1636
1637 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1638 return DT.dominates(KI, UseInst);
1639 })) {
1640 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1641 continue;
1642 }
1643
1644 // A memory terminator kills all preceding MemoryDefs and all succeeding
1645 // MemoryAccesses. We do not have to check its users.
1646 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1647 LLVM_DEBUG(
1648 dbgs()
1649 << " ... skipping, memterminator invalidates following accesses\n");
1650 continue;
1651 }
1652
1653 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1654 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1655 pushMemUses(UseAccess, WorkList, Visited);
1656 continue;
1657 }
1658
1659 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1660 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1661 return std::nullopt;
1662 }
1663
1664 // Uses which may read the original MemoryDef mean we cannot eliminate the
1665 // original MD. Stop walk.
1666 // If KillingDef is a CallInst with "initializes" attribute, the reads in
1667 // the callee would be dominated by initializations, so it should be safe.
1668 bool IsKillingDefFromInitAttr = false;
1669 if (IsInitializesAttrMemLoc) {
1670 if (KillingI == UseInst &&
1671 KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
1672 IsKillingDefFromInitAttr = true;
1673 }
1674
1675 if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1676 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1677 return std::nullopt;
1678 }
1679
1680 // If this worklist walks back to the original memory access (and the
1681 // pointer is not guaranteed loop invariant) then we cannot assume that a
1682 // store kills itself.
1683 if (MaybeDeadAccess == UseAccess &&
1684 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1685 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1686 return std::nullopt;
1687 }
1688 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1689 // if it reads the memory location.
1690 // TODO: It would probably be better to check for self-reads before
1691 // calling the function.
1692 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1693 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1694 continue;
1695 }
1696
1697 // Check all uses for MemoryDefs, except for defs completely overwriting
1698 // the original location. Note that we have to check uses of *all*
1699 // MemoryDefs we discover, including non-aliasing ones; otherwise we
1700 // might miss cases like the following
1701 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1702 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1703 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1704 // (The Use points to the *first* Def it may alias)
1705 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1706 // stores [0,1]
1707 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1708 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1709 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1710 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1711 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1712 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1713 LLVM_DEBUG(dbgs()
1714 << " ... found killing def " << *UseInst << "\n");
1715 KillingDefs.insert(UseInst);
1716 }
1717 } else {
1718 LLVM_DEBUG(dbgs()
1719 << " ... found preceding def " << *UseInst << "\n");
1720 return std::nullopt;
1721 }
1722 } else
1723 pushMemUses(UseDef, WorkList, Visited);
1724 }
1725 }
1726
1727 // For accesses to locations visible after the function returns, make sure
1728 // that the location is dead (=overwritten) along all paths from
1729 // MaybeDeadAccess to the exit.
1730 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1731 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1732 for (Instruction *KD : KillingDefs)
1733 KillingBlocks.insert(KD->getParent());
1734 assert(!KillingBlocks.empty() &&
1735 "Expected at least a single killing block");
1736
1737 // Find the common post-dominator of all killing blocks.
1738 BasicBlock *CommonPred = *KillingBlocks.begin();
1739 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1740 if (!CommonPred)
1741 break;
1742 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1743 }
1744
1745 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1746 // there is a path from MaybeDeadAccess to an exit not going through a
1747 // killing block.
1748 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1749 if (!AnyUnreachableExit)
1750 return std::nullopt;
1751
1752 // Fall back to CFG scan starting at all non-unreachable roots if not
1753 // all paths to the exit go through CommonPred.
1754 CommonPred = nullptr;
1755 }
1756
1757 // If CommonPred itself is in the set of killing blocks, we're done.
1758 if (KillingBlocks.count(CommonPred))
1759 return {MaybeDeadAccess};
1760
1761 SetVector<BasicBlock *> WorkList;
1762 // If CommonPred is null, there are multiple exits from the function.
1763 // They all have to be added to the worklist.
1764 if (CommonPred)
1765 WorkList.insert(CommonPred);
1766 else
1767 for (BasicBlock *R : PDT.roots()) {
1768 if (!isa<UnreachableInst>(R->getTerminator()))
1769 WorkList.insert(R);
1770 }
1771
1772 NumCFGTries++;
1773 // Check if all paths starting from an exit node go through one of the
1774 // killing blocks before reaching MaybeDeadAccess.
1775 for (unsigned I = 0; I < WorkList.size(); I++) {
1776 NumCFGChecks++;
1777 BasicBlock *Current = WorkList[I];
1778 if (KillingBlocks.count(Current))
1779 continue;
1780 if (Current == MaybeDeadAccess->getBlock())
1781 return std::nullopt;
1782
1783 // MaybeDeadAccess is reachable from the entry, so we don't have to
1784 // explore unreachable blocks further.
1785 if (!DT.isReachableFromEntry(Current))
1786 continue;
1787
1788 for (BasicBlock *Pred : predecessors(Current))
1789 WorkList.insert(Pred);
1790
1791 if (WorkList.size() >= MemorySSAPathCheckLimit)
1792 return std::nullopt;
1793 }
1794 NumCFGSuccess++;
1795 }
1796
1797 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1798 // potentially dead.
1799 return {MaybeDeadAccess};
1800 }
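  // Illustrative example for the killing-blocks check in getDomMemoryDef above
  // (hypothetical CFG, simplified IR): with a global @g,
  //   bb0: store i32 1, ptr @g      ; MaybeDeadAccess
  //        br i1 %c, label %bb1, label %bb2
  //   bb1: store i32 2, ptr @g      ; killing def
  //        br label %bb3
  //   bb2: br label %bb3            ; no overwrite on this path
  //   bb3: ret void
  // the path bb0 -> bb2 -> bb3 reaches the exit without overwriting @g, so the
  // store in bb0 stays visible after the return and cannot be removed. If bb2
  // also completely overwrote @g, the killing blocks would cover all paths and
  // the store in bb0 would be a candidate for elimination.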
1801
1802 /// Delete dead memory defs and recursively add their operands to ToRemove if
1803 /// they became dead.
1804 void
1805 deleteDeadInstruction(Instruction *SI,
1806 SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) {
1807 MemorySSAUpdater Updater(&MSSA);
1808 SmallVector<Instruction *, 32> NowDeadInsts;
1809 NowDeadInsts.push_back(SI);
1810 --NumFastOther;
1811
1812 while (!NowDeadInsts.empty()) {
1813 Instruction *DeadInst = NowDeadInsts.pop_back_val();
1814 ++NumFastOther;
1815
1816 // Try to preserve debug information attached to the dead instruction.
1817 salvageDebugInfo(*DeadInst);
1818 salvageKnowledge(DeadInst);
1819
1820 // Remove the Instruction from MSSA.
1821 MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1822 bool IsMemDef = MA && isa<MemoryDef>(MA);
1823 if (MA) {
1824 if (IsMemDef) {
1825 auto *MD = cast<MemoryDef>(MA);
1826 SkipStores.insert(MD);
1827 if (Deleted)
1828 Deleted->insert(MD);
1829 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1830 if (SI->getValueOperand()->getType()->isPointerTy()) {
1831 const Value *UO = getUnderlyingObject(SI->getValueOperand());
1832 if (CapturedBeforeReturn.erase(UO))
1833 ShouldIterateEndOfFunctionDSE = true;
1834 InvisibleToCallerAfterRet.erase(UO);
1835 }
1836 }
1837 }
1838
1839 Updater.removeMemoryAccess(MA);
1840 }
1841
1842 auto I = IOLs.find(DeadInst->getParent());
1843 if (I != IOLs.end())
1844 I->second.erase(DeadInst);
1845 // Remove its operands
1846 for (Use &O : DeadInst->operands())
1847 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1848 O.set(PoisonValue::get(O->getType()));
1849 if (isInstructionTriviallyDead(OpI, &TLI))
1850 NowDeadInsts.push_back(OpI);
1851 }
1852
1853 EA.removeInstruction(DeadInst);
1854 // Remove memory defs directly if they don't produce results, but only
1855 // queue other dead instructions for later removal. They may have been
1856 // used as memory locations that have been cached by BatchAA. Removing
1857 // them here may lead to newly created instructions to be allocated at the
1858 // same address, yielding stale cache entries.
1859 if (IsMemDef && DeadInst->getType()->isVoidTy())
1860 DeadInst->eraseFromParent();
1861 else
1862 ToRemove.push_back(DeadInst);
1863 }
1864 }
1865
1866 // Check for any extra throws between \p KillingI and \p DeadI that block
1867 // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1868 // MemoryDefs that may throw are handled during the walk from one def to
1869 // the next.
1870 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1871 const Value *KillingUndObj) {
1872 // First see if we can ignore it by using the fact that KillingUndObj is an
1873 // alloca/alloca-like object that is not visible to the caller during
1874 // execution of the function.
1875 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1876 return false;
1877
1878 if (KillingI->getParent() == DeadI->getParent())
1879 return ThrowingBlocks.count(KillingI->getParent());
1880 return !ThrowingBlocks.empty();
1881 }
1882
1883 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1884 // instructions act as barriers:
1885 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1886 // object.
1887 // * Atomic stores stronger than monotonic.
1888 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1889 // If DeadI may throw it acts as a barrier, unless the accessed object is
1890 // an alloca/alloca-like object that does not escape.
1891 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1892 return true;
1893
1894 // If DeadI is an atomic load/store stronger than monotonic, do not try to
1895 // eliminate/reorder it.
1896 if (DeadI->isAtomic()) {
1897 if (auto *LI = dyn_cast<LoadInst>(DeadI))
1898 return isStrongerThanMonotonic(LI->getOrdering());
1899 if (auto *SI = dyn_cast<StoreInst>(DeadI))
1900 return isStrongerThanMonotonic(SI->getOrdering());
1901 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1902 return isStrongerThanMonotonic(ARMW->getOrdering());
1903 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1904 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1905 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1906 llvm_unreachable("other instructions should be skipped in MemorySSA");
1907 }
1908 return false;
1909 }
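  // Illustrative example for isDSEBarrier above (hypothetical, simplified IR):
  //   store atomic i32 0, ptr %p release, align 4
  // is stronger than monotonic and therefore acts as a barrier; earlier stores
  // are not eliminated or reordered across it by this walk.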
1910
1911 /// Eliminate writes to objects that are not visible in the caller and are not
1912 /// accessed before returning from the function.
1913 bool eliminateDeadWritesAtEndOfFunction() {
1914 bool MadeChange = false;
1915 LLVM_DEBUG(
1916 dbgs()
1917 << "Trying to eliminate MemoryDefs at the end of the function\n");
1918 do {
1919 ShouldIterateEndOfFunctionDSE = false;
1920 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1921 if (SkipStores.contains(Def))
1922 continue;
1923
1924 Instruction *DefI = Def->getMemoryInst();
1925 auto DefLoc = getLocForWrite(DefI);
1926 if (!DefLoc || !isRemovable(DefI)) {
1927 LLVM_DEBUG(dbgs() << " ... could not get location for write or "
1928 "instruction not removable.\n");
1929 continue;
1930 }
1931
1932 // NOTE: Currently eliminating writes at the end of a function is
1933 // limited to MemoryDefs with a single underlying object, to save
1934 // compile-time. In practice it appears the case with multiple
1935 // underlying objects is very uncommon. If it turns out to be important,
1936 // we can use getUnderlyingObjects here instead.
1937 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1938 if (!isInvisibleToCallerAfterRet(UO))
1939 continue;
1940
1941 if (isWriteAtEndOfFunction(Def, *DefLoc)) {
1942 // See through pointer-to-pointer bitcasts
1943 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1944 "of the function\n");
1945 deleteDeadInstruction(DefI);
1946 ++NumFastStores;
1947 MadeChange = true;
1948 }
1949 }
1950 } while (ShouldIterateEndOfFunctionDSE);
1951 return MadeChange;
1952 }
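  // Illustrative example for eliminateDeadWritesAtEndOfFunction above
  // (hypothetical, simplified IR):
  //   define void @f() {
  //     %a = alloca i32
  //     store i32 42, ptr %a
  //     ret void
  //   }
  // %a is not visible to the caller after the return and is never read, so the
  // store can be deleted.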
1953
1954 /// If we have a zero initializing memset following a call to malloc,
1955 /// try folding it into a call to calloc.
1956 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1957 Instruction *DefI = Def->getMemoryInst();
1958 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1959 if (!MemSet)
1960 // TODO: Could handle zero store to small allocation as well.
1961 return false;
1962 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1963 if (!StoredConstant || !StoredConstant->isNullValue())
1964 return false;
1965
1966 if (!isRemovable(DefI))
1967 // The memset might be volatile.
1968 return false;
1969
1970 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1971 F.hasFnAttribute(Attribute::SanitizeAddress) ||
1972 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1973 F.getName() == "calloc")
1974 return false;
1975 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1976 if (!Malloc)
1977 return false;
1978 auto *InnerCallee = Malloc->getCalledFunction();
1979 if (!InnerCallee)
1980 return false;
1981 LibFunc Func;
1982 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1983 Func != LibFunc_malloc)
1984 return false;
1985 // Gracefully handle malloc with unexpected memory attributes.
1986 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
1987 if (!MallocDef)
1988 return false;
1989
1990 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1991 // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
1992 // of the malloc block.
1993 auto *MallocBB = Malloc->getParent(),
1994 *MemsetBB = Memset->getParent();
1995 if (MallocBB == MemsetBB)
1996 return true;
1997 auto *Ptr = Memset->getArgOperand(0);
1998 auto *TI = MallocBB->getTerminator();
1999 BasicBlock *TrueBB, *FalseBB;
2000 if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
2001 m_Zero()),
2002 TrueBB, FalseBB)))
2003 return false;
2004 if (MemsetBB != FalseBB)
2005 return false;
2006 return true;
2007 };
2008
2009 if (Malloc->getOperand(0) != MemSet->getLength())
2010 return false;
2011 if (!shouldCreateCalloc(Malloc, MemSet) ||
2012 !DT.dominates(Malloc, MemSet) ||
2013 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
2014 return false;
2015 IRBuilder<> IRB(Malloc);
2016 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
2017 auto *Calloc =
2018 emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0), IRB,
2019 TLI, Malloc->getType()->getPointerAddressSpace());
2020 if (!Calloc)
2021 return false;
2022
2023 MemorySSAUpdater Updater(&MSSA);
2024 auto *NewAccess =
2025 Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
2026 MallocDef);
2027 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
2028 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
2029 Malloc->replaceAllUsesWith(Calloc);
2030 deleteDeadInstruction(Malloc);
2031 return true;
2032 }
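  // Illustrative example for tryFoldIntoCalloc above (hypothetical, simplified
  // IR): the pair
  //   %p = call ptr @malloc(i64 %n)
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
  // is rewritten to
  //   %p = call ptr @calloc(i64 1, i64 %n)
  // provided the memset length matches the allocation size and the memory is
  // not modified between the two calls.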
2033
2034 // Check if there is a dominating condition that implies that the value
2035 // being stored to a pointer is already present at that location.
2036 bool dominatingConditionImpliesValue(MemoryDef *Def) {
2037 auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
2038 BasicBlock *StoreBB = StoreI->getParent();
2039 Value *StorePtr = StoreI->getPointerOperand();
2040 Value *StoreVal = StoreI->getValueOperand();
2041
2042 DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
2043 if (!IDom)
2044 return false;
2045
2046 auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
2047 if (!BI || !BI->isConditional())
2048 return false;
2049
2050 // In case both blocks are the same, it is not possible to determine
2051 // if optimization is possible. (We would not want to optimize a store
2052 // in the FalseBB if condition is true and vice versa.)
2053 if (BI->getSuccessor(0) == BI->getSuccessor(1))
2054 return false;
2055
2056 Instruction *ICmpL;
2057 CmpPredicate Pred;
2058 if (!match(BI->getCondition(),
2059 m_c_ICmp(Pred,
2060 m_CombineAnd(m_Load(m_Specific(StorePtr)),
2061 m_Instruction(ICmpL)),
2062 m_Specific(StoreVal))) ||
2063 !ICmpInst::isEquality(Pred))
2064 return false;
2065
2066 // In case the else blocks also branches to the if block or the other way
2067 // around it is not possible to determine if the optimization is possible.
2068 if (Pred == ICmpInst::ICMP_EQ &&
2069 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
2070 StoreBB))
2071 return false;
2072
2073 if (Pred == ICmpInst::ICMP_NE &&
2074 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
2075 StoreBB))
2076 return false;
2077
2078 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
2079 MemoryAccess *ClobAcc =
2080 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
2081
2082 return MSSA.dominates(ClobAcc, LoadAcc);
2083 }
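  // Illustrative example for dominatingConditionImpliesValue above
  // (hypothetical C-like source):
  //   if (*p == v)
  //     *p = v;
  // The store is only reached when *p already equals v, so it is a no-op as
  // long as nothing can clobber *p between the load feeding the compare and
  // the store itself.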
2084
2085 /// \returns true if \p Def is a no-op store, either because it
2086 /// directly stores back a loaded value or stores zero to a calloced object.
2087 bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
2088 Instruction *DefI = Def->getMemoryInst();
2089 StoreInst *Store = dyn_cast<StoreInst>(DefI);
2090 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2091 Constant *StoredConstant = nullptr;
2092 if (Store)
2093 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
2094 else if (MemSet)
2095 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2096 else
2097 return false;
2098
2099 if (!isRemovable(DefI))
2100 return false;
2101
2102 if (StoredConstant) {
2103 Constant *InitC =
2104 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
2105 // If the clobbering access is LiveOnEntry, no instructions between them
2106 // can modify the memory location.
2107 if (InitC && InitC == StoredConstant)
2108 return MSSA.isLiveOnEntryDef(
2109 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
2110 }
2111
2112 if (!Store)
2113 return false;
2114
2115 if (dominatingConditionImpliesValue(Def))
2116 return true;
2117
2118 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2119 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2120 // Get the defining access for the load.
2121 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2122 // Fast path: the defining accesses are the same.
2123 if (LoadAccess == Def->getDefiningAccess())
2124 return true;
2125
2126 // Look through phi accesses. Recursively scan all phi accesses by
2127 // adding them to a worklist. Bail when we run into a memory def that
2128 // does not match LoadAccess.
2129 SetVector<MemoryAccess *> ToCheck;
2130 MemoryAccess *Current =
2131 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2132 // We don't want to bail when we run into the store memory def. But,
2133 // the phi access may point to it. So, pretend like we've already
2134 // checked it.
2135 ToCheck.insert(Def);
2136 ToCheck.insert(Current);
2137 // Start at current (1) to simulate already having checked Def.
2138 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2139 Current = ToCheck[I];
2140 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2141 // Check all the operands.
2142 for (auto &Use : PhiAccess->incoming_values())
2143 ToCheck.insert(cast<MemoryAccess>(&Use));
2144 continue;
2145 }
2146
2147 // If we found a memory def, bail. This happens when we have an
2148 // unrelated write in between an otherwise noop store.
2149 assert(isa<MemoryDef>(Current) &&
2150 "Only MemoryDefs should reach here.");
2151 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2152 // We are searching for the definition of the store's destination.
2153 // So, if that is the same definition as the load, then this is a
2154 // noop. Otherwise, fail.
2155 if (LoadAccess != Current)
2156 return false;
2157 }
2158 return true;
2159 }
2160 }
2161
2162 return false;
2163 }
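  // Illustrative example for storeIsNoop above (hypothetical, simplified IR):
  //   %v = load i32, ptr %p
  //   store i32 %v, ptr %p
  // The store writes back the value just loaded; if no other MemoryDef clobbers
  // %p in between, it is removed as a no-op store.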
2164
2165 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2166 bool Changed = false;
2167 for (auto OI : IOL) {
2168 Instruction *DeadI = OI.first;
2169 MemoryLocation Loc = *getLocForWrite(DeadI);
2170 assert(isRemovable(DeadI) && "Expect only removable instruction");
2171
2172 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2173 int64_t DeadStart = 0;
2174 uint64_t DeadSize = Loc.Size.getValue();
2175 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2176 OverlapIntervalsTy &IntervalMap = OI.second;
2177 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2178 if (IntervalMap.empty())
2179 continue;
2180 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2181 }
2182 return Changed;
2183 }
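  // Illustrative example for removePartiallyOverlappedStores above
  // (hypothetical, simplified IR):
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 32, i1 false)
  //   store i64 1, ptr %p
  // The later 8-byte store overwrites the start of the memset, so the memset
  // can be shortened to the remaining 24 bytes starting at offset 8.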
2184
2185 /// Eliminates writes to locations where the value that is being written
2186 /// is already stored at the same location.
2187 bool eliminateRedundantStoresOfExistingValues() {
2188 bool MadeChange = false;
2189 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2190 "already existing value\n");
2191 for (auto *Def : MemDefs) {
2192 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2193 continue;
2194
2195 Instruction *DefInst = Def->getMemoryInst();
2196 auto MaybeDefLoc = getLocForWrite(DefInst);
2197 if (!MaybeDefLoc || !isRemovable(DefInst))
2198 continue;
2199
2200 MemoryDef *UpperDef;
2201 // To conserve compile-time, we avoid walking to the next clobbering def.
2202 // Instead, we just try to get the optimized access, if it exists. DSE
2203 // will try to optimize defs during the earlier traversal.
2204 if (Def->isOptimized())
2205 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2206 else
2207 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2208 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2209 continue;
2210
2211 Instruction *UpperInst = UpperDef->getMemoryInst();
2212 auto IsRedundantStore = [&]() {
2213 if (DefInst->isIdenticalTo(UpperInst))
2214 return true;
2215 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2216 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2217 // MemSetInst must have a write location.
2218 auto UpperLoc = getLocForWrite(UpperInst);
2219 if (!UpperLoc)
2220 return false;
2221 int64_t InstWriteOffset = 0;
2222 int64_t DepWriteOffset = 0;
2223 auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
2224 InstWriteOffset, DepWriteOffset);
2225 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2226 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2227 OR == OW_Complete;
2228 }
2229 }
2230 return false;
2231 };
2232
2233 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2234 continue;
2235 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2236 << '\n');
2237 deleteDeadInstruction(DefInst);
2238 NumRedundantStores++;
2239 MadeChange = true;
2240 }
2241 return MadeChange;
2242 }
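  // Illustrative example for eliminateRedundantStoresOfExistingValues above
  // (hypothetical, simplified IR):
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
  //   store i32 0, ptr %p
  // The store writes bytes the memset already set to the same value and is
  // removed, provided nothing may read or clobber %p in between.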
2243
2244 // Return the locations written by the initializes attribute.
2245 // Note that this function considers:
2246 // 1. Unwind edge: use "initializes" attribute only if the callee has
2247 // "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
2248 // or the argument is invisible to caller on unwind. That is, we don't
2249 // perform incorrect DSE on unwind edges in the current function.
2250 // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
2251 // the intersected range list of their "initializes" attributes.
2252 SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
2253
2254 // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
2255 // killed by `KillingLocWrapper.MemDef`. Return whether
2256 // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
2257 std::pair<bool, bool>
2258 eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
2259
2260 // Try to eliminate dead defs killed by `KillingDefWrapper` and return
2261 // whether any change was made.
2262 bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
2263};
2264
2265SmallVector<MemoryLocation, 1>
2266DSEState::getInitializesArgMemLoc(const Instruction *I) {
2267 const CallBase *CB = dyn_cast<CallBase>(I);
2268 if (!CB)
2269 return {};
2270
2271 // Collect aliasing arguments and their initializes ranges.
2272 MapVector<Value *, SmallVector<ArgumentInitInfo, 2>> Arguments;
2273 for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2274 ConstantRangeList Inits;
2275 Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
2276 if (InitializesAttr.isValid())
2277 Inits = InitializesAttr.getValueAsConstantRangeList();
2278
2279 Value *CurArg = CB->getArgOperand(Idx);
2280 // We don't perform incorrect DSE on unwind edges in the current function,
2281 // and use the "initializes" attribute to kill dead stores if:
2282 // - The call does not throw exceptions, "CB->doesNotThrow()".
2283 // - Or the callee parameter has "dead_on_unwind" attribute.
2284 // - Or the argument is invisible to caller on unwind, and there are no
2285 // unwind edges from this call in the current function (e.g. `CallInst`).
2286 bool IsDeadOrInvisibleOnUnwind =
2287 CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
2288 (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
2289 ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
2290 bool FoundAliasing = false;
2291 for (auto &[Arg, AliasList] : Arguments) {
2292 auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
2293 MemoryLocation::getBeforeOrAfter(CurArg));
2294 if (AAR == AliasResult::NoAlias) {
2295 continue;
2296 } else if (AAR == AliasResult::MustAlias) {
2297 FoundAliasing = true;
2298 AliasList.push_back(InitInfo);
2299 } else {
2300 // For PartialAlias and MayAlias, there is an offset or may be an
2301 // unknown offset between the arguments and we insert an empty init
2302 // range to discard the entire initializes info while intersecting.
2303 FoundAliasing = true;
2304 AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
2305 ConstantRangeList()});
2306 }
2307 }
2308 if (!FoundAliasing)
2309 Arguments[CurArg] = {InitInfo};
2310 }
2311
2312 SmallVector<MemoryLocation, 1> Locations;
2313 for (const auto &[_, Args] : Arguments) {
2314 auto IntersectedRanges =
2315 getIntersectedInitRangeList(Args, CB->doesNotThrow());
2316 if (IntersectedRanges.empty())
2317 continue;
2318
2319 for (const auto &Arg : Args) {
2320 for (const auto &Range : IntersectedRanges) {
2321 int64_t Start = Range.getLower().getSExtValue();
2322 int64_t End = Range.getUpper().getSExtValue();
2323 // For now, we only handle locations starting at offset 0.
2324 if (Start == 0)
2325 Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
2326 LocationSize::precise(End - Start),
2327 CB->getAAMetadata()));
2328 }
2329 }
2330 }
2331 return Locations;
2332}
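// Illustrative example for getInitializesArgMemLoc above (hypothetical
// declaration, simplified IR): for a call such as
//   call void @init(ptr initializes((0, 8)) dead_on_unwind %p)
// this returns a MemoryLocation covering bytes [0, 8) of %p, which the caller
// can treat like a write when looking for earlier dead stores to %p.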
2333
2334std::pair<bool, bool>
2335DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2336 bool Changed = false;
2337 bool DeletedKillingLoc = false;
2338 unsigned ScanLimit = MemorySSAScanLimit;
2339 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2340 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2341 // Worklist of MemoryAccesses that may be killed by
2342 // "KillingLocWrapper.MemDef".
2343 SetVector<MemoryAccess *> ToCheck;
2344 // Track MemoryAccesses that have been deleted in the loop below, so we can
2345 // skip them. Don't use SkipStores for this, which may contain reused
2346 // MemoryAccess addresses.
2347 SmallPtrSet<MemoryAccess *, 8> Deleted;
2348 [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2349 ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());
2350
2351 // Check if MemoryAccesses in the worklist are killed by
2352 // "KillingLocWrapper.MemDef".
2353 for (unsigned I = 0; I < ToCheck.size(); I++) {
2354 MemoryAccess *Current = ToCheck[I];
2355 if (Deleted.contains(Current))
2356 continue;
2357 std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2358 KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
2359 KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2360 isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
2361 KillingLocWrapper.DefByInitializesAttr);
2362
2363 if (!MaybeDeadAccess) {
2364 LLVM_DEBUG(dbgs() << " finished walk\n");
2365 continue;
2366 }
2367 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2368 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2369 if (isa<MemoryPhi>(DeadAccess)) {
2370 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2371 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2372 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2373 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2374 BasicBlock *PhiBlock = DeadAccess->getBlock();
2375
2376 // We only consider incoming MemoryAccesses that come before the
2377 // MemoryPhi. Otherwise we could discover candidates that do not
2378 // strictly dominate our starting def.
2379 if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
2380 ToCheck.insert(IncomingAccess);
2381 }
2382 continue;
2383 }
2384 // We cannot apply the initializes attribute to DeadAccess/DeadDef.
2385 // It would incorrectly consider a call instruction as a redundant store
2386 // and remove the call instruction.
2387 // TODO: this conflates the existence of a MemoryLocation with being able
2388 // to delete the instruction. Fix isRemovable() to consider calls with
2389 // side effects that cannot be removed, e.g. calls with the initializes
2390 // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2391 MemoryDefWrapper DeadDefWrapper(
2392 cast<MemoryDef>(DeadAccess),
2393 getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
2394 /*ConsiderInitializesAttr=*/false));
2395 assert(DeadDefWrapper.DefinedLocations.size() == 1);
2396 MemoryLocationWrapper &DeadLocWrapper =
2397 DeadDefWrapper.DefinedLocations.front();
2398 LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
2399 ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
2400 NumGetDomMemoryDefPassed++;
2401
2402 if (!DebugCounter::shouldExecute(MemorySSACounter))
2403 continue;
2404 if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
2405 if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2406 continue;
2407 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2408 << *DeadLocWrapper.DefInst << "\n KILLER: "
2409 << *KillingLocWrapper.DefInst << '\n');
2410 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2411 ++NumFastStores;
2412 Changed = true;
2413 } else {
2414 // Check if DeadI overwrites KillingI.
2415 int64_t KillingOffset = 0;
2416 int64_t DeadOffset = 0;
2417 OverwriteResult OR =
2418 isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
2419 KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2420 KillingOffset, DeadOffset);
2421 if (OR == OW_MaybePartial) {
2422 auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
2423 OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2424 KillingOffset, DeadOffset,
2425 DeadLocWrapper.DefInst, IOL);
2426 }
2427 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2428 auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
2429 auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
2430 // We are re-using tryToMergePartialOverlappingStores, which requires
2431 // DeadSI to dominate KillingSI.
2432 // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2433 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2434 if (Constant *Merged = tryToMergePartialOverlappingStores(
2435 KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
2436 &DT)) {
2437
2438 // Update stored value of earlier store to merged constant.
2439 DeadSI->setOperand(0, Merged);
2440 ++NumModifiedStores;
2441 Changed = true;
2442 DeletedKillingLoc = true;
2443
2444 // Remove killing store and remove any outstanding overlap
2445 // intervals for the updated store.
2446 deleteDeadInstruction(KillingSI, &Deleted);
2447 auto I = IOLs.find(DeadSI->getParent());
2448 if (I != IOLs.end())
2449 I->second.erase(DeadSI);
2450 break;
2451 }
2452 }
2453 }
2454 if (OR == OW_Complete) {
2455 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2456 << *DeadLocWrapper.DefInst << "\n KILLER: "
2457 << *KillingLocWrapper.DefInst << '\n');
2458 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2459 ++NumFastStores;
2460 Changed = true;
2461 }
2462 }
2463 }
2464
2465 assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2466 "SkipStores and Deleted out of sync?");
2467
2468 return {Changed, DeletedKillingLoc};
2469}
2470
2471bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2472 if (KillingDefWrapper.DefinedLocations.empty()) {
2473 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2474 << *KillingDefWrapper.DefInst << "\n");
2475 return false;
2476 }
2477
2478 bool MadeChange = false;
2479 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2480 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2481 << *KillingLocWrapper.MemDef << " ("
2482 << *KillingLocWrapper.DefInst << ")\n");
2483 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2484 MadeChange |= Changed;
2485
2486 // Check if the store is a no-op.
2487 if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
2488 KillingLocWrapper.UnderlyingObject)) {
2489 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2490 << *KillingLocWrapper.DefInst << '\n');
2491 deleteDeadInstruction(KillingLocWrapper.DefInst);
2492 NumRedundantStores++;
2493 MadeChange = true;
2494 continue;
2495 }
2496 // Can we form a calloc from a memset/malloc pair?
2497 if (!DeletedKillingLoc &&
2498 tryFoldIntoCalloc(KillingLocWrapper.MemDef,
2499 KillingLocWrapper.UnderlyingObject)) {
2500 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2501 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2502 deleteDeadInstruction(KillingLocWrapper.DefInst);
2503 MadeChange = true;
2504 continue;
2505 }
2506 }
2507 return MadeChange;
2508}
2509
2510static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2511 DominatorTree &DT, PostDominatorTree &PDT,
2512 const TargetLibraryInfo &TLI,
2513 const LoopInfo &LI) {
2514 bool MadeChange = false;
2515 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2516 // For each store:
2517 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2518 MemoryDef *KillingDef = State.MemDefs[I];
2519 if (State.SkipStores.count(KillingDef))
2520 continue;
2521
2522 MemoryDefWrapper KillingDefWrapper(
2523 KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
2524 EnableInitializesImprovement));
2525 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2526 }
2527
2528 if (EnablePartialOverwriteTracking)
2529 for (auto &KV : State.IOLs)
2530 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2531
2532 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2533 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2534
2535 while (!State.ToRemove.empty()) {
2536 Instruction *DeadInst = State.ToRemove.pop_back_val();
2537 DeadInst->eraseFromParent();
2538 }
2539
2540 return MadeChange;
2541}
2542} // end anonymous namespace
2543
2544//===----------------------------------------------------------------------===//
2545// DSE Pass
2546//===----------------------------------------------------------------------===//
2547PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2548 AliasAnalysis &AA = AM.getResult<AAManager>(F);
2549 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2550 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2551 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2552 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2553 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2554
2555 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2556
2557#ifdef LLVM_ENABLE_STATS
2558 if (AreStatisticsEnabled())
2559 for (auto &I : instructions(F))
2560 NumRemainingStores += isa<StoreInst>(&I);
2561#endif
2562
2563 if (!Changed)
2564 return PreservedAnalyses::all();
2565
2566 PreservedAnalyses PA;
2567 PA.preserveSet<CFGAnalyses>();
2568 PA.preserve<MemorySSAAnalysis>();
2569 PA.preserve<LoopAnalysis>();
2570 return PA;
2571}
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
static cl::opt< bool > EnableInitializesImprovement("enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE"))
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
std::map< int64_t, int64_t > OverlapIntervalsTy
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void deleteDeadInstruction(Instruction *I)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A manager for alias analyses.
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
The possible results of an alias query.
Definition: AliasAnalysis.h:77
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:95
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< ConstantRange > getValueAsConstantRangeList() const
Return the attribute's value as a ConstantRange array.
Definition: Attributes.cpp:412
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:208
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
bool onlyAccessesInaccessibleMemory() const
Determine if the function may only access memory that is inaccessible from the IR.
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Get the attribute of a given kind from a given arg.
Definition: InstrTypes.h:1629
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:1923
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1294
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getArgOperandWithAttribute(Attribute::AttrKind Kind) const
If one of the arguments has the specified attribute, returns its operand value.
unsigned arg_size() const
Definition: InstrTypes.h:1292
This class represents a function call, abstracting a target machine's calling convention.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This class represents a list of constant ranges.
ConstantRangeList intersectWith(const ConstantRangeList &CRL) const
Return the range list that results from the intersection of this ConstantRangeList with another Const...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition: Constant.h:42
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Assignment ID.
static DIAssignID * getDistinct(LLVMContext &Context)
static std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
iterator_range< root_iterator > roots()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Definition: Instructions.h:980
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
bool isTerminator() const
Definition: Instruction.h:277
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
const_iterator begin() const
Definition: IntervalMap.h:1146
bool empty() const
empty - Return true when no intervals are mapped.
Definition: IntervalMap.h:1101
const_iterator end() const
Definition: IntervalMap.h:1158
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
static LocationSize precise(uint64_t Value)
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
iterator find(const KeyT &Key)
Definition: MapVector.h:167
Value * getLength() const
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
BasicBlock * getBlock() const
Definition: MemorySSA.h:161
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:370
void setOptimized(MemoryAccess *MA)
Definition: MemorySSA.h:391
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory referenced by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underlying object.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying object.
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
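
A hedged sketch of the MemoryLocation helpers above: build a precise location for a store and a conservative "anything at or after this pointer" location. The helper name describeStoreLocation is hypothetical.

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

void describeStoreLocation(const StoreInst *SI) {
  // Precise location: the exact pointer and size of the stored value.
  MemoryLocation Precise = MemoryLocation::get(SI);

  // A precise, non-scalable size can be read back in address units.
  if (Precise.Size.isPrecise() && !Precise.Size.isScalable()) {
    uint64_t Bytes = Precise.Size.getValue().getFixedValue();
    (void)Bytes;
  }

  // Conservative location: may touch anything at or after the pointer,
  // but stays within the underlying object.
  MemoryLocation After = MemoryLocation::getAfter(SI->getPointerOperand());
  (void)After;
}
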
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:928
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1045
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1603
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates MemoryAccess B.
Definition: MemorySSA.cpp:2173
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1590
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:719
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:739
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:259
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:256
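
A hedged sketch tying the MemorySSA entries above together: starting from a store, fetch its MemorySSA access, ask the walker for the nearest dominating clobbering access, and distinguish the live-on-entry case. The function name inspectClobber is hypothetical.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

void inspectClobber(MemorySSA &MSSA, BatchAAResults &BAA, StoreInst *SI) {
  MemoryUseOrDef *Acc = MSSA.getMemoryAccess(SI);
  if (!Acc)
    return; // SI has no MemorySSA access.

  // Walk upwards past non-clobbering definitions to the nearest real clobber.
  MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(SI, BAA);

  if (MSSA.isLiveOnEntryDef(Clobber))
    return; // Nothing in the function writes this location before SI.

  // The clobber may also be a MemoryPhi, hence the dyn_cast.
  if (auto *DefAcc = dyn_cast<MemoryUseOrDef>(Clobber)) {
    Instruction *ClobberInst = DefAcc->getMemoryInst();
    (void)ClobberInst;
  }
}
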
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doing it.
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessors requires PHI translation.
Definition: PHITransAddr.h:62
Value * getAddr() const
Definition: PHITransAddr.h:58
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominator tree.
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
iterator begin() const
Definition: SmallPtrSet.h:472
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
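
An illustrative sketch of the small-container idioms above: a SmallVector worklist plus a SmallPtrSet visited set, the common pattern for walking use chains without revisiting values. The helper name walkUsers is hypothetical.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

using namespace llvm;

void walkUsers(Value *Root) {
  SmallVector<Value *, 8> Worklist;
  SmallPtrSet<Value *, 16> Visited;

  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue; // already handled

    // Queue instruction users we have not seen yet.
    for (User *U : V->users())
      if (isa<Instruction>(U) && !Visited.contains(U))
        Worklist.push_back(U);
  }
}
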
An instruction for storing to memory.
Definition: Instructions.h:292
Value * getValueOperand()
Definition: Instructions.h:378
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
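
A hedged sketch of the TargetLibraryInfo queries above: map a callee's name to a LibFunc and confirm the target actually provides it. LibFunc_free is used only as an example of a recognized library function; the helper name calleeIsKnownFree is hypothetical.

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"

using namespace llvm;

bool calleeIsKnownFree(const Function *Callee, const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc LF;
  // getLibFunc recognizes the name; has() checks availability on this target.
  return TLI.getLibFunc(Callee->getName(), LF) && TLI.has(LF) &&
         LF == LibFunc_free;
}
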
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:288
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:826
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:245
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
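
A hedged sketch of the PatternMatch combinators above: recognize a conditional branch on icmp eq (load %p), 0, capturing the load instruction and both successor blocks. The function name matchBranchOnLoadEqZero is hypothetical.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace PatternMatch;

bool matchBranchOnLoadEqZero(Instruction *Term, Instruction *&Load,
                             BasicBlock *&TrueBB, BasicBlock *&FalseBB) {
  // m_CombineAnd both requires "a load of something" and captures the matched
  // instruction; m_SpecificICmp pins the predicate to eq.
  return match(Term,
               m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ,
                                   m_CombineAnd(m_Load(m_Value()),
                                                m_Instruction(Load)),
                                   m_Zero()),
                    TrueBB, FalseBB));
}
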
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use ID as an operand.
Definition: DebugInfo.cpp:1866
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:240
bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgAssignIntrinsic *DbgAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to (Dest + S...
Definition: DebugInfo.cpp:1961
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1732
Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value, return said value in the requested type.
bool isStrongerThanMonotonic(AtomicOrdering AO)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1656
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
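
A hedged sketch of the pointer-analysis helpers above: decompose a pointer into a base plus a constant byte offset, and separately strip it back to the underlying object. The helper name decomposePointer is hypothetical.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

using namespace llvm;

void decomposePointer(Value *Ptr, const DataLayout &DL) {
  int64_t Offset = 0;
  // Peels constant-offset GEPs and casts; Offset accumulates the byte offset.
  Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL);

  // Strips GEPs and casts without tracking offsets, stopping at the object
  // (alloca, global, call, argument, ...) the pointer is based on.
  const Value *Object = getUnderlyingObject(Ptr);

  (void)Base;
  (void)Object;
}
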
iterator_range< po_iterator< T > > post_order(const T &G)
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:406
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (which is required to exist).
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an address space.
Definition: Function.cpp:1187
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI, unsigned AddrSpace)
Emit a call to the calloc function.
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Align.
Definition: Alignment.h:197
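
A tiny sketch of the alignment helpers above: check a byte count against an alignment and compute the padding needed to reach the next aligned value. The function name paddingTo16 is hypothetical.

#include <cstdint>
#include "llvm/Support/Alignment.h"

using namespace llvm;

uint64_t paddingTo16(uint64_t SizeInBytes) {
  Align Sixteen(16);
  // isAligned: is SizeInBytes already a multiple of 16?
  if (isAligned(Sixteen, SizeInBytes))
    return 0;
  // offsetToAlignment: bytes needed to reach the next multiple, e.g. 13 -> 3.
  return offsetToAlignment(SizeInBytes, Sixteen);
}
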
bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert it before I.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it is.
auto predecessors(const MachineBasicBlock *BB)
bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice, which is based on C++'s definition.
bool isRefSet(const ModRefInfo MRI)
Definition: ModRef.h:51
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Various options to control the behavior of getObjectSize.
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254