using namespace PatternMatch;

#define DEBUG_TYPE "dse"

STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther, "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
STATISTIC(NumModifiedStores, "Number of stores modified");
STATISTIC(NumCFGChecks, "Number of stores modified");
STATISTIC(NumGetDomMemoryDefPassed,
          "Number of times a valid candidate is returned from getDomMemoryDef");
STATISTIC(NumDomMemDefChecks,
          "Number iterations check for reads in getDomMemoryDef");
107 "Controls which MemoryDefs are eliminated.");
112 cl::desc(
"Enable partial-overwrite tracking in DSE"));
117 cl::desc(
"Enable partial store merging in DSE"));
121 cl::desc(
"The number of memory instructions to scan for "
122 "dead store elimination (default = 150)"));
125 cl::desc(
"The maximum number of steps while walking upwards to find "
126 "MemoryDefs that may be killed (default = 90)"));
130 cl::desc(
"The maximum number candidates that only partially overwrite the "
131 "killing MemoryDef to consider"
136 cl::desc(
"The number of MemoryDefs we consider as candidates to eliminated "
137 "other stores per basic block (default = 5000)"));
142 "The cost of a step in the same basic block as the killing MemoryDef"
148 cl::desc(
"The cost of a step in a different basic "
149 "block than the killing MemoryDef"
154 cl::desc(
"The maximum number of blocks to check when trying to prove that "
155 "all paths to an exit go through a killing block (default = 50)"));
165 cl::desc(
"Allow DSE to optimize memory accesses."));
170 cl::desc(
"Enable the initializes attr improvement in DSE"));
/// Returns true if the end of this instruction can be safely shortened in
/// length.
static bool isShortenableAtTheEnd(Instruction *I) {
  // Don't shorten stores for now.
  if (isa<StoreInst>(I))
    return false;

  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memcpy_element_unordered_atomic:
    case Intrinsic::memset_element_unordered_atomic:
      // Do shorten memory intrinsics.
      return true;
    }
  }
  return false;
}

/// Returns true if the beginning of this instruction can be safely shortened
/// in length.
static bool isShortenableAtTheBeginning(Instruction *I) {
  return isa<AnyMemSetInst>(I);
}
enum OverwriteResult {
  OW_Begin,
  OW_Complete,
  OW_End,
  OW_PartialEarlierWithFullLater,
  OW_MaybePartial,
  OW_None,
  OW_Unknown
};

/// Check if two instruction are masked stores that completely overwrite one
/// another. More specifically, \p KillingI has to overwrite \p DeadI.
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
                                              const Instruction *DeadI,
                                              BatchAAResults &AA) {
  const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
  const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
  if (KillingII == nullptr || DeadII == nullptr)
    return OW_Unknown;
  if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
    return OW_Unknown;
  if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
    // Types.
    VectorType *KillingTy =
        cast<VectorType>(KillingII->getArgOperand(0)->getType());
    VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
    if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
      return OW_Unknown;
    // Element count.
    if (KillingTy->getElementCount() != DeadTy->getElementCount())
      return OW_Unknown;
    // Pointers.
    Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
    Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
    if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
      return OW_Unknown;
    // Masks.
    if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
      return OW_Unknown;
    return OW_Complete;
  }
  return OW_Unknown;
}
/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
/// overwrites a store to the 'DeadLoc' location, or a partial-overwrite kind
/// otherwise.
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
                                          const MemoryLocation &DeadLoc,
                                          int64_t KillingOff, int64_t DeadOff,
                                          Instruction *DeadI,
                                          InstOverlapIntervalsTy &IOL) {
  const uint64_t KillingSize = KillingLoc.Size.getValue();
  const uint64_t DeadSize = DeadLoc.Size.getValue();
  // The accesses may overlap without covering each other completely. Several
  // partial overlaps may together cover the complete dead store.
  if (EnablePartialOverwriteTracking &&
      KillingOff < int64_t(DeadOff + DeadSize) &&
      int64_t(KillingOff + KillingSize) >= DeadOff) {

    // Insert our part of the overlap into the map.
    auto &IM = IOL[DeadI];
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
                      << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
                      << KillingOff << ", " << int64_t(KillingOff + KillingSize)
                      << ")\n");

    // Intervals are stored with the ending offset as the key and the starting
    // offset as the value; only non-overlapping intervals are kept, adjacent
    // ones are combined.
    int64_t KillingIntStart = KillingOff;
    int64_t KillingIntEnd = KillingOff + KillingSize;

    // Find any interval ending at, or after, KillingIntStart which starts
    // before KillingIntEnd, and merge with it.
    auto ILI = IM.lower_bound(KillingIntStart);
    if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
      KillingIntStart = std::min(KillingIntStart, ILI->second);
      KillingIntEnd = std::max(KillingIntEnd, ILI->first);
      ILI = IM.erase(ILI);

      // Keep merging while further existing intervals overlap the combined
      // interval.
      while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
        assert(ILI->second > KillingIntStart && "Unexpected interval");
        KillingIntEnd = std::max(KillingIntEnd, ILI->first);
        ILI = IM.erase(ILI);
      }
    }

    IM[KillingIntEnd] = KillingIntStart;

    ILI = IM.begin();
    if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
      LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
                        << DeadOff << ", " << int64_t(DeadOff + DeadSize)
                        << ") Composite KillingLoc [" << ILI->second << ", "
                        << ILI->first << ")\n");
      ++NumCompletePartials;
      return OW_Complete;
    }
  }

  // Check for a dead store which writes to all the memory locations that the
  // killing store writes to.
  if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
      int64_t(DeadOff + DeadSize) > KillingOff &&
      uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
                      << ", " << int64_t(DeadOff + DeadSize)
                      << ") by a killing store [" << KillingOff << ", "
                      << int64_t(KillingOff + KillingSize) << ")\n");
    return OW_PartialEarlierWithFullLater;
  }

  // The killing store overwrites the end of the dead store.
  if (!EnablePartialOverwriteTracking &&
      (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
       int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
    return OW_End;

  // The killing store overwrites the beginning of the dead store.
  if (!EnablePartialOverwriteTracking &&
      (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
    assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
           "Expect to be handled as OW_Complete");
    return OW_Begin;
  }

  // Otherwise, they don't completely overlap.
  return OW_Unknown;
}
/// Returns true if the memory which is accessed by the second instruction is
/// not modified between the first and the second instruction.
static bool memoryIsNotModifiedBetween(Instruction *FirstI,
                                       Instruction *SecondI,
                                       BatchAAResults &AA,
                                       const DataLayout &DL,
                                       DominatorTree *DT) {
  // Walk backwards through the CFG from SecondI to FirstI, tracking the
  // accessed address across blocks with PHI translation.
  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
  // ...
  MemoryLocation MemLoc;
  if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
    MemLoc = MemoryLocation::getForDest(MemSet);
  // ...
  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
  // ...
  bool isFirstBlock = true;

  // Check all blocks going backward until we reach the load block.
  while (!WorkList.empty()) {
    // ...
    // Ignore instructions after SecondI on the first visit of its block.
    assert(B == SecondBB && "first block is not the store block");
    // ...
    isFirstBlock = false;
    // ...
    for (; BI != EI; ++BI) {
      Instruction *I = &*BI;
      if (I->mayWriteToMemory() && I != SecondI)
        if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
          return false;
    }
    // ...
    assert(B != &FirstBB->getParent()->getEntryBlock() &&
           "Should not hit the entry block because SI must be dominated by LI");
    for (BasicBlock *Pred : predecessors(B)) {
      // ... PHI-translate the address into Pred ...
      auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
      if (!Inserted.second) {
        // Already visited this block; bail out if it was reached with a
        // different address.
        if (TranslatedPtr != Inserted.first->second)
          return false;
        continue;
      }
      WorkList.push_back(std::make_pair(Pred, PredAddr));
    }
  }
  return true;
}
static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
                              uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
                              uint64_t NewSizeInBits, bool IsOverwriteEnd) {
  // ...
  uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
  uint64_t DeadSliceOffsetInBits =
      OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
  auto SetDeadFragExpr = [](auto *Assign,
                            DIExpression::FragmentInfo DeadFragment) {
    // createFragmentExpression expects an offset relative to the existing
    // fragment offset, if there is one.
    uint64_t RelativeOffset = DeadFragment.OffsetInBits -
                              Assign->getExpression()
                                  ->getFragmentInfo()
                                  .value_or(DIExpression::FragmentInfo(0, 0))
                                  .OffsetInBits;
    if (auto NewExpr = DIExpression::createFragmentExpression(
            Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
      Assign->setExpression(*NewExpr);
      return;
    }
    // Failed to create a fragment expression, so discard the value, making
    // this a kill location.
    auto *Expr = *DIExpression::createFragmentExpression(
        DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
        DeadFragment.SizeInBits);
    Assign->setExpression(Expr);
    Assign->setKillLocation();
  };

  // A DIAssignID so that inserted dbg.assign intrinsics do not link to any
  // instruction.
  auto GetDeadLink = [&Ctx, &LinkToNothing]() {
    // ...
    return LinkToNothing;
  };

  // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
  // overlapping dbg.assign intrinsic.
  auto InsertAssignForOverlap = [&](auto *Assign) {
    std::optional<DIExpression::FragmentInfo> NewFragment;
    if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
                                        DeadSliceSizeInBits, Assign,
                                        NewFragment) ||
        !NewFragment) {
      // Couldn't calculate the intersecting fragment; be cautious and unlink
      // the whole assignment from the store.
      Assign->setKillAddress();
      Assign->setAssignId(GetDeadLink());
      return;
    }
    // No intersect.
    if (NewFragment->SizeInBits == 0)
      return;

    // Fragments overlap: insert a new dbg.assign for the dead part.
    auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
    NewAssign->insertAfter(Assign);
    NewAssign->setAssignId(GetDeadLink());
    SetDeadFragExpr(NewAssign, *NewFragment);
    NewAssign->setKillAddress();
  };
  for_each(Linked, InsertAssignForOverlap);
  for_each(LinkedDVRAssigns, InsertAssignForOverlap);
}
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                         uint64_t &DeadSize, int64_t KillingStart,
                         uint64_t KillingSize, bool IsOverwriteEnd) {
  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
  // ...
  int64_t ToRemoveStart = 0;
  uint64_t ToRemoveSize = 0;
  // Compute start and size of the region to remove, keeping PrefAlign on the
  // remaining store.
  if (IsOverwriteEnd) {
    uint64_t Off =
        offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
    ToRemoveStart = KillingStart + Off;
    if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
      return false;
    ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
  } else {
    ToRemoveStart = DeadStart;
    assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
           "Not overlapping accesses?");
    ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
    uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
    if (Off != 0) {
      if (ToRemoveSize <= (PrefAlign.value() - Off))
        return false;
      ToRemoveSize -= PrefAlign.value() - Off;
    }
    assert(isAligned(PrefAlign, ToRemoveSize) &&
           "Should preserve selected alignment");
  }

  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
  assert(DeadSize > ToRemoveSize && "Can't remove more than original size");

  uint64_t NewSize = DeadSize - ToRemoveSize;
  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
    // The shortened length of an atomic memory intrinsic must remain a
    // multiple of the element size.
    const uint32_t ElementSize = AMI->getElementSizeInBytes();
    if (0 != NewSize % ElementSize)
      return false;
  }

  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
                    << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
                    << "\n  KILLER [" << ToRemoveStart << ", "
                    << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");

  Value *DeadWriteLength = DeadIntrinsic->getLength();
  Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
  DeadIntrinsic->setLength(TrimmedLength);
  DeadIntrinsic->setDestAlignment(PrefAlign);

  Value *OrigDest = DeadIntrinsic->getRawDest();
  if (!IsOverwriteEnd) {
    Value *Indices[1] = {
        ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
    Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
        Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
        DeadI->getIterator());
    NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
    DeadIntrinsic->setDest(NewDestGEP);
  }
  // ...
  if (!IsOverwriteEnd)
    DeadStart += ToRemoveSize;
  DeadSize = NewSize;
  return true;
}
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
                            int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
    return false;

  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
  int64_t KillingStart = OII->second;
  uint64_t KillingSize = OII->first - KillingStart;

  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");

  if (KillingStart > DeadStart &&
      (uint64_t)(KillingStart - DeadStart) < DeadSize &&
      KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
    if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                     true)) {
      IntervalMap.erase(OII);
      return true;
    }
  }
  return false;
}

static bool tryToShortenBegin(Instruction *DeadI,
                              OverlapIntervalsTy &IntervalMap,
                              int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
    return false;

  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
  int64_t KillingStart = OII->second;
  uint64_t KillingSize = OII->first - KillingStart;

  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");

  if (KillingStart <= DeadStart &&
      KillingSize > (uint64_t)(DeadStart - KillingStart)) {
    assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
           "Should have been handled as OW_Complete");
    if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                     false)) {
      IntervalMap.erase(OII);
      return true;
    }
  }
  return false;
}
static Constant *
tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
                                   int64_t KillingOffset, int64_t DeadOffset,
                                   const DataLayout &DL, BatchAAResults &AA,
                                   DominatorTree *DT) {
  // ... both stores write a ConstantInt, the killing store is fully contained
  // in the dead one, and memory is not modified in between ...

    // Offset of the smaller store inside the larger store.
    unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
    unsigned LShiftAmount =
        DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
                         : BitOffsetDiff;
    APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
                                   LShiftAmount + KillingBits);
    APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
    LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n  Dead: " << *DeadI
                      << "\n  Killing: " << *KillingI
                      << "\n  Merged Value: " << Merged << '\n');
  // ...
}
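// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the pass): the shift/mask arithmetic used
// by tryToMergePartialOverlappingStores, reduced to plain uint64_t for the
// little-endian case. Names and the 2-byte/8-byte sizes are illustrative only.

#include <cassert>
#include <cstdint>

// Fold a 2-byte constant store located ByteOffsetDiff bytes into an 8-byte
// constant store by splicing its bits into the wider value.
uint64_t mergeStores(uint64_t DeadValue, uint16_t KillingValue,
                     unsigned ByteOffsetDiff) {
  unsigned LShiftAmount = ByteOffsetDiff * 8;    // little-endian shift
  uint64_t Mask = 0xFFFFull << LShiftAmount;     // bits covered by the killer
  return (DeadValue & ~Mask) | (uint64_t(KillingValue) << LShiftAmount);
}

int main() {
  // An i64 store of 0x1111111111111111 partially overwritten by an i16 store
  // of 0xABCD placed two bytes into it.
  assert(mergeStores(0x1111111111111111ull, 0xABCD, 2) == 0x11111111ABCD1111ull);
}
// ---------------------------------------------------------------------------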
static bool isNoopIntrinsic(Instruction *I) {
  if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::assume:
      return true;
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_label:
    case Intrinsic::dbg_value:
      llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
    default:
      return false;
    }
  }
  return false;
}

// Check if we can ignore \p D for DSE.
static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
  Instruction *DI = D->getMemoryInst();
  // Calls that only access inaccessible memory cannot read or write any memory
  // locations we consider for elimination.
  if (auto *CB = dyn_cast<CallBase>(DI))
    if (CB->onlyAccessesInaccessibleMemory())
      return true;

  // We can eliminate stores to locations not visible to the caller across
  // throwing instructions.
  if (DI->mayThrow() && !DefVisibleToCaller)
    return true;

  // Fences only constrain the ordering of already visible stores; skipping
  // over a fence does not change a store from being dead.
  if (isa<FenceInst>(DI))
    return true;

  // Skip intrinsics that do not really read or modify memory.
  if (isNoopIntrinsic(DI))
    return true;

  return false;
}
// A memory location wrapper that represents a MemoryLocation, MemLoc, defined
// by MemDef.
struct MemoryLocationWrapper {
  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
                        bool DefByInitializesAttr)
      : MemLoc(MemLoc), MemDef(MemDef),
        DefByInitializesAttr(DefByInitializesAttr) {
    assert(MemLoc.Ptr && "MemLoc should be not null");
    UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
    DefInst = MemDef->getMemoryInst();
  }

  MemoryLocation MemLoc;
  const Value *UnderlyingObject;
  MemoryDef *MemDef;
  Instruction *DefInst;
  bool DefByInitializesAttr = false;
};

// A MemoryDef wrapper that represents a MemoryDef and the MemoryLocation(s)
// defined by it.
struct MemoryDefWrapper {
  MemoryDefWrapper(MemoryDef *MemDef,
                   ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
    DefInst = MemDef->getMemoryInst();
    for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
      DefinedLocations.push_back(
          MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
  }
  Instruction *DefInst;
  SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
};

bool hasInitializesAttr(Instruction *I) {
  CallBase *CB = dyn_cast<CallBase>(I);
  return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
}

struct ArgumentInitInfo {
  unsigned Idx;
  bool IsDeadOrInvisibleOnUnwind;
  ConstantRangeList Inits;
};

// Return the intersected range list of the initializes attributes of aliasing
// call arguments. Returns empty if the call may unwind and any argument is
// neither dead nor invisible on unwind.
ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
                                              bool CallHasNoUnwindAttr) {
  if (Args.empty())
    return {};

  for (const auto &Arg : Args) {
    if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
      return {};
    if (Arg.Inits.empty())
      return {};
  }

  ConstantRangeList IntersectedIntervals = Args.front().Inits;
  for (auto &Arg : Args.drop_front())
    IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);

  return IntersectedIntervals;
}
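// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the pass): intersecting the per-argument
// "initializes" ranges, as ConstantRangeList::intersectWith does above, using
// plain sorted vectors of half-open int64_t ranges. Names are illustrative.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Disjoint, sorted half-open ranges [Lower, Upper).
using RangeList = std::vector<std::pair<int64_t, int64_t>>;

// Two-pointer intersection of two sorted range lists.
RangeList intersectWith(const RangeList &A, const RangeList &B) {
  RangeList Result;
  size_t I = 0, J = 0;
  while (I < A.size() && J < B.size()) {
    int64_t Lo = std::max(A[I].first, B[J].first);
    int64_t Hi = std::min(A[I].second, B[J].second);
    if (Lo < Hi)
      Result.emplace_back(Lo, Hi);
    // Advance whichever range ends first.
    if (A[I].second < B[J].second)
      ++I;
    else
      ++J;
  }
  return Result;
}

int main() {
  // One aliasing argument initializes bytes [0, 16), the other [8, 32); only
  // [8, 16) is guaranteed to be initialized by the call.
  RangeList R = intersectWith({{0, 16}}, {{8, 32}});
  assert((R == RangeList{{8, 16}}));
}
// ---------------------------------------------------------------------------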
struct DSEState {
  // ...
  bool ContainsIrreducibleLoops;
  // ...
  bool AnyUnreachableExit;
  // ...
  bool ShouldIterateEndOfFunctionDSE;

  // Class contains self-reference, make sure it's not copied/moved.
  DSEState(const DSEState &) = delete;
  DSEState &operator=(const DSEState &) = delete;

  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
           PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
           const LoopInfo &LI)
      : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
        PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
    // Collect blocks with throwing instructions not modeled in MemorySSA and
    // alloc-like objects.
    unsigned PO = 0;
    for (BasicBlock *BB : post_order(&F)) {
      PostOrderNumbers[BB] = PO++;
      for (Instruction &I : *BB) {
        MemoryAccess *MA = MSSA.getMemoryAccess(&I);
        if (I.mayThrow() && !MA)
          ThrowingBlocks.insert(I.getParent());

        auto *MD = dyn_cast_or_null<MemoryDef>(MA);
        if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
            (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
             (EnableInitializesImprovement && hasInitializesAttr(&I))))
          MemDefs.push_back(MD);
      }
    }

    // Treat byval or inalloca arguments the same as Allocas: stores to them
    // are dead at the end of the function.
    for (Argument &AI : F.args())
      if (AI.hasPassPointeeByValueCopyAttr())
        InvisibleToCallerAfterRet.insert({&AI, true});

    ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);

    AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
      return isa<UnreachableInst>(E->getTerminator());
    });
  }

  static void pushMemUses(MemoryAccess *Acc,
                          SmallVectorImpl<MemoryAccess *> &WorkList,
                          SmallPtrSetImpl<MemoryAccess *> &Visited) {
    for (Use &U : Acc->uses()) {
      auto *MA = cast<MemoryAccess>(U.getUser());
      if (Visited.insert(MA).second)
        WorkList.push_back(MA);
    }
  }

  LocationSize strengthenLocationSize(const Instruction *I,
                                      LocationSize Size) const {
    if (auto *CB = dyn_cast<CallBase>(I)) {
      LibFunc F;
      if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
          (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
        // Use the precise location size specified by the 3rd argument for
        // memset_chk/memcpy_chk: they either write that many bytes or trap.
        if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
          return LocationSize::precise(Len->getZExtValue());
      }
    }
    return Size;
  }
  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
  /// KillingI) completely overwrites a store to the 'DeadLoc' location (by \p
  /// DeadI).
  OverwriteResult isOverwrite(const Instruction *KillingI,
                              const Instruction *DeadI,
                              const MemoryLocation &KillingLoc,
                              const MemoryLocation &DeadLoc,
                              int64_t &KillingOff, int64_t &DeadOff) {
    // AliasAnalysis does not always account for loops; limit overwrite checks
    // to dependencies that are independent of any loops they are in.
    if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
      return OW_Unknown;

    LocationSize KillingLocSize =
        strengthenLocationSize(KillingI, KillingLoc.Size);
    const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
    const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
    const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
    const Value *KillingUndObj = getUnderlyingObject(KillingPtr);

    // Check whether the killing store overwrites the whole object, in which
    // case the size/offset of the dead store does not matter.
    if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
        isIdentifiedObject(KillingUndObj)) {
      std::optional<TypeSize> KillingUndObjSize =
          getPointerSize(KillingUndObj, DL, TLI, &F);
      if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
        return OW_Complete;
    }

    if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
      // With no constant sizes, compare the IR length operands of the two
      // memory intrinsics instead.
      const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
      const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
      if (KillingMemI && DeadMemI) {
        const Value *KillingV = KillingMemI->getLength();
        const Value *DeadV = DeadMemI->getLength();
        if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
          return OW_Complete;
      }
      // Masked stores have imprecise locations, but we can reason about them
      // to some extent.
      return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
    }

    const TypeSize KillingSize = KillingLocSize.getValue();
    const TypeSize DeadSize = DeadLoc.Size.getValue();
    // Bail out on scalable sizes for now.
    const bool AnyScalable =
        DeadSize.isScalable() || KillingLocSize.isScalable();
    if (AnyScalable)
      return OW_Unknown;

    AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
    // If the start pointers are the same, comparing sizes is enough.
    if (AAR == AliasResult::MustAlias) {
      if (KillingSize >= DeadSize)
        return OW_Complete;
    }
    // A partial alias with a known offset may still be a full overwrite.
    if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
      int32_t Off = AAR.getOffset();
      if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
        return OW_Complete;
    }

    // If we can't resolve the pointers to the same object, give up.
    if (DeadUndObj != KillingUndObj) {
      // ...
      return OW_Unknown;
    }

    // Decompose both pointers into "base + constant offset" and reason about
    // the offsets when the bases match.
    const Value *DeadBasePtr =
        GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
    const Value *KillingBasePtr =
        GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
    if (DeadBasePtr != KillingBasePtr)
      return OW_Unknown;

    // Check if the dead access starts "not before" the killing one.
    if (DeadOff >= KillingOff) {
      // Dead access ends no later than the killing one: completely
      // overwritten.
      if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
        return OW_Complete;
      // Dead access starts before the killing one ends: partial overlap.
      else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
        return OW_MaybePartial;
    }
    // Killing access starts before the dead one ends: partial overlap.
    else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
      return OW_MaybePartial;
    }
    // Accesses are known not to overlap.
    return OW_None;
  }
  bool isInvisibleToCallerAfterRet(const Value *V) {
    if (isa<AllocaInst>(V))
      return true;
    auto I = InvisibleToCallerAfterRet.insert({V, false});
    if (I.second) {
      if (!isInvisibleToCallerOnUnwind(V)) {
        I.first->second = false;
      } else if (isNoAliasCall(V)) {
        I.first->second = !PointerMayBeCaptured(V, true, false);
      }
    }
    return I.first->second;
  }

  bool isInvisibleToCallerOnUnwind(const Value *V) {
    bool RequiresNoCaptureBeforeUnwind;
    if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
      return false;
    if (!RequiresNoCaptureBeforeUnwind)
      return true;

    auto I = CapturedBeforeReturn.insert({V, true});
    if (I.second)
      I.first->second = PointerMayBeCaptured(V, /*ReturnCaptures=*/false,
                                             /*StoreCaptures=*/true);
    return !I.first->second;
  }
  std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
    if (!I->mayWriteToMemory())
      return std::nullopt;

    if (auto *CB = dyn_cast<CallBase>(I))
      return MemoryLocation::getForDest(CB, TLI);

    return MemoryLocation::getOrNone(I);
  }

  // Returns a list of <MemoryLocation, bool> pairs written by I. The bool
  // indicates whether the write comes from the "initializes" attribute.
  SmallVector<std::pair<MemoryLocation, bool>, 1>
  getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
    SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
    if (isMemTerminatorInst(I)) {
      if (auto Loc = getLocForTerminator(I))
        Locations.push_back(std::make_pair(Loc->first, false));
      return Locations;
    }

    if (auto Loc = getLocForWrite(I))
      Locations.push_back(std::make_pair(*Loc, false));

    if (ConsiderInitializesAttr) {
      for (auto &MemLoc : getInitializesArgMemLoc(I)) {
        Locations.push_back(std::make_pair(MemLoc, true));
      }
    }
    return Locations;
  }

  /// Assuming this instruction has a dead analyzable write, can we delete it?
  bool isRemovable(Instruction *I) {
    assert(getLocForWrite(I) && "Must have analyzable write");

    // Don't remove volatile/atomic stores.
    if (auto *SI = dyn_cast<StoreInst>(I))
      return SI->isUnordered();

    if (auto *CB = dyn_cast<CallBase>(I)) {
      // Don't remove volatile memory intrinsics.
      if (auto *MI = dyn_cast<MemIntrinsic>(CB))
        return !MI->isVolatile();

      // Never remove dead lifetime intrinsics, e.g. because they are followed
      // by a free.
      if (CB->isLifetimeStartOrEnd())
        return false;

      return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
             !CB->isTerminator();
    }

    return false;
  }
  /// Returns true if \p UseInst completely overwrites \p DefLoc (stored by \p
  /// DefInst).
  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
                           Instruction *UseInst) {
    // A MemoryDef may not actually write memory, e.g. a volatile load.
    if (!UseInst->mayWriteToMemory())
      return false;

    if (auto *CB = dyn_cast<CallBase>(UseInst))
      if (CB->onlyAccessesInaccessibleMemory())
        return false;

    int64_t InstWriteOffset, DepWriteOffset;
    if (auto CC = getLocForWrite(UseInst))
      return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
                         DepWriteOffset) == OW_Complete;
    return false;
  }

  /// Returns true if \p Def is not read before returning from the function.
  bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) {
    LLVM_DEBUG(dbgs() << "  Check if def " << *Def << " ("
                      << *Def->getMemoryInst()
                      << ") is at the end the function \n");
    SmallVector<MemoryAccess *, 4> WorkList;
    SmallPtrSet<MemoryAccess *, 8> Visited;

    pushMemUses(Def, WorkList, Visited);
    for (unsigned I = 0; I < WorkList.size(); I++) {
      if (WorkList.size() >= MemorySSAScanLimit) {
        LLVM_DEBUG(dbgs() << "  ... hit exploration limit.\n");
        return false;
      }

      MemoryAccess *UseAccess = WorkList[I];
      if (isa<MemoryPhi>(UseAccess)) {
        // AliasAnalysis does not account for loops; limit elimination to
        // candidates that always store to the same location.
        if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
          return false;

        pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
        continue;
      }

      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
      if (isReadClobber(DefLoc, UseInst)) {
        LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UseInst << ".\n");
        return false;
      }

      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
        pushMemUses(UseDef, WorkList, Visited);
    }
    return true;
  }
  /// If \p I is a memory terminator like llvm.lifetime.end or free, return the
  /// location it terminates, plus a flag indicating whether it is a free-like
  /// call.
  std::optional<std::pair<MemoryLocation, bool>>
  getLocForTerminator(Instruction *I) const {
    // ... handle llvm.lifetime.end by returning the marked location ...
    if (auto *CB = dyn_cast<CallBase>(I)) {
      if (Value *FreedOp = getFreedOperand(CB, &TLI))
        return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
    }
    return std::nullopt;
  }

  /// Returns true if \p I is a memory terminator instruction like
  /// llvm.lifetime.end or free.
  bool isMemTerminatorInst(Instruction *I) const {
    auto *CB = dyn_cast<CallBase>(I);
    return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
                  getFreedOperand(CB, &TLI) != nullptr);
  }

  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
  /// instruction \p AccessI.
  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
                       Instruction *MaybeTerm) {
    std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
        getLocForTerminator(MaybeTerm);
    if (!MaybeTermLoc)
      return false;

    // If the terminator is a free-like call, all accesses to the underlying
    // object can be considered terminated.
    if (getUnderlyingObject(Loc.Ptr) !=
        getUnderlyingObject(MaybeTermLoc->first.Ptr))
      return false;

    auto TermLoc = MaybeTermLoc->first;
    if (MaybeTermLoc->second) {
      const Value *LocUO = getUnderlyingObject(Loc.Ptr);
      return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
    }
    int64_t InstWriteOffset = 0;
    int64_t DepWriteOffset = 0;
    return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
                       DepWriteOffset) == OW_Complete;
  }

  // Returns true if \p Use may read from \p DefLoc.
  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
    if (isNoopIntrinsic(UseInst))
      return false;

    // Monotonic or weaker atomic stores can be re-ordered and do not need to
    // be treated as read clobbers.
    if (auto SI = dyn_cast<StoreInst>(UseInst))
      return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);

    if (!UseInst->mayReadFromMemory())
      return false;

    if (auto *CB = dyn_cast<CallBase>(UseInst))
      if (CB->onlyAccessesInaccessibleMemory())
        return false;

    return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
  }

  /// Returns true if a dependency between \p Current and \p KillingDef is
  /// guaranteed to be loop invariant for the loops they are in.
  bool isGuaranteedLoopIndependent(const Instruction *Current,
                                   const Instruction *KillingDef,
                                   const MemoryLocation &CurrentLoc) {
    // If the dependency is within the same block or loop level (being careful
    // of irreducible loops), AA will return a valid result for the memory
    // dependency.
    if (Current->getParent() == KillingDef->getParent())
      return true;
    const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
    if (!ContainsIrreducibleLoops && CurrentLI &&
        CurrentLI == LI.getLoopFor(KillingDef->getParent()))
      return true;
    // Otherwise check the memory location is invariant to any loops.
    return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
  }

  /// Returns true if \p Ptr is guaranteed to be loop invariant for any
  /// possible loop, i.e. it only references a single MemoryLocation during
  /// execution of the containing function.
  bool isGuaranteedLoopInvariant(const Value *Ptr) {
    Ptr = Ptr->stripPointerCasts();
    if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
      if (GEP->hasAllConstantIndices())
        Ptr = GEP->getPointerOperand()->stripPointerCasts();

    if (auto *I = dyn_cast<Instruction>(Ptr)) {
      return I->getParent()->isEntryBlock() ||
             (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
    }
    return true;
  }
  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
  // with no read access between them.
  std::optional<MemoryAccess *>
  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
                  unsigned &ScanLimit, unsigned &WalkerStepLimit,
                  bool IsMemTerm, unsigned &PartialLimit,
                  bool IsInitializesAttrMemLoc) {
    if (ScanLimit == 0 || WalkerStepLimit == 0) {
      LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
      return std::nullopt;
    }

    MemoryAccess *Current = StartAccess;
    Instruction *KillingI = KillingDef->getMemoryInst();
    // Only optimize defining access of KillingDef when directly starting at
    // its defining access.
    bool CanOptimize = OptimizeMemorySSA &&
                       KillingDef->getDefiningAccess() == StartAccess;

    // Find the next clobbering Mod access for KillingLoc, starting at
    // StartAccess.
    std::optional<MemoryLocation> CurrentLoc;
    for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
      LLVM_DEBUG({
        dbgs() << "   visiting " << *Current;
        if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
          dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
                 << ")";
        dbgs() << "\n";
      });

      // Reached TOP.
      if (MSSA.isLiveOnEntryDef(Current)) {
        LLVM_DEBUG(dbgs() << "   ...  found LiveOnEntryDef\n");
        return std::nullopt;
      }

      // Steps in the killing def's block are cheaper than steps in other
      // blocks; bail out once the step limit is exhausted.
      unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
                              ? MemorySSASameBBStepCost
                              : MemorySSAOtherBBStepCost;
      if (WalkerStepLimit <= StepCost) {
        LLVM_DEBUG(dbgs() << "   ...  hit walker step limit\n");
        return std::nullopt;
      }
      WalkerStepLimit -= StepCost;

      // Return MemoryPhis; the caller is responsible for traversing them.
      if (isa<MemoryPhi>(Current)) {
        LLVM_DEBUG(dbgs() << "   ...  found MemoryPhi\n");
        return Current;
      }

      // Check if CurrentDef is a valid candidate to be eliminated by
      // KillingDef. If it is not, check the next candidate.
      MemoryDef *CurrentDef = cast<MemoryDef>(Current);
      Instruction *CurrentI = CurrentDef->getMemoryInst();

      if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
        CanOptimize = false;
        continue;
      }

      // Check for extra throwing instructions that block DSE.
      if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
        LLVM_DEBUG(dbgs() << "  ... skip, may throw!\n");
        return std::nullopt;
      }

      // Check for anything that acts as a barrier to further removal.
      if (isDSEBarrier(KillingUndObj, CurrentI)) {
        LLVM_DEBUG(dbgs() << "  ... skip, barrier\n");
        return std::nullopt;
      }

      // If Current is a read clobber of KillingLoc, the path is not
      // profitable. Skip this check for intrinsic calls, which are handled
      // explicitly below.
      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
        return std::nullopt;

      // Quick check if there are direct uses that are read clobbers.
      if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
              return !MSSA.dominates(StartAccess, UseOrDef) &&
                     isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
            return false;
          })) {
        LLVM_DEBUG(dbgs() << "   ...  found a read clobber\n");
        return std::nullopt;
      }

      // If Current has no analyzable write location or is not removable, skip
      // it.
      CurrentLoc = getLocForWrite(CurrentI);
      if (!CurrentLoc || !isRemovable(CurrentI)) {
        CanOptimize = false;
        continue;
      }

      // AliasAnalysis does not account for loops; limit elimination to
      // candidates that always store to the same location.
      if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
        LLVM_DEBUG(dbgs() << "  ... not guaranteed loop independent\n");
        CanOptimize = false;
        continue;
      }

      if (IsMemTerm) {
        // If the killing def is a memory terminator, check the next candidate
        // unless Current writes the same underlying object.
        if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
          CanOptimize = false;
          continue;
        }
      } else {
        int64_t KillingOffset = 0;
        int64_t DeadOffset = 0;
        auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
                              KillingOffset, DeadOffset);
        if (CanOptimize) {
          // CurrentDef is the earliest write clobber of KillingDef; use it as
          // the optimized access unless it is already the defining access.
          if (CurrentDef != KillingDef->getDefiningAccess() &&
              (OR == OW_Complete || OR == OW_MaybePartial))
            KillingDef->setOptimized(CurrentDef);
          // Once a may-aliasing def is encountered, stop optimizing.
          if (OR != OW_None)
            CanOptimize = false;
        }

        // If Current does not write to the same object as KillingDef, check
        // the next candidate.
        if (OR == OW_Unknown || OR == OW_None)
          continue;
        else if (OR == OW_MaybePartial) {
          // Aggressively limit the number of partial-overwrite candidates;
          // they are less likely to be removable in the end.
          if (PartialLimit <= 1) {
            WalkerStepLimit -= 1;
            LLVM_DEBUG(dbgs() << "   ... reached partial limit ... continue "
                                 "with next access\n");
            continue;
          }
          PartialLimit -= 1;
        }
      }
      break;
    };

    // Accesses to objects accessible after the function returns can only be
    // eliminated if the access is dead along all paths to the exit. Collect
    // blocks with killing (completely overwriting) MemoryDefs and check later
    // whether they cover all paths from MaybeDeadAccess to any function exit.
    SmallPtrSet<Instruction *, 16> KillingDefs;
    KillingDefs.insert(KillingDef->getMemoryInst());
    MemoryAccess *MaybeDeadAccess = Current;
    MemoryLocation MaybeDeadLoc = *CurrentLoc;
    Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
    LLVM_DEBUG(dbgs() << "  Checking for reads of " << *MaybeDeadAccess << " ("
                      << *MaybeDeadI << ")\n");

    SmallVector<MemoryAccess *, 32> WorkList;
    SmallPtrSet<MemoryAccess *, 32> Visited;
    pushMemUses(MaybeDeadAccess, WorkList, Visited);

    // Check if MaybeDeadAccess may be read.
    for (unsigned I = 0; I < WorkList.size(); I++) {
      MemoryAccess *UseAccess = WorkList[I];

      // Bail out if the number of accesses to check exceeds the scan limit.
      if (ScanLimit < (WorkList.size() - I)) {
        LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
        return std::nullopt;
      }
      --ScanLimit;
      NumDomMemDefChecks++;

      if (isa<MemoryPhi>(UseAccess)) {
        if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
              return DT.properlyDominates(KI->getParent(),
                                          UseAccess->getBlock());
            })) {
          LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
          continue;
        }
        pushMemUses(UseAccess, WorkList, Visited);
        continue;
      }

      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
      if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
            return DT.dominates(KI, UseInst);
          })) {
        LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
        continue;
      }

      // A memory terminator kills all preceding MemoryDefs and all following
      // MemoryUses, so a dominated use cannot read the terminated memory.
      if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
        LLVM_DEBUG(
            dbgs()
            << " ... skipping, memterminator invalidates following accesses\n");
        continue;
      }

      if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
        LLVM_DEBUG(dbgs() << "    ... adding uses of intrinsic\n");
        pushMemUses(UseAccess, WorkList, Visited);
        continue;
      }

      if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
        LLVM_DEBUG(dbgs() << "  ... found throwing instruction\n");
        return std::nullopt;
      }

      // Uses that may read the original MemoryDef block the walk, unless the
      // killing def is a call with the "initializes" attribute, whose reads
      // in the callee are dominated by the initialization.
      bool IsKillingDefFromInitAttr = false;
      if (IsInitializesAttrMemLoc) {
        if (KillingI == UseInst &&
            KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
          IsKillingDefFromInitAttr = true;
      }

      if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
        LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
        return std::nullopt;
      }

      // If the walk reaches the original access again and the pointer is not
      // guaranteed loop invariant, a store cannot be assumed to kill itself.
      if (MaybeDeadAccess == UseAccess &&
          !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
        LLVM_DEBUG(dbgs() << "    ... found not loop invariant self access\n");
        return std::nullopt;
      }
      // Otherwise, for KillingDef and MaybeDeadAccess it is enough to check
      // whether they read the location, which we did above.
      if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess)
        continue;

      // If the use is a MemoryDef that completely overwrites the dead
      // location, it becomes a killing def along this path; otherwise follow
      // its uses.
      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
        if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
          BasicBlock *MaybeKillingBlock = UseInst->getParent();
          if (PostOrderNumbers.find(MaybeKillingBlock)->second <
              PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
            if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
              LLVM_DEBUG(dbgs()
                         << "    ... found killing def " << *UseInst << "\n");
              KillingDefs.insert(UseInst);
            }
          } else {
            LLVM_DEBUG(dbgs()
                       << "    ... found preceeding def " << *UseInst << "\n");
            return std::nullopt;
          }
        } else
          pushMemUses(UseDef, WorkList, Visited);
      }
    }

    // For locations visible after the function returns, make sure the
    // location is dead along all paths from MaybeDeadAccess to the exit.
    if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
      SmallPtrSet<BasicBlock *, 16> KillingBlocks;
      for (Instruction *KD : KillingDefs)
        KillingBlocks.insert(KD->getParent());
      assert(!KillingBlocks.empty() &&
             "Expected at least a single killing block");

      // Find the common post-dominator of all killing blocks.
      BasicBlock *CommonPred = *KillingBlocks.begin();
      for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
        if (!CommonPred)
          break;
        CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
      }

      // If CommonPred does not post-dominate MaybeDeadAccess, there is a path
      // from MaybeDeadAccess to an exit not going through a killing block.
      if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
        if (!AnyUnreachableExit)
          return std::nullopt;

        // Fall back to a CFG scan starting at all non-unreachable roots.
        CommonPred = nullptr;
      }

      // If CommonPred itself is in the set of killing blocks, we're done.
      if (KillingBlocks.count(CommonPred))
        return {MaybeDeadAccess};

      SetVector<BasicBlock *> WorkList;
      // If CommonPred is null, there are multiple exits; add them all.
      if (CommonPred)
        WorkList.insert(CommonPred);
      else
        for (BasicBlock *R : PDT.roots()) {
          if (!isa<UnreachableInst>(R->getTerminator()))
            WorkList.insert(R);
        }

      // Check if all paths starting from an exit node go through one of the
      // killing blocks before reaching MaybeDeadAccess.
      for (unsigned I = 0; I < WorkList.size(); I++) {
        NumCFGChecks++;
        BasicBlock *Current = WorkList[I];
        if (KillingBlocks.count(Current))
          continue;
        if (Current == MaybeDeadAccess->getBlock())
          return std::nullopt;

        // MaybeDeadAccess is reachable from the entry, so unreachable blocks
        // need not be explored further.
        if (!DT.isReachableFromEntry(Current))
          continue;

        for (BasicBlock *Pred : predecessors(Current))
          WorkList.insert(Pred);

        if (WorkList.size() >= MemorySSAPathCheckLimit)
          return std::nullopt;
      }
    }

    // No aliasing MemoryUses of MaybeDeadAccess found; it is potentially dead.
    return {MaybeDeadAccess};
  }
  void deleteDeadInstruction(Instruction *SI,
                             SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) {
    MemorySSAUpdater Updater(&MSSA);
    SmallVector<Instruction *, 32> NowDeadInsts;
    NowDeadInsts.push_back(SI);
    // ...
    while (!NowDeadInsts.empty()) {
      Instruction *DeadInst = NowDeadInsts.pop_back_val();
      // ...
      // Remove the instruction from MemorySSA.
      MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
      bool IsMemDef = MA && isa<MemoryDef>(MA);
      if (IsMemDef) {
        auto *MD = cast<MemoryDef>(MA);
        SkipStores.insert(MD);
        // ...
        if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
          if (SI->getValueOperand()->getType()->isPointerTy()) {
            const Value *UO = getUnderlyingObject(SI->getValueOperand());
            if (CapturedBeforeReturn.erase(UO))
              ShouldIterateEndOfFunctionDSE = true;
            InvisibleToCallerAfterRet.erase(UO);
          }
        }
      }
      if (MA)
        Updater.removeMemoryAccess(MA);

      auto I = IOLs.find(DeadInst->getParent());
      if (I != IOLs.end())
        I->second.erase(DeadInst);

      // Queue operands that become trivially dead.
      for (Use &O : DeadInst->operands())
        if (Instruction *OpI = dyn_cast<Instruction>(O)) {
          O.set(PoisonValue::get(O->getType()));
          if (isInstructionTriviallyDead(OpI, &TLI))
            NowDeadInsts.push_back(OpI);
        }
      // ...
    }
  }

  // Check for any extra throws between \p KillingI and \p DeadI that block
  // DSE; MemoryDefs that may throw are handled during the walk itself.
  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
                       const Value *KillingUndObj) {
    if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
      return false;

    if (KillingI->getParent() == DeadI->getParent())
      return ThrowingBlocks.count(KillingI->getParent());
    return !ThrowingBlocks.empty();
  }

  // Check if \p DeadI acts as a DSE barrier for \p KillingI: a may-throw
  // instruction writing a caller-visible object, or an atomic access stronger
  // than monotonic.
  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
    if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
      return true;

    if (DeadI->isAtomic()) {
      if (auto *LI = dyn_cast<LoadInst>(DeadI))
        return isStrongerThanMonotonic(LI->getOrdering());
      if (auto *SI = dyn_cast<StoreInst>(DeadI))
        return isStrongerThanMonotonic(SI->getOrdering());
      if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
        return isStrongerThanMonotonic(ARMW->getOrdering());
      if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
        return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering());
    }
    return false;
  }
  bool eliminateDeadWritesAtEndOfFunction() {
    bool MadeChange = false;
    LLVM_DEBUG(
        dbgs()
        << "Trying to eliminate MemoryDefs at the end of the function\n");
    do {
      ShouldIterateEndOfFunctionDSE = false;
      for (MemoryDef *Def : llvm::reverse(MemDefs)) {
        if (SkipStores.contains(Def))
          continue;

        Instruction *DefI = Def->getMemoryInst();
        auto DefLoc = getLocForWrite(DefI);
        if (!DefLoc || !isRemovable(DefI)) {
          LLVM_DEBUG(dbgs() << "  ... could not get location for write or "
                               "instruction not removable.\n");
          continue;
        }

        const Value *UO = getUnderlyingObject(DefLoc->Ptr);
        if (!isInvisibleToCallerAfterRet(UO))
          continue;

        if (isWriteAtEndOfFunction(Def, *DefLoc)) {
          LLVM_DEBUG(dbgs() << "   ... MemoryDef is not accessed until the end "
                               "of the function\n");
          deleteDeadInstruction(DefI);
          ++NumFastStores;
          MadeChange = true;
        }
      }
    } while (ShouldIterateEndOfFunctionDSE);
    return MadeChange;
  }
  /// If we have a zero-initializing memset following a call to malloc, try
  /// folding it into a call to calloc.
  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
    Instruction *DefI = Def->getMemoryInst();
    MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
    if (!MemSet)
      return false;
    Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
    if (!StoredConstant || !StoredConstant->isNullValue())
      return false;

    if (!isRemovable(DefI))
      // The memset might be volatile.
      return false;

    if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
        F.hasFnAttribute(Attribute::SanitizeAddress) ||
        F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
        F.getName() == "calloc")
      return false;
    auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
    if (!Malloc)
      return false;
    auto *InnerCallee = Malloc->getCalledFunction();
    if (!InnerCallee)
      return false;
    LibFunc Func;
    if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
        Func != LibFunc_malloc)
      return false;
    // ...

    auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
      // Check for a "br(icmp ptr, null), truebb, falsebb" pattern at the end
      // of the malloc block.
      auto *MallocBB = Malloc->getParent(),
           *MemsetBB = Memset->getParent();
      if (MallocBB == MemsetBB)
        return true;
      auto *Ptr = Memset->getArgOperand(0);
      auto *TI = MallocBB->getTerminator();
      BasicBlock *TrueBB, *FalseBB;
      if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
                                         m_Zero()),
                          TrueBB, FalseBB)))
        return false;
      if (MemsetBB != FalseBB)
        return false;
      return true;
    };

    if (!shouldCreateCalloc(Malloc, MemSet) ||
        !DT.dominates(Malloc, MemSet) ||
        !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
      return false;
    IRBuilder<> IRB(Malloc);
    Type *SizeTTy = Malloc->getArgOperand(0)->getType();
    auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
                              Malloc->getArgOperand(0), IRB,
                              TLI, Malloc->getType()->getPointerAddressSpace());
    if (!Calloc)
      return false;

    MemorySSAUpdater Updater(&MSSA);
    auto *NewAccess =
        Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
                                        MallocDef);
    auto *NewAccessMD = cast<MemoryDef>(NewAccess);
    Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
    Malloc->replaceAllUsesWith(Calloc);
    deleteDeadInstruction(Malloc);
    return true;
  }
  // Check if there is a dominating condition that implies the value being
  // stored is already present at the store's address.
  bool dominatingConditionImpliesValue(MemoryDef *Def) {
    auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
    BasicBlock *StoreBB = StoreI->getParent();
    Value *StorePtr = StoreI->getPointerOperand();
    Value *StoreVal = StoreI->getValueOperand();
    // ...
    auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
    if (!BI || !BI->isConditional())
      return false;

    // If both successors are the same block, we cannot tell which edge the
    // store is reached on.
    if (BI->getSuccessor(0) == BI->getSuccessor(1))
      return false;

    Instruction *ICmpL;
    CmpPredicate Pred;
    if (!match(BI->getCondition(),
               m_c_ICmp(Pred,
                        m_CombineAnd(m_Load(m_Specific(StorePtr)),
                                     m_Instruction(ICmpL)),
                        m_Specific(StoreVal))) ||
        !ICmpInst::isEquality(Pred))
      return false;

    // The store must be reached only on the edge on which the comparison
    // guarantees the stored value is already in memory.
    if (Pred == ICmpInst::ICMP_EQ &&
        !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
                      StoreBB))
      return false;

    if (Pred == ICmpInst::ICMP_NE &&
        !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
                      StoreBB))
      return false;

    MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
    MemoryAccess *ClobAcc =
        MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);

    return MSSA.dominates(ClobAcc, LoadAcc);
  }

  /// Returns true if \p Def is a no-op store, either because it directly
  /// stores back a loaded value or stores zero to a calloc'ed object.
  bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
    Instruction *DefI = Def->getMemoryInst();
    StoreInst *Store = dyn_cast<StoreInst>(DefI);
    MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
    Constant *StoredConstant = nullptr;
    if (Store)
      StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
    else if (MemSet)
      StoredConstant = dyn_cast<Constant>(MemSet->getValue());
    else
      return false;

    if (!isRemovable(DefI))
      return false;

    if (StoredConstant) {
      Constant *InitC =
          getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
      // If the clobbering access is LiveOnEntry, no instructions between the
      // allocation and the store can modify the memory location.
      if (InitC && InitC == StoredConstant)
        return MSSA.isLiveOnEntryDef(
            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
    }

    if (!Store)
      return false;

    if (dominatingConditionImpliesValue(Def))
      return true;

    if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
      if (LoadI->getPointerOperand() == Store->getOperand(1)) {
        // Get the defining access for the load.
        auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
        // Fast path: the defining accesses are the same.
        if (LoadAccess == Def->getDefiningAccess())
          return true;

        // Look through phi accesses by adding them to a worklist; bail when a
        // MemoryDef other than LoadAccess is reached.
        SetVector<MemoryAccess *> ToCheck;
        MemoryAccess *Current =
            MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
        // Pretend the store's own def has already been checked, but start the
        // scan at Current.
        ToCheck.insert(Def);
        ToCheck.insert(Current);
        for (unsigned I = 1; I < ToCheck.size(); ++I) {
          Current = ToCheck[I];
          if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
            // Check all the operands.
            for (auto &Use : PhiAccess->incoming_values())
              ToCheck.insert(cast<MemoryAccess>(&Use));
            continue;
          }

          assert(isa<MemoryDef>(Current) &&
                 "Only MemoryDefs should reach here.");
          // We are searching for the definition of the store's destination;
          // any other write in between means this is not a noop store.
          if (LoadAccess != Current)
            return false;
        }
        return true;
      }
    }

    return false;
  }

  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
    bool Changed = false;
    for (auto OI : IOL) {
      Instruction *DeadI = OI.first;
      MemoryLocation Loc = *getLocForWrite(DeadI);
      assert(isRemovable(DeadI) && "Expect only removable instruction");

      const Value *Ptr = Loc.Ptr->stripPointerCasts();
      int64_t DeadStart = 0;
      uint64_t DeadSize = Loc.Size.getValue();
      GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
      OverlapIntervalsTy &IntervalMap = OI.second;
      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
      if (IntervalMap.empty())
        continue;
      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
    }
    return Changed;
  }
  /// Eliminates writes to locations where the value that is being written is
  /// already stored at the same location.
  bool eliminateRedundantStoresOfExistingValues() {
    bool MadeChange = false;
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
                         "already existing value\n");
    for (auto *Def : MemDefs) {
      if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
        continue;

      Instruction *DefInst = Def->getMemoryInst();
      auto MaybeDefLoc = getLocForWrite(DefInst);
      if (!MaybeDefLoc || !isRemovable(DefInst))
        continue;

      MemoryDef *UpperDef;
      // To conserve compile time, avoid walking to the next clobbering def and
      // just use the optimized access if it exists.
      if (Def->isOptimized())
        UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
      else
        UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
      if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
        continue;

      Instruction *UpperInst = UpperDef->getMemoryInst();
      auto IsRedundantStore = [&]() {
        if (DefInst->isIdenticalTo(UpperInst))
          return true;
        if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
          if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
            // MemSetInst must have a write location.
            auto UpperLoc = getLocForWrite(UpperInst);
            if (!UpperLoc)
              return false;
            int64_t InstWriteOffset = 0;
            int64_t DepWriteOffset = 0;
            auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
                                  InstWriteOffset, DepWriteOffset);
            Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
            return StoredByte && StoredByte == MemSetI->getOperand(1) &&
                   OR == OW_Complete;
          }
        }
        return false;
      };

      if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
        continue;
      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: " << *DefInst
                        << '\n');
      deleteDeadInstruction(DefInst);
      NumRedundantStores++;
      MadeChange = true;
    }
    return MadeChange;
  }

  // Return the locations written by the "initializes" attribute, considering
  // unwind edges and aliasing arguments.
  SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);

  // Try to eliminate dead defs that write the same location as
  // \p KillingLocWrapper. Return whether any changes were made and whether
  // \p KillingLocWrapper.DefInst was deleted.
  std::pair<bool, bool>
  eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);

  // Try to eliminate dead defs killed by \p KillingDefWrapper and return the
  // change state.
  bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
};
SmallVector<MemoryLocation, 1>
DSEState::getInitializesArgMemLoc(const Instruction *I) {
  const CallBase *CB = dyn_cast<CallBase>(I);
  if (!CB)
    return {};

  // Collect aliasing arguments and their "initializes" ranges.
  SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
  for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
    ConstantRangeList Inits;
    Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
    if (InitializesAttr.isValid())
      Inits = InitializesAttr.getValueAsConstantRangeList();

    Value *CurArg = CB->getArgOperand(Idx);
    // The "initializes" attribute may only be used to kill dead stores if the
    // call does not unwind, the parameter is dead on unwind, or the argument
    // is invisible to the caller on unwind.
    bool IsDeadOrInvisibleOnUnwind =
        CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
        (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
    ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
    bool FoundAliasing = false;
    for (auto &[Arg, AliasList] : Arguments) {
      auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
                               MemoryLocation::getBeforeOrAfter(CurArg));
      if (AAR == AliasResult::NoAlias) {
        continue;
      } else if (AAR == AliasResult::MustAlias) {
        FoundAliasing = true;
        AliasList.push_back(InitInfo);
      } else {
        // For PartialAlias and MayAlias there may be an unknown offset between
        // the arguments, so insert an empty range list to discard the whole
        // "initializes" info during intersection.
        FoundAliasing = true;
        AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
                                             ConstantRangeList()});
      }
    }
    if (!FoundAliasing)
      Arguments[CurArg] = {InitInfo};
  }

  SmallVector<MemoryLocation, 1> Locations;
  for (const auto &[_, Args] : Arguments) {
    auto IntersectedRanges =
        getIntersectedInitRangeList(Args, CB->doesNotThrow());
    if (IntersectedRanges.empty())
      continue;

    for (const auto &Arg : Args) {
      for (const auto &Range : IntersectedRanges) {
        int64_t Start = Range.getLower().getSExtValue();
        int64_t End = Range.getUpper().getSExtValue();
        // ... record a precise MemoryLocation of size (End - Start) for the
        // argument operand ...
      }
    }
  }
  return Locations;
}
std::pair<bool, bool>
DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
  bool Changed = false;
  bool DeletedKillingLoc = false;
  unsigned ScanLimit = MemorySSAScanLimit;
  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
  unsigned PartialLimit = MemorySSAPartialStoreLimit;
  // Worklist of MemoryAccesses that may be killed by
  // "KillingLocWrapper.MemDef".
  SmallSetVector<MemoryAccess *, 8> ToCheck;
  // Track MemoryAccesses that have been deleted in the loop below, so we can
  // skip them. Don't use SkipStores for this, which may contain reused
  // MemoryAccess addresses.
  SmallPtrSet<MemoryAccess *, 8> Deleted;
  [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
  ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());

  // Check if MemoryAccesses in the worklist are killed by
  // "KillingLocWrapper.MemDef".
  for (unsigned I = 0; I < ToCheck.size(); I++) {
    MemoryAccess *Current = ToCheck[I];
    if (Deleted.contains(Current))
      continue;
    std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
        KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
        KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
        isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
        KillingLocWrapper.DefByInitializesAttr);

    if (!MaybeDeadAccess) {
      LLVM_DEBUG(dbgs() << "  finished walk\n");
      continue;
    }
    MemoryAccess *DeadAccess = *MaybeDeadAccess;
    LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
    if (isa<MemoryPhi>(DeadAccess)) {
      LLVM_DEBUG(dbgs() << "\n  ... adding incoming values to worklist\n");
      for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
        MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
        BasicBlock *IncomingBlock = IncomingAccess->getBlock();
        BasicBlock *PhiBlock = DeadAccess->getBlock();

        // Only consider incoming MemoryAccesses that come before the
        // MemoryPhi, to stay on strictly dominating candidates.
        if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
          ToCheck.insert(IncomingAccess);
      }
      continue;
    }
    // Do not apply the initializes attribute to DeadAccess; it would
    // incorrectly treat a call instruction as a redundant store.
    MemoryDefWrapper DeadDefWrapper(
        cast<MemoryDef>(DeadAccess),
        getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
                      /*ConsiderInitializesAttr=*/false));
    assert(DeadDefWrapper.DefinedLocations.size() == 1);
    MemoryLocationWrapper &DeadLocWrapper =
        DeadDefWrapper.DefinedLocations.front();
    LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
    ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
    NumGetDomMemoryDefPassed++;

    if (!DebugCounter::shouldExecute(MemorySSACounter))
      continue;
    if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
      if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
        continue;
      LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                        << *DeadLocWrapper.DefInst << "\n  KILLER: "
                        << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
      ++NumFastStores;
      Changed = true;
    } else {
      // Check if DeadI overwrites KillingI.
      int64_t KillingOffset = 0;
      int64_t DeadOffset = 0;
      OverwriteResult OR =
          isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
                      KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
                      KillingOffset, DeadOffset);
      if (OR == OW_MaybePartial) {
        auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
        OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
                                KillingOffset, DeadOffset,
                                DeadLocWrapper.DefInst, IOL);
      }
      if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
        auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
        auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
        // tryToMergePartialOverlappingStores requires that DeadSI dominates
        // KillingSI.
        if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
          if (Constant *Merged = tryToMergePartialOverlappingStores(
                  KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
                  &DT)) {
            // Update stored value of earlier store to merged constant.
            DeadSI->setOperand(0, Merged);
            ++NumModifiedStores;
            Changed = true;
            DeletedKillingLoc = true;

            // Remove the killing store and remove any outstanding overlap
            // intervals for the updated store.
            deleteDeadInstruction(KillingSI, &Deleted);
            auto I = IOLs.find(DeadSI->getParent());
            if (I != IOLs.end())
              I->second.erase(DeadSI);
            break;
          }
        }
      }
      if (OR == OW_Complete) {
        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                          << *DeadLocWrapper.DefInst << "\n  KILLER: "
                          << *KillingLocWrapper.DefInst << '\n');
        deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
        ++NumFastStores;
        Changed = true;
      }
    }
  }

  assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
         "SkipStores and Deleted out of sync?");

  return {Changed, DeletedKillingLoc};
}
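// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the pass): the index-based worklist pattern
// used by the ToCheck loop above and by getDomMemoryDef /
// isWriteAtEndOfFunction. The container is scanned by index while new items
// are appended, and a visited/dedup set keeps each node from being pushed
// twice, so the walk terminates even on cyclic graphs. Plain C++ with
// illustrative names; the simplified limit check only approximates the
// ScanLimit bookkeeping above.

#include <cassert>
#include <cstddef>
#include <unordered_set>
#include <vector>

template <typename Node, typename SuccFn>
bool walkUses(Node Start, SuccFn Successors, size_t ScanLimit) {
  std::vector<Node> WorkList;
  std::unordered_set<Node> Visited;
  auto Push = [&](Node N) {
    if (Visited.insert(N).second)
      WorkList.push_back(N);
  };
  Push(Start);
  for (size_t I = 0; I < WorkList.size(); ++I) {
    if (WorkList.size() - I > ScanLimit)
      return false; // bail out once too many accesses remain to be checked
    for (Node Succ : Successors(WorkList[I]))
      Push(Succ);
  }
  return true; // every reachable node was examined within the limit
}

int main() {
  // Tiny graph 0 -> {1, 2}, 1 -> {2}, 2 -> {0}: the cycle is handled by the
  // visited set and the walk finishes.
  std::vector<std::vector<int>> G = {{1, 2}, {2}, {0}};
  assert(walkUses(0, [&](int N) { return G[N]; }, 10));
}
// ---------------------------------------------------------------------------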
bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
  if (KillingDefWrapper.DefinedLocations.empty()) {
    LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
                      << *KillingDefWrapper.DefInst << "\n");
    return false;
  }

  bool MadeChange = false;
  for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
                      << *KillingLocWrapper.MemDef << " ("
                      << *KillingLocWrapper.DefInst << ")\n");
    auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
    MadeChange |= Changed;

    // Check if the store is a no-op.
    if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
                                          KillingLocWrapper.UnderlyingObject)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: "
                        << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(KillingLocWrapper.DefInst);
      NumRedundantStores++;
      MadeChange = true;
      continue;
    }
    // Can we form a calloc from a memset/malloc pair?
    if (!DeletedKillingLoc &&
        tryFoldIntoCalloc(KillingLocWrapper.MemDef,
                          KillingLocWrapper.UnderlyingObject)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
                        << "  DEAD: " << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(KillingLocWrapper.DefInst);
      MadeChange = true;
      continue;
    }
  }
  return MadeChange;
}
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                DominatorTree &DT, PostDominatorTree &PDT,
                                const TargetLibraryInfo &TLI,
                                const LoopInfo &LI) {
  bool MadeChange = false;
  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
  // For each store:
  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
    MemoryDef *KillingDef = State.MemDefs[I];
    if (State.SkipStores.count(KillingDef))
      continue;

    MemoryDefWrapper KillingDefWrapper(
        KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
                                        EnableInitializesImprovement));
    MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
  }

  if (EnablePartialOverwriteTracking)
    for (auto &KV : State.IOLs)
      MadeChange |= State.removePartiallyOverlappedStores(KV.second);

  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();

  while (!State.ToRemove.empty()) {
    Instruction *DeadInst = State.ToRemove.pop_back_val();
    DeadInst->eraseFromParent();
  }

  return MadeChange;
}

PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  // ... fetch AA, MSSA, DT, PDT, TLI and LI from the analysis manager ...
  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
  if (AreStatisticsEnabled())
    for (auto &I : instructions(F))
      NumRemainingStores += isa<StoreInst>(&I);
#endif
  // ...
}