#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");
74 "Controls which instructions are removed");
78 cl::desc(
"Enable imprecision in EarlyCSE in pathological cases, in exchange "
79 "for faster compile. Caps the MemorySSA clobbering calls."));
83 cl::desc(
"Perform extra assertion checking to verify that SimpleValue's hash "
84 "function is well-behaved w.r.t. its isEqual predicate"));
  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions, plus constrained FP
    // intrinsics that behave like the ordinary instructions handled below.
    if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
      if (Function *F = CI->getCalledFunction()) {
        switch ((Intrinsic::ID)F->getIntrinsicID()) {
        case Intrinsic::experimental_constrained_fadd:
        case Intrinsic::experimental_constrained_fsub:
        case Intrinsic::experimental_constrained_fmul:
        case Intrinsic::experimental_constrained_fdiv:
        case Intrinsic::experimental_constrained_frem:
        case Intrinsic::experimental_constrained_fptosi:
        case Intrinsic::experimental_constrained_sitofp:
        case Intrinsic::experimental_constrained_fptoui:
        case Intrinsic::experimental_constrained_uitofp:
        case Intrinsic::experimental_constrained_fcmp:
        case Intrinsic::experimental_constrained_fcmps: {
          auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
          if (CFP->getExceptionBehavior() &&
              CFP->getExceptionBehavior() == fp::ebStrict)
            return false;
          // Since we CSE across function calls we must not allow
          // the rounding mode to change.
          if (CFP->getRoundingMode() &&
              CFP->getRoundingMode() == RoundingMode::Dynamic)
            return false;
          return true;
        }
        }
      }
      // Calls inside a pre-split coroutine are rejected conservatively: a
      // "readnone" call may still observe the thread id, and a coroutine can
      // resume on a different thread.
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
             !CI->getFunction()->isPresplitCoroutine();
    }
    return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
           isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
           isa<FreezeInst>(Inst);
  }
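// Note: SimpleValue keys a ScopedHashTable through a DenseMapInfo
// specialization, whose contract requires that isEqual(A, B) implies
// getHashValue(A) == getHashValue(B). The -earlycse-debug-hash option above
// asserts exactly this property for every pair that gets compared.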
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
// ...
    // matchSelectWithOptionalNotCond(): an inverted select condition swaps
    // the arms and, for min/max flavors, the compare predicate.
    Pred = ICmpInst::getSwappedPredicate(Pred);
// ...
  // getHashValueImpl(): canonicalize commutative binops by operand order so
  // that "a + b" and "b + a" hash identically.
  if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
    std::swap(LHS, RHS);
// ...
  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    // Compares can be commuted by swapping the comparands and updating the
    // predicate; pick the form with the comparands in sorted order.
    if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
      std::swap(LHS, RHS);
      Pred = SwappedPred;
    }
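// For example, "icmp sgt %a, %b" and "icmp slt %b, %a" canonicalize to the
// same (operands, predicate) tuple, so they hash equal and compare equal.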
  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
    return hash_combine(FI->getOpcode(), FI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
          isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
          isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Handle intrinsics with commutative operands.
  auto *II = dyn_cast<IntrinsicInst>(Inst);
  if (II && II->isCommutative() && II->arg_size() >= 2) {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    if (LHS > RHS)
      std::swap(LHS, RHS);
    return hash_combine(II->getOpcode(), LHS, RHS);
  }

  // gc.relocate is 'special': its second and third operands are not real
  // values but indices into the statepoint's argument list, so hash the
  // values they point to instead.
  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
    return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
                        GCR->getBasePtr(), GCR->getDerivedPtr());

  if (CallInst *CI = dyn_cast<CallInst>(Inst))
    return hashCallInst(CI);

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}
static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true)) {
    // Convergent calls implicitly depend on the set of threads that is
    // currently executing, so conservatively treat them as unequal when they
    // are in different basic blocks.
    if (CallInst *CI = dyn_cast<CallInst>(LHSI);
        CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
      return false;
    return true;
  }

  // If we're not strictly identical, we still might be a commutable
  // instruction.
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality.
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality with a swapped predicate.
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  auto *LII = dyn_cast<IntrinsicInst>(LHSI);
  auto *RII = dyn_cast<IntrinsicInst>(RHSI);
  if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
      LII->isCommutative() && LII->arg_size() >= 2) {
    return LII->getArgOperand(0) == RII->getArgOperand(1) &&
           LII->getArgOperand(1) == RII->getArgOperand(0) &&
           std::equal(LII->arg_begin() + 2, LII->arg_end(),
                      RII->arg_begin() + 2, RII->arg_end());
  }

  // See the gc.relocate comment in getHashValueImpl().
  if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
    if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
      return GCR1->getOperand(0) == GCR2->getOperand(0) &&
             GCR1->getBasePtr() == GCR2->getBasePtr() &&
             GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
  // Selects (and select-based min/max patterns) can be non-trivially
  // equivalent via inverted conditions and swapped operands.
  Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
// ...
      // Min/max patterns match with commuted operands.
      return ((LHSA == RHSA && LHSB == RHSB) ||
              (LHSA == RHSB && LHSB == RHSA));
// ...
    // select Cond, A, B <--> select not(Cond), B, A
    if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
      return true;
// ...
    if (LHSA == RHSB && LHSB == RHSA) {
      // (inverted-condition, swapped-arm case)
      // ...
    }
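// For example, "select %c, %a, %b" and "select (xor %c, true), %b, %a"
// compute the same value, and this predicate reports them equal.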
    // CallValue::canHandle() only accepts read-only calls that return a
    // value and are not inside a pre-split coroutine.
    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory() ||
        CI->getFunction()->isPresplitCoroutine())
      return false;
    return true;
// ...
  static bool isEqual(CallValue LHS, CallValue RHS);
// ...
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHS.Inst == RHS.Inst;
struct GEPValue {
  Instruction *Inst;
  std::optional<int64_t> ConstantOffset;

  GEPValue(Instruction *I, std::optional<int64_t> ConstantOffset)
      : Inst(I), ConstantOffset(ConstantOffset) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }
// ...
  static bool canHandle(Instruction *Inst) {
    return isa<GetElementPtrInst>(Inst);
  }
};
// ...
  static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
// ...
unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
  auto *GEP = cast<GetElementPtrInst>(Val.Inst);
  // If a constant byte offset is known, hash on (opcode, base pointer,
  // offset) so that differently spelled GEPs with the same effect collide.
  if (Val.ConstantOffset.has_value())
    return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
                        Val.ConstantOffset.value());
  return hash_combine(
      GEP->getOpcode(),
      hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
}

bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHS.Inst == RHS.Inst;
  auto *LGEP = cast<GetElementPtrInst>(LHS.Inst);
  auto *RGEP = cast<GetElementPtrInst>(RHS.Inst);
  if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
    return false;
  if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
    return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
  return LGEP->isIdenticalToWhenDefined(RGEP);
}
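// For example, with a known constant offset these two GEPs compare equal and
// the later one is CSE'd, even though the spelling differs:
//   %p1 = getelementptr i32, ptr %base, i64 1
//   %p2 = getelementptr i8,  ptr %base, i64 4
// (same pointer operand, same accumulated byte offset of 4)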
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;

  /// A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ScopedHTType AvailableValues;
// ...
  struct LoadValue {
    Instruction *DefInst = nullptr;
    unsigned Generation = 0;
    int MatchingId = -1;
    bool IsAtomic = false;
    bool IsLoad = false;

    LoadValue() = default;
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic, bool IsLoad)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic), IsLoad(IsLoad) {}
  };
  using LoadMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<Value *, LoadValue>>;
  using LoadHTType =
      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                      LoadMapAllocator>;
  LoadHTType AvailableLoads;

  // Memory locations known to be invariant, mapped to the generation at
  // which they became invariant.
  using InvariantMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MemoryLocation, unsigned>>;
  using InvariantHTType =
      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
                      InvariantMapAllocator>;
  InvariantHTType AvailableInvariants;
// ...
  CallHTType AvailableCalls;
// ...
  using GEPMapAllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<GEPValue, Value *>>;
// ...
  GEPHTType AvailableGEPs;

  unsigned CurrentGeneration = 0;
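  // CurrentGeneration acts as a coarse memory clock: any instruction that
  // may write to memory (and any block with multiple predecessors) bumps it,
  // and a cached load or call is only reused when its recorded generation
  // still matches, or when MemorySSA can prove there is no intervening
  // clobber.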
  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
           const TargetTransformInfo &TTI, DominatorTree &DT,
           AssumptionCache &AC, MemorySSA *MSSA)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
        MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}

  bool run();

private:
  unsigned ClobberCounter = 0;
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              GEPHTType &AvailableGEPs)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
          GEPScope(AvailableGEPs) {}
    NodeScope(const NodeScope &) = delete;
    NodeScope &operator=(const NodeScope &) = delete;

  private:
    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    InvariantHTType::ScopeTy InvariantScope;
    CallHTType::ScopeTy CallScope;
    GEPHTType::ScopeTy GEPScope;
  };
  // Contains all the needed information to create a stack for doing a depth
  // first traversal of the tree. This includes scopes for values and loads
  // as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
              DomTreeNode::const_iterator child,
              DomTreeNode::const_iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n),
          ChildIter(child), EndIter(end),
          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
                 AvailableCalls, AvailableGEPs) {}
    StackNode(const StackNode &) = delete;
    StackNode &operator=(const StackNode &) = delete;

    // Accessors.
    unsigned currentGeneration() const { return CurrentGeneration; }
    unsigned childGeneration() const { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::const_iterator childIter() const { return ChildIter; }

    DomTreeNode *nextChild() {
      DomTreeNode *Child = *ChildIter;
      ++ChildIter;
      return Child;
    }

    DomTreeNode::const_iterator end() const { return EndIter; }
    bool isProcessed() const { return Processed; }
    void process() { Processed = true; }

  private:
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::const_iterator ChildIter;
    DomTreeNode::const_iterator EndIter;
    NodeScope Scopes;
    bool Processed = false;
  };
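  // Each dominator tree node gets its own NodeScope, so facts recorded while
  // visiting a block are retracted automatically once the traversal leaves
  // that block's dominance subtree; nothing ever leaks into blocks the
  // defining block does not dominate.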
  /// Wrapper class to handle memory instructions, including loads, stores,
  /// and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
        : Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
        IntrID = II->getIntrinsicID();
        if (TTI.getTgtMemIntrinsic(II, Info))
          return;
        if (isHandledNonTargetIntrinsic(IntrID)) {
          switch (IntrID) {
          case Intrinsic::masked_load:
            Info.PtrVal = Inst->getOperand(0);
            Info.MatchingId = Intrinsic::masked_load;
            Info.ReadMem = true;
            Info.WriteMem = false;
            Info.IsVolatile = false;
            break;
          case Intrinsic::masked_store:
            Info.PtrVal = Inst->getOperand(1);
            // Deliberately reuse masked_load as the "matching id" for masked
            // stores too, so masked loads and stores on the same address can
            // match each other while never matching plain loads/stores.
            Info.MatchingId = Intrinsic::masked_load;
            Info.ReadMem = false;
            Info.WriteMem = true;
            Info.IsVolatile = false;
            break;
          }
        }
      }
    }
    bool isLoad() const {
      if (IntrID != 0)
        return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }

    bool isStore() const {
      if (IntrID != 0)
        return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }

    bool isAtomic() const {
      if (IntrID != 0)
        return Info.Ordering != AtomicOrdering::NotAtomic;
      return Inst->isAtomic();
    }

    bool isUnordered() const {
      if (IntrID != 0)
        return Info.isUnordered();

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer.
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IntrID != 0)
        return Info.IsVolatile;

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer.
      return true;
    }

    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->hasMetadata(LLVMContext::MD_invariant_load);
      return false;
    }
    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure, which holds non-negative
    // values only.
    int getMatchingId() const {
      if (IntrID != 0)
        return Info.MatchingId;
      return -1;
    }
// ...
    bool mayReadFromMemory() const {
      if (IntrID != 0)
        return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }

    bool mayWriteToMemory() const {
      if (IntrID != 0)
        return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }
// ...
  static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
    switch (ID) {
    case Intrinsic::masked_load:
    case Intrinsic::masked_store:
      return true;
    }
    return false;
  }
  static bool isHandledNonTargetIntrinsic(const Value *V) {
    if (auto *II = dyn_cast<IntrinsicInst>(V))
      return isHandledNonTargetIntrinsic(II->getIntrinsicID());
    return false;
  }
  Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
                          unsigned CurrentGeneration);

  bool overridingStores(const ParseMemoryInst &Earlier,
                        const ParseMemoryInst &Later);

  Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
    // The load or the store's first operand.
    Value *V;
    if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        V = II;
        break;
      case Intrinsic::masked_store:
        V = II->getOperand(0);
        break;
      default:
        return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                     ExpectedType);
      }
    } else {
      V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
    }

    return V->getType() == ExpectedType ? V : nullptr;
  }
  /// Return true if the instruction is known to only operate on memory
  /// provably invariant at the given generation.
  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);

  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                           Instruction *EarlierInst, Instruction *LaterInst);
  static bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
                                        const IntrinsicInst *Later) {
    // Is Mask0 a submask of Mask1, i.e. does Mask0 enable only lanes that
    // Mask1 also enables?
    auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
      if (Mask0 == Mask1)
        return true;
      if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
        return false;
      auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
      auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
      if (!Vec0 || !Vec1)
        return false;
      if (Vec0->getType() != Vec1->getType())
        return false;
      for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
        Constant *Elem0 = Vec0->getOperand(i);
        Constant *Elem1 = Vec1->getOperand(i);
        auto *Int0 = dyn_cast<ConstantInt>(Elem0);
        if (Int0 && Int0->isZero())
          continue;
        auto *Int1 = dyn_cast<ConstantInt>(Elem1);
        if (Int1 && !Int1->isZero())
          continue;
        if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
          return false;
        if (Elem0 == Elem1)
          continue;
        return false;
      }
      return true;
    };
    auto PtrOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
        return II->getOperand(0);
      if (II->getIntrinsicID() == Intrinsic::masked_store)
        return II->getOperand(1);
      llvm_unreachable("Unexpected IntrinsicInst");
    };
    auto MaskOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
        return II->getOperand(2);
      if (II->getIntrinsicID() == Intrinsic::masked_store)
        return II->getOperand(3);
      llvm_unreachable("Unexpected IntrinsicInst");
    };
    auto ThruOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
        return II->getOperand(3);
      llvm_unreachable("Unexpected IntrinsicInst");
    };

    if (PtrOp(Earlier) != PtrOp(Later))
      return false;

    Intrinsic::ID IDE = Earlier->getIntrinsicID();
    Intrinsic::ID IDL = Later->getIntrinsicID();
    if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
      // Replacing the later load with the earlier one: masks and
      // pass-throughs must match, or the later pass-through is undef and the
      // earlier mask covers the later one.
      if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
        return true;
      if (!isa<UndefValue>(ThruOp(Later)))
        return false;
      return IsSubmask(MaskOp(Later), MaskOp(Earlier));
    }
    if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
      // Forwarding a stored value to a load: the load's mask must be a
      // subset of the store's mask, and its pass-through must be undef.
      if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
        return false;
      return isa<UndefValue>(ThruOp(Later));
    }
    if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
      // Removing a store of the loaded value: the store's mask must be a
      // subset of the load's mask.
      return IsSubmask(MaskOp(Later), MaskOp(Earlier));
    }
    if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
      // Removing the earlier (dead) store: the later store must cover at
      // least the earlier store's lanes.
      return IsSubmask(MaskOp(Earlier), MaskOp(Later));
    }
    return false;
  }
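  // For example, <i1 1, i1 1, i1 0, i1 0> is a submask of
  // <i1 1, i1 1, i1 1, i1 0>: every lane the first mask enables is also
  // enabled by the second.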
    MSSAUpdater->removeMemoryAccess(&Inst, true);
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;
// ...
  // Cap the number of (expensive) MemorySSA clobber walks; past the cap,
  // settle for the cheaper, more conservative defining access.
  MemoryAccess *LaterDef;
  if (ClobberCounter < EarlyCSEMssaOptCap) {
    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
    ClobberCounter++;
  } else
    LaterDef = LaterMA->getDefiningAccess();

  return MSSA->dominates(LaterDef, EarlierMA);
}
bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
  // A location loaded from with an invariant_load is assumed to *never*
  // change within the visible scope of the compilation.
  if (auto *LI = dyn_cast<LoadInst>(I))
    if (LI->hasMetadata(LLVMContext::MD_invariant_load))
      return true;

  auto MemLocOpt = MemoryLocation::getOrNone(I);
  if (!MemLocOpt)
    return false;
  MemoryLocation MemLoc = *MemLocOpt;
  if (!AvailableInvariants.count(MemLoc))
    return false;

  // Is the generation at which this became invariant older than the
  // generation we are checking against?
  return AvailableInvariants.lookup(MemLoc) <= GenAt;
}
bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
                                     const BranchInst *BI, const BasicBlock *BB,
                                     const BasicBlock *Pred) {
  // The branch condition is known true (resp. false) on the edge into the
  // successor we are about to process.
  auto *TorF = (BI->getSuccessor(0) == BB)
                   ? ConstantInt::getTrue(BB->getContext())
                   : ConstantInt::getFalse(BB->getContext());
  auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
                       Value *&RHS) {
    if (Opcode == Instruction::And &&
        match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
      return true;
    else if (Opcode == Instruction::Or &&
             match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
      return true;
    return false;
  };
  // An AND condition propagates its operands into the true branch; an OR
  // condition propagates them into the false branch.
  unsigned PropagateOpcode =
      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;

  bool MadeChanges = false;
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Instruction *, 4> Visited;
  WorkList.push_back(CondInst);
  while (!WorkList.empty()) {
    Instruction *Curr = WorkList.pop_back_val();

    AvailableValues.insert(Curr, TorF);
    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                      << Curr->getName() << "' as " << *TorF << " in "
                      << BB->getName() << "\n");
// ...
    Value *LHS, *RHS;
    if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
      for (Value *Op : {LHS, RHS})
        if (auto *OPI = dyn_cast<Instruction>(Op))
          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
            WorkList.push_back(OPI);
  }

  return MadeChanges;
}
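// For example, if the predecessor ends in "br i1 %and, label %bb, ..." where
// %and is a logical and of %a and %b, then inside %bb the values %and, %a,
// and %b are all recorded as true and their dominated uses are folded.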
Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
                                  unsigned CurrentGeneration) {
  if (InVal.DefInst == nullptr)
    return nullptr;
  if (InVal.MatchingId != MemInst.getMatchingId())
    return nullptr;
  // We don't yet handle removing loads with ordering of any kind.
  if (MemInst.isVolatile() || !MemInst.isUnordered())
    return nullptr;
  // We can't replace an atomic load with one which isn't also atomic.
  if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
    return nullptr;

  // The value returned from this function is used differently depending on
  // whether MemInst is a load or a store: a load is replaced by it, while
  // for a store it is compared against the available value.
  bool MemInstMatching = !MemInst.isLoad();
  Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
  Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();

  // For stores check the result values before checking the memory
  // generation (otherwise isSameMemGeneration may crash).
  Value *Result = MemInst.isStore()
                      ? getOrCreateResult(Matching, Other->getType())
                      : nullptr;
  if (MemInst.isStore() && InVal.DefInst != Result)
    return nullptr;

  // Deal with non-target memory intrinsics.
  bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
  bool OtherNTI = isHandledNonTargetIntrinsic(Other);
  if (OtherNTI != MatchingNTI)
    return nullptr;
  if (OtherNTI && MatchingNTI) {
    if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
                                   cast<IntrinsicInst>(MemInst.get())))
      return nullptr;
  }

  if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
      !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
                           MemInst.get()))
    return nullptr;

  if (!Result)
    Result = getOrCreateResult(Matching, Other->getType());
  return Result;
}
static void combineIRFlags(Instruction &From, Value *To) {
  if (auto *I = dyn_cast<Instruction>(To)) {
    // If I being poison triggers UB, there is no need to drop those flags;
    // otherwise only retain the flags present on both I and From.
    if (isa<FPMathOperator>(I) ||
        (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
      I->andIRFlags(&From);
  }
  if (isa<CallBase>(&From) && isa<CallBase>(To)) {
    // Keep only the attributes valid at both call sites.
    [[maybe_unused]] bool Success =
        cast<CallBase>(To)->tryIntersectAttributes(cast<CallBase>(&From));
    assert(Success && "Failed to intersect attributes in callsites that "
                      "passed identical check");
  }
}
bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
                                const ParseMemoryInst &Later) {
  // Can we remove the Earlier store because of the Later store?
  assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
         "Violated invariant");
  if (Earlier.getPointerOperand() != Later.getPointerOperand())
    return false;
  if (!Earlier.getValueType() || !Later.getValueType() ||
      Earlier.getValueType() != Later.getValueType())
    return false;
  if (Earlier.getMatchingId() != Later.getMatchingId())
    return false;
  // At the moment we don't remove ordered stores, but we do remove
  // unordered atomic stores.
  if (!Earlier.isUnordered() || !Later.isUnordered())
    return false;

  // Deal with non-target memory intrinsics.
  bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
  bool LNTI = isHandledNonTargetIntrinsic(Later.get());
  if (ENTI && LNTI)
    return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
                                     cast<IntrinsicInst>(Later.get()));

  // Intrinsics are currently only matched against other intrinsics, so the
  // stores can match only if neither is an intrinsic.
  return ENTI == LNTI;
}
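// For example, this removes the first store in:
//   store i32 1, ptr %p     ; dead: overwritten with no intervening read
//   store i32 2, ptr %p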
bool EarlyCSE::processNode(DomTreeNode *Node) {
  bool Changed = false;
  BasicBlock *BB = Node->getBlock();

  // If this block has multiple predecessors, they could have invalidated the
  // live-out memory values of our parent, so conservatively start a new
  // generation.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If the single predecessor ends in a conditional branch, we can infer the
  // value of the branch condition along this path.
  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
    if (BI && BI->isConditional()) {
      auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
      if (CondInst && SimpleValue::canHandle(CondInst))
        Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
    }
  }
    // Skip assume intrinsics: they don't really write (they are only marked
    // as such to preserve control dependencies), but their condition is
    // known true for all dominated locations.
    if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
      auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
      if (CondI && SimpleValue::canHandle(CondI)) {
        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      } else
        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
      continue;
    }

    // Likewise, noalias scope declarations don't actually write.
    if (match(&Inst,
              m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
                        << '\n');
      continue;
    }

    // Skip sideeffect intrinsics, for the same reason as assumes.
    if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
      continue;
    }

    // Skip pseudoprobe intrinsics, for the same reason as assumes.
    if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
      continue;
    }
    // We can skip all invariant.start intrinsics since they only read
    // memory; for unused (unbounded-scope) starts, also remember the
    // location as invariant from this generation on.
    if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
      if (!Inst.use_empty())
        continue;
      MemoryLocation MemLoc =
          MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
      // Don't start a scope if we already have a better one pushed.
      if (!AvailableInvariants.count(MemLoc))
        AvailableInvariants.insert(MemLoc, CurrentGeneration);
      continue;
    }

    if (isGuard(&Inst)) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
        if (SimpleValue::canHandle(CondI)) {
          // Do we already know the actual value of this condition?
          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
            // Is the condition known to be true?
            if (isa<ConstantInt>(KnownCond) &&
                cast<ConstantInt>(KnownCond)->isOne()) {
              LLVM_DEBUG(dbgs() << "EarlyCSE removing guard: " << Inst << '\n');
              salvageKnowledge(&Inst, &AC);
              removeMSSA(Inst);
              Inst.eraseFromParent();
              Changed = true;
              continue;
            }
            // Use the known value if it wasn't true.
            cast<CallInst>(Inst).setArgOperand(0, KnownCond);
          }
          // The condition we're guarding on is true for all dominated
          // locations.
          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
        }
      }
      // Guards read all memory but write none: keep the generation, but
      // consume LastStore to avoid an incorrect DSE.
      LastStore = nullptr;
      continue;
    }
    // If the instruction can be simplified (e.g. X+0 = X), replace it with
    // its simpler value.
    if (Value *V = simplifyInstruction(&Inst, SQ)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << "  to: " << *V
                        << '\n');
      bool Killed = false;
      if (!Inst.use_empty()) {
        Inst.replaceAllUsesWith(V);
        Changed = true;
      }
      if (isInstructionTriviallyDead(&Inst, &TLI)) {
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        Killed = true;
      }
      if (Changed)
        ++NumSimplify;
      if (Killed)
        continue;
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(&Inst)) {
      if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
        assert(CI->getExceptionBehavior() != fp::ebStrict &&
               "Unexpected ebStrict from SimpleValue::canHandle()");
        assert((!CI->getRoundingMode() ||
                CI->getRoundingMode() != RoundingMode::Dynamic) &&
               "Unexpected dynamic rounding from SimpleValue::canHandle()");
      }
      // See if the instruction has an available value; if so, use it.
      if (Value *V = AvailableValues.lookup(&Inst)) {
        combineIRFlags(Inst, V);
        Inst.replaceAllUsesWith(V);
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(&Inst, &Inst);
      continue;
    }
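    // For example, given two occurrences of the same expression:
    //   %x = add i32 %a, %b
    //   %y = add i32 %a, %b   ; replaced by %x and erased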
    ParseMemoryInst MemInst(&Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // Ignore volatile or ordered loads: don't CSE them, and don't CSE
      // anything across them.
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      if (MemInst.isInvariantLoad()) {
        // If we pass an invariant load, we know that memory location is
        // indefinitely constant, so we can establish an equivalence to
        // invariant.start as well.
        auto MemLoc = MemoryLocation::get(&cast<LoadInst>(Inst));
        if (!AvailableInvariants.count(MemLoc))
          AvailableInvariants.insert(MemLoc, CurrentGeneration);
      }

      // If we have an available version of this load, and if it is the right
      // generation or the load is known to be from an invariant location,
      // replace this instruction.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
                          << "  to: " << *InVal.DefInst << '\n');
        if (InVal.IsLoad)
          if (auto *I = dyn_cast<Instruction>(Op))
            combineMetadataForCSE(I, &Inst, false);
        if (!Inst.use_empty())
          Inst.replaceAllUsesWith(Op);
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        ++NumCSELoad;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(MemInst.getPointerOperand(),
                            LoadValue(&Inst, CurrentGeneration,
                                      MemInst.getMatchingId(),
                                      MemInst.isAtomic(), MemInst.isLoad()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory, forget LastStore, unless it
    // is a load/store intrinsic that doesn't actually read.
    if (Inst.mayReadFromMemory() &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;
    // If this is a read-only call, process it.
    if (CallValue::canHandle(&Inst)) {
      // If we have an available version of this call, and if it is the
      // right generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              &Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
                          << "  to: " << *InVal.first << '\n');
        combineIRFlags(Inst, InVal.first);
        Inst.replaceAllUsesWith(InVal.first);
        // ... (salvage debug info, update MemorySSA, erase, ++NumCSECall)
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
      continue;
    }
    // Compare GEPs by their base pointer plus accumulated constant offset.
    if (GEPValue::canHandle(&Inst)) {
      auto *GEP = cast<GetElementPtrInst>(&Inst);
      APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
      GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
                               ? Offset.getSExtValue()
                               : std::optional<int64_t>());
      if (Value *V = AvailableGEPs.lookup(GEPVal)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << "  to: " << *V
                          << '\n');
        combineMetadataForCSE(cast<GetElementPtrInst>(V), GEP, false);
        GEP->replaceAllUsesWith(V);
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        ++NumCSEGEP;
        continue;
      }

      // Otherwise, just remember that we have this GEP.
      AvailableGEPs.insert(GEPVal, &Inst);
      continue;
    }
    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads to before the fence, so
    // it needs no generation bump.
    if (auto *FI = dyn_cast<FenceInst>(&Inst))
      if (FI->getOrdering() == AtomicOrdering::Release)
        continue;

    // Write-back DSE: storing back the value just loaded from the same
    // location, with no intervening clobber, is a removable store.
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                    MemInst.getPointerOperand() ||
                MSSA) &&
               "can't have an intervening store if not using MemorySSA!");
        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        ++NumDSE;
        continue;
      }
    }

    // Otherwise, if this may write to memory, invalidate available memory
    // values by bumping the generation.
    if (Inst.mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // Trivial DSE: delete an earlier store that this one fully overrides.
        if (LastStore &&
            overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
          LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                            << "  due to: " << Inst << '\n');
          removeMSSA(*LastStore);
          LastStore->eraseFromParent();
          Changed = true;
          ++NumDSE;
          LastStore = nullptr;
        }

        // Remember the stored value as an available "load" at this pointer.
        AvailableLoads.insert(MemInst.getPointerOperand(),
                              LoadValue(&Inst, CurrentGeneration,
                                        MemInst.getMatchingId(),
                                        MemInst.isAtomic(), MemInst.isLoad()));

        // Only unordered, non-volatile stores are DSE candidates.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = &Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}
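// For example, the writeback case removes the store in:
//   %v = load i32, ptr %p
//   store i32 %v, ptr %p   ; dead: writes back the value just loaded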
bool EarlyCSE::run() {
  // Note: a deque is used here so the worklist can grow large without the
  // reallocation cost of a vector.
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      AvailableGEPs, CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");

  // Process the stack: a non-recursive depth-first walk of the dominator
  // tree.
  while (!nodesToProcess.empty()) {
    StackNode *NodeToProcess = nodesToProcess.back();

    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      CurrentGeneration = NodeToProcess->currentGeneration();
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *Child = NodeToProcess->nextChild();
      nodesToProcess.push_back(new StackNode(
          AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
          AvailableGEPs, NodeToProcess->childGeneration(), Child,
          Child->begin(), Child->end()));
    } else {
      // Fully processed with no children left, so pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  }

  return Changed;
}
  EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
  if (!CSE.run())
    return PreservedAnalyses::all();
// ...
void EarlyCSEPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
template <bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
// ...
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;

    EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);

    return CSE.run();
  }
char EarlyCSELegacyPass::ID = 0;
// ...
using EarlyCSEMemSSALegacyPass =
    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;

char EarlyCSEMemSSALegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
  if (UseMemorySSA)
    return new EarlyCSEMemSSALegacyPass();
  return new EarlyCSELegacyPass();
}

INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                    "Early CSE w/ MemorySSA", false, false)
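// Typical usage (a sketch; flag spellings may vary across LLVM versions):
//   opt -passes='early-cse<memssa>' input.ll -S      // new pass manager
// or programmatically via createEarlyCSEPass(/*UseMemorySSA=*/true) for the
// legacy pass manager.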