#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");

DEBUG_COUNTER(CSECounter, "early-cse",
              "Controls which instructions are removed");

static cl::opt<unsigned> EarlyCSEMssaOptCap(
    "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
    cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
             "for faster compile. Caps the MemorySSA clobbering calls."));

static cl::opt<bool> EarlyCSEDebugHash(
    "earlycse-debug-hash", cl::init(false), cl::Hidden,
    cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
             "function is well-behaved w.r.t. its isEqual predicate"));
  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
      if (Function *F = CI->getCalledFunction()) {
        switch ((Intrinsic::ID)F->getIntrinsicID()) {
        case Intrinsic::experimental_constrained_fadd:
        case Intrinsic::experimental_constrained_fsub:
        case Intrinsic::experimental_constrained_fmul:
        case Intrinsic::experimental_constrained_fdiv:
        case Intrinsic::experimental_constrained_frem:
        case Intrinsic::experimental_constrained_fptosi:
        case Intrinsic::experimental_constrained_sitofp:
        case Intrinsic::experimental_constrained_fptoui:
        case Intrinsic::experimental_constrained_uitofp:
        case Intrinsic::experimental_constrained_fcmp:
        case Intrinsic::experimental_constrained_fcmps: {
          auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
          if (CFP->getExceptionBehavior() &&
              CFP->getExceptionBehavior() == fp::ebStrict)
            return false;
          // Since we CSE across function calls we must not allow
          // the rounding mode to change.
          if (CFP->getRoundingMode() &&
              CFP->getRoundingMode() == RoundingMode::Dynamic)
            return false;
          return true;
        }
        }
      }
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
             !CI->getFunction()->isPresplitCoroutine();
    }
    return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
           isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
           isa<FreezeInst>(Inst);
  }
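// The hash function and isEqual() below must agree: both canonicalize
// commutative operands and swapped compare predicates so that equivalent
// instructions land in the same hash bucket. The -earlycse-debug-hash option
// above asserts exactly this consistency property.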
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);

  // ...

      Pred = ICmpInst::getSwappedPredicate(Pred);

  // ...

    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);
  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    Value *LHS = CI->getOperand(0), *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
    if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
      std::swap(LHS, RHS);
      Pred = SwappedPred;
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }
  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
    return hash_combine(FI->getOpcode(), FI->getOperand(0));

  if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
          isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
          isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
         "Invalid/unknown instruction");
  // Handle intrinsics with commutative operands.
  auto *II = dyn_cast<IntrinsicInst>(Inst);
  if (II && II->isCommutative() && II->arg_size() >= 2) {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    if (LHS > RHS)
      std::swap(LHS, RHS);
    return hash_combine(
        II->getOpcode(), LHS, RHS,
        hash_combine_range(II->value_op_begin() + 2, II->value_op_end()));
  }

  // gc.relocate is 'special' call: its second and third operands are
  // not real values, but indices into statepoint's argument list.
  // Get values they point to.
  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
    return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
                        GCR->getBasePtr(), GCR->getDerivedPtr());
  if (CallInst *CI = dyn_cast<CallInst>(Inst))
    return hashCallInst(CI);
static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;

  if (LHSI->isIdenticalToWhenDefined(RHSI)) {
    // Convergent calls implicitly depend on the set of threads that is
    // currently executing, so conservatively return false if they are in
    // different basic blocks.
    if (CallInst *CI = dyn_cast<CallInst>(LHSI);
        CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
      return false;

    return true;
  }
  // If this is a commutative binary operator, check for a commuted match.
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality: X op Y == Y op X
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality: swapping the operands requires swapping the predicate.
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }
  auto *LII = dyn_cast<IntrinsicInst>(LHSI);
  auto *RII = dyn_cast<IntrinsicInst>(RHSI);
  if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
      LII->isCommutative() && LII->arg_size() >= 2) {
    return LII->getArgOperand(0) == RII->getArgOperand(1) &&
           LII->getArgOperand(1) == RII->getArgOperand(0) &&
           std::equal(LII->arg_begin() + 2, LII->arg_end(),
                      RII->arg_begin() + 2, RII->arg_end());
  }
  // See comment above in `getHashValue()`.
  if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
    if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
      return GCR1->getOperand(0) == GCR2->getOperand(0) &&
             GCR1->getBasePtr() == GCR2->getBasePtr() &&
             GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
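// Beyond structural identity, isEqualImpl() also recognizes min/max/abs
// select patterns (via matchSelectWithOptionalNotCond) that differ only in
// commuted operands or an inverted condition, as the checks below show.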
  // Min/max can occur with commuted operands, non-canonical predicates,
  // and/or non-canonical operands.
  Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
  // ...
    return ((LHSA == RHSA && LHSB == RHSB) ||
            (LHSA == RHSB && LHSB == RHSA));
  // ...
  if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
    return true;
  // ...
  if (LHSA == RHSB && LHSB == RHSA) {
    // ...
  }
    CallInst *CI = dyn_cast<CallInst>(Inst);

  // ...

  static bool isEqual(CallValue LHS, CallValue RHS);
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHS.Inst == RHS.Inst;
  std::optional<int64_t> ConstantOffset;

  GEPValue(Instruction *I, std::optional<int64_t> ConstantOffset)
      : Inst(I), ConstantOffset(ConstantOffset) {
    // ...
  }

  static bool canHandle(Instruction *Inst) {
    return isa<GetElementPtrInst>(Inst);
  }

  static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
  static unsigned getHashValue(const GEPValue &Val) {
    auto *GEP = cast<GetElementPtrInst>(Val.Inst);
    if (Val.ConstantOffset.has_value())
      return hash_combine(GEP->getPointerOperand(),
                          Val.ConstantOffset.value());
    // ...
  }
  static bool isEqual(const GEPValue &LHS, const GEPValue &RHS) {
    if (LHS.isSentinel() || RHS.isSentinel())
      return LHS.Inst == RHS.Inst;
    auto *LGEP = cast<GetElementPtrInst>(LHS.Inst);
    auto *RGEP = cast<GetElementPtrInst>(RHS.Inst);
    if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
      return false;
    if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
      return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
    return LGEP->isIdenticalToWhenDefined(RGEP);
  }
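// GEP equality is therefore two-tier: GEPs that fold to a known constant
// offset compare by (pointer, offset), which unifies differently-spelled but
// equivalent address computations; all other GEPs fall back to strict
// structural identity.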
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;

  ScopedHTType AvailableValues;

  struct LoadValue {
    Instruction *DefInst = nullptr;
    unsigned Generation = 0;
    int MatchingId = -1;
    bool IsAtomic = false;
    bool IsLoad = false;

    LoadValue() = default;
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic, bool IsLoad)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic), IsLoad(IsLoad) {}
  };

  using LoadMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<Value *, LoadValue>>;
  using LoadHTType =
      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                      LoadMapAllocator>;
  LoadHTType AvailableLoads;

  using InvariantMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MemoryLocation, unsigned>>;
  using InvariantHTType =
      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
                      InvariantMapAllocator>;
  InvariantHTType AvailableInvariants;

  CallHTType AvailableCalls;

  using GEPMapAllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<GEPValue, Value *>>;
  using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
                                    GEPMapAllocatorTy>;
  GEPHTType AvailableGEPs;

  unsigned CurrentGeneration = 0;
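  // CurrentGeneration is bumped whenever memory may have been modified. A
  // cached load or call is reusable only if its recorded generation still
  // matches, or if MemorySSA can prove there is no intervening clobber
  // (see isSameMemGeneration below).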
  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
           const TargetTransformInfo &TTI, DominatorTree &DT,
           AssumptionCache &AC, MemorySSA *MSSA)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
        MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}

  bool run();

  unsigned ClobberCounter = 0;
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              GEPHTType &AvailableGEPs)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
          GEPScope(AvailableGEPs) {}
    NodeScope(const NodeScope &) = delete;
    NodeScope &operator=(const NodeScope &) = delete;
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
              DomTreeNode::const_iterator child,
              DomTreeNode::const_iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end),
          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
                 AvailableCalls, AvailableGEPs) {}

    // Accessors.
    unsigned currentGeneration() const { return CurrentGeneration; }
    unsigned childGeneration() const { return ChildGeneration; }
    // ...
    bool isProcessed() const { return Processed; }
    void process() { Processed = true; }

  private:
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    // ...
    bool Processed = false;
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
        : Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
        IntrID = II->getIntrinsicID();
        if (TTI.getTgtMemIntrinsic(II, Info))
          return;
        if (isHandledNonTargetIntrinsic(IntrID)) {
          switch (IntrID) {
          case Intrinsic::masked_load:
            Info.PtrVal = Inst->getOperand(0);
            Info.MatchingId = Intrinsic::masked_load;
            Info.ReadMem = true;
            Info.WriteMem = false;
            Info.IsVolatile = false;
            break;
          case Intrinsic::masked_store:
            Info.PtrVal = Inst->getOperand(1);
            // Deliberately reuse the masked_load MatchingId so that masked
            // stores can be matched against masked loads of the same pointer.
            Info.MatchingId = Intrinsic::masked_load;
            Info.ReadMem = false;
            Info.WriteMem = true;
            Info.IsVolatile = false;
            break;
          }
        }
      }
    }
    bool isLoad() const {
      if (IntrID != 0)
        return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }

    bool isStore() const {
      if (IntrID != 0)
        return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }
    bool isAtomic() const {
      if (IntrID != 0)
        return Info.Ordering != AtomicOrdering::NotAtomic;
      return Inst->isAtomic();
    }

    bool isUnordered() const {
      if (IntrID != 0)
        return Info.isUnordered();

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer.
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IntrID != 0)
        return Info.IsVolatile;

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer.
      return true;
    }
    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->hasMetadata(LLVMContext::MD_invariant_load);
      return false;
    }
    int getMatchingId() const {
      if (IntrID != 0)
        return Info.MatchingId;
      return -1;
    }

    // ...

    Type *getValueType() const {
      // TODO: handle target-specific intrinsics.
      return Inst->getAccessType();
    }

    bool mayReadFromMemory() const {
      if (IntrID != 0)
        return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }

    bool mayWriteToMemory() const {
      if (IntrID != 0)
        return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }
    static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
      switch (ID) {
      case Intrinsic::masked_load:
      case Intrinsic::masked_store:
        return true;
      }
      return false;
    }
    static bool isHandledNonTargetIntrinsic(const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        return isHandledNonTargetIntrinsic(II->getIntrinsicID());
      return false;
    }
  Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
                          unsigned CurrentGeneration);

  bool overridingStores(const ParseMemoryInst &Earlier,
                        const ParseMemoryInst &Later);
  Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
    // TODO: We could insert relevant casts on type mismatch.
    if (auto *LI = dyn_cast<LoadInst>(Inst))
      return LI->getType() == ExpectedType ? LI : nullptr;
    if (auto *SI = dyn_cast<StoreInst>(Inst)) {
      Value *V = SI->getValueOperand();
      return V->getType() == ExpectedType ? V : nullptr;
    }
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    auto *II = cast<IntrinsicInst>(Inst);
    if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
      return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
    return nullptr;
  }
  Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
                                                Type *ExpectedType) const {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getType() == ExpectedType ? II : nullptr;
    case Intrinsic::masked_store: {
      Value *V = II->getOperand(0);
      return V->getType() == ExpectedType ? V : nullptr;
    }
    }
    return nullptr;
  }
  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);

  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                           Instruction *EarlierInst, Instruction *LaterInst);
  bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
                                 const IntrinsicInst *Later) {
    auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
      // Is Mask0 a submask of Mask1?
      if (Mask0 == Mask1)
        return true;
      if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
        return false;
      auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
      auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
      if (!Vec0 || !Vec1)
        return false;
      if (Vec0->getType() != Vec1->getType())
        return false;
      for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
        Constant *Elem0 = Vec0->getOperand(i);
        Constant *Elem1 = Vec1->getOperand(i);
        auto *Int0 = dyn_cast<ConstantInt>(Elem0);
        if (Int0 && Int0->isZero())
          continue;
        auto *Int1 = dyn_cast<ConstantInt>(Elem1);
        if (Int1 && !Int1->isZero())
          continue;
        if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
          return false;
        if (Elem0 == Elem1)
          continue;
        return false;
      }
      return true;
    };
    auto PtrOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
        return II->getOperand(0);
      if (II->getIntrinsicID() == Intrinsic::masked_store)
        return II->getOperand(1);
      llvm_unreachable("Unexpected IntrinsicInst");
    };
    auto MaskOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
        return II->getOperand(2);
      if (II->getIntrinsicID() == Intrinsic::masked_store)
        return II->getOperand(3);
      llvm_unreachable("Unexpected IntrinsicInst");
    };
    auto ThruOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
        return II->getOperand(3);
      llvm_unreachable("Unexpected IntrinsicInst");
    };
    if (PtrOp(Earlier) != PtrOp(Later))
      return false;

    Intrinsic::ID IDE = Earlier->getIntrinsicID();
    Intrinsic::ID IDL = Later->getIntrinsicID();
    if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
      // Trying to replace a later masked load with the earlier one.
      if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
        return true;
      if (!isa<UndefValue>(ThruOp(Later)))
        return false;
      return IsSubmask(MaskOp(Later), MaskOp(Earlier));
    }
    if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
      // Trying to forward a masked store's value to a later masked load.
      if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
        return false;
      return isa<UndefValue>(ThruOp(Later));
    }
    if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
      // Trying to remove a masked store of a just-loaded value.
      return IsSubmask(MaskOp(Later), MaskOp(Earlier));
    }
    if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
      // Trying to remove the earlier of two masked stores.
      return IsSubmask(MaskOp(Earlier), MaskOp(Later));
    }
    return false;
  }
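  // Illustration of the submask rule with hypothetical constant masks: a later
  // load with mask <1,0,1,0> may reuse an earlier access with mask <1,1,1,0>,
  // since every lane the later access touches was already covered; the reverse
  // replacement is rejected because lane 1 would be missing.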
    MSSAUpdater->removeMemoryAccess(&Inst, true);
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;

  if (!MSSA)
    return false;

  // ...

  // Cap the number of (expensive) clobber walks; past the cap, fall back to
  // the cheaper defining access (see -earlycse-mssa-optimization-cap).
  MemoryAccess *LaterDef;
  if (ClobberCounter < EarlyCSEMssaOptCap) {
    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
    ClobberCounter++;
  } else
    LaterDef = LaterMA->getDefiningAccess();

  return MSSA->dominates(LaterDef, EarlierMA);
}
bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
  // A location loaded from with an invariant_load is assumed to *never*
  // change within the visible scope of the compilation.
  if (auto *LI = dyn_cast<LoadInst>(I))
    if (LI->hasMetadata(LLVMContext::MD_invariant_load))
      return true;

  // ...

  if (!AvailableInvariants.count(MemLoc))
    return false;

  // Is the generation at which this became invariant older than the
  // current one?
  return AvailableInvariants.lookup(MemLoc) <= GenAt;
}
bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
                                     const BranchInst *BI,
                                     const BasicBlock *BB,
                                     const BasicBlock *Pred) {
  auto *TorF = (BI->getSuccessor(0) == BB)
                   ? ConstantInt::getTrue(BB->getContext())
                   : ConstantInt::getFalse(BB->getContext());
  auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
                       Value *&RHS) {
    if (Opcode == Instruction::And &&
        match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
      return true;
    else if (Opcode == Instruction::Or &&
             match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
      return true;
    return false;
  };
  // If the condition is AND operation, we can propagate its operands into the
  // true branch. If it is OR operation, we can propagate them into the false
  // branch.
  unsigned PropagateOpcode =
      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;

  bool MadeChanges = false;
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Instruction *, 4> Visited;
  WorkList.push_back(CondInst);
  while (!WorkList.empty()) {
    Instruction *Curr = WorkList.pop_back_val();

    AvailableValues.insert(Curr, TorF);
    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                      << Curr->getName() << "' as " << *TorF << " in "
                      << BB->getName() << "\n");
    // ...

    Value *LHS, *RHS;
    if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
      for (auto *Op : { LHS, RHS })
        if (Instruction *OPI = dyn_cast<Instruction>(Op))
          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
            WorkList.push_back(OPI);
  }

  return MadeChanges;
}
Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
                                  unsigned CurrentGeneration) {
  if (InVal.DefInst == nullptr)
    return nullptr;
  if (InVal.MatchingId != MemInst.getMatchingId())
    return nullptr;
  // We don't yet handle removing loads with ordering of any kind.
  if (MemInst.isVolatile() || !MemInst.isUnordered())
    return nullptr;
  // We can't replace an atomic load with one which isn't also atomic.
  if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
    return nullptr;

  bool MemInstMatching = !MemInst.isLoad();
  Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
  Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();

  // For stores check the result values before checking memory generation
  // (otherwise isSameMemGeneration may crash).
  Value *Result = MemInst.isStore()
                      ? getOrCreateResult(Matching, Other->getType())
                      : nullptr;
  if (MemInst.isStore() && InVal.DefInst != Result)
    return nullptr;

  // Deal with non-target memory intrinsics.
  bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
  bool OtherNTI = isHandledNonTargetIntrinsic(Other);
  if (OtherNTI != MatchingNTI)
    return nullptr;
  if (OtherNTI && MatchingNTI) {
    if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
                                   cast<IntrinsicInst>(MemInst.get())))
      return nullptr;
  }

  if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
      !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
                           MemInst.get()))
    return nullptr;

  if (!Result)
    Result = getOrCreateResult(Matching, Other->getType());
  return Result;
}
static void combineIRFlags(Instruction &From, Value *To) {
  if (auto *I = dyn_cast<Instruction>(To)) {
    // If I being poison triggers UB, there is no need to drop those
    // flags. Otherwise, only retain flags present on both I and Inst.
    if (isa<FPMathOperator>(I) ||
        (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
      I->andIRFlags(&From);
  }
}
bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
                                const ParseMemoryInst &Later) {
  // Can we remove Earlier store because of Later store?

  assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
         "Violated invariant");
  if (Earlier.getPointerOperand() != Later.getPointerOperand())
    return false;
  if (!Earlier.getValueType() || !Later.getValueType() ||
      Earlier.getValueType() != Later.getValueType())
    return false;
  if (Earlier.getMatchingId() != Later.getMatchingId())
    return false;
  // At the moment, we don't remove ordered stores, but do remove
  // unordered atomic stores.
  if (!Earlier.isUnordered() || !Later.isUnordered())
    return false;

  // Deal with non-target memory intrinsics.
  bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
  bool LNTI = isHandledNonTargetIntrinsic(Later.get());
  if (ENTI && LNTI)
    return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
                                     cast<IntrinsicInst>(Later.get()));

  // Because of the check above, at least one of them is false.
  // For now disallow matching intrinsics with non-intrinsics,
  // so assume that the stores match if neither is an intrinsic.
  return ENTI == LNTI;
}
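// overridingStores() powers the trivial DSE in processNode(): when two
// unordered stores hit the same pointer with the same value type and no
// intervening read, the earlier store is dead and can be deleted.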
bool EarlyCSE::processNode(DomTreeNode *Node) {
  bool Changed = false;
  BasicBlock *BB = Node->getBlock();

  // If this block has multiple predecessors, they could have invalidated the
  // live-out memory values of our parent, so conservatively bump the
  // generation.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If this node has a single predecessor which ends in a conditional branch,
  // we can infer the value of the branch condition given that we took this
  // path.
  // ...
    auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
    if (CondInst && SimpleValue::canHandle(CondInst))
      Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
    // Skip assumptions that are already known.
    if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
      auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
      if (CondI && SimpleValue::canHandle(CondI)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
                          << '\n');
        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      } else
        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
      continue;
    }

    // Likewise, noalias intrinsics don't actually write.
    if (match(&Inst,
              m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
                        << '\n');
      continue;
    }

    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
    if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
      continue;
    }

    // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
    if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
      continue;
    }

    // We can skip all invariant.start intrinsics since they only read memory,
    // and we can forward values across them.
    if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
      // ...
      if (!AvailableInvariants.count(MemLoc))
        AvailableInvariants.insert(MemLoc, CurrentGeneration);
      continue;
    }

    if (isGuard(&Inst)) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
        if (SimpleValue::canHandle(CondI)) {
          // Do we already know the actual value of this condition?
          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
            // Is the condition known to be true?
            if (isa<ConstantInt>(KnownCond) &&
                cast<ConstantInt>(KnownCond)->isOne()) {
              LLVM_DEBUG(dbgs()
                         << "EarlyCSE removing guard: " << Inst << '\n');
              // ...
            } else
              // Use the known value if it wasn't true.
              cast<CallInst>(Inst).setArgOperand(0, KnownCond);
          }
          // ...
        }
      }

      // Guard intrinsics read all memory, but don't write any memory.
      // Accordingly, don't update the generation but consume the last store
      // (to avoid an incorrect DSE).
      LastStore = nullptr;
      continue;
    }
    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = simplifyInstruction(&Inst, SQ)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << "  to: " << *V
                        << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
      } else {
        bool Killed = false;
        // ...
      }
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(&Inst)) {
      if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
        assert(CI->getExceptionBehavior() != fp::ebStrict &&
               "Unexpected ebStrict from SimpleValue::canHandle()");
        assert((!CI->getRoundingMode() ||
                CI->getRoundingMode() != RoundingMode::Dynamic) &&
               "Unexpected dynamic rounding from SimpleValue::canHandle()");
      }
      // See if the instruction has an available value. If so, use it.
      if (Value *V = AvailableValues.lookup(&Inst)) {
        // ...
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(&Inst, &Inst);
      continue;
    }
    ParseMemoryInst MemInst(&Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values.
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      if (MemInst.isInvariantLoad()) {
        // If we pass an invariant load, we know that memory location is
        // indefinitely constant from the moment of first dereferenceability.
        // ...
        if (!AvailableInvariants.count(MemLoc))
          AvailableInvariants.insert(MemLoc, CurrentGeneration);
      }

      // If we have an available version of this load, and if it is the right
      // generation, replace this instruction.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
                          << "  to: " << *InVal.DefInst << '\n');
        // ...
        if (auto *I = dyn_cast<Instruction>(Op))
          combineMetadataForCSE(I, &Inst, false);
        // ...
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(MemInst.getPointerOperand(),
                            LoadValue(&Inst, CurrentGeneration,
                                      MemInst.getMatchingId(),
                                      MemInst.isAtomic(),
                                      MemInst.isLoad()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory or throw (and potentially read
    // from memory in the exception handler), forget LastStore. Load/store
    // intrinsics will indicate both a read and a write to memory.
    if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;
    // If this is a read-only call, process it.
    if (CallValue::canHandle(&Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              &Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
                          << "  to: " << *InVal.first << '\n');
        // ...
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
      continue;
    }
    // Compare GEP instructions based on offset.
    if (GEPValue::canHandle(&Inst)) {
      auto *GEP = cast<GetElementPtrInst>(&Inst);
      APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
      GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
                               ? Offset.trySExtValue()
                               : std::nullopt);
      if (Value *V = AvailableGEPs.lookup(GEPVal)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << "  to: " << *V
                          << '\n');
        // ...
      }

      // Otherwise, just remember that we have this GEP.
      AvailableGEPs.insert(GEPVal, &Inst);
      continue;
    }
    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads or stores 'before' the
    // fence. As a result, we don't need to consider it as writing to memory
    // and don't need to advance the generation. DSE across the fence is
    // prevented by the mayReadFromMemory check above.
    if (auto *FI = dyn_cast<FenceInst>(&Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // Write-back DSE: if we write back the same value we just loaded from the
    // same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write.
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
        // It is okay to have a LastStore to a different pointer here if
        // MemorySSA tells us that the load and store are from the same memory
        // generation. In that case, LastStore should keep its present value
        // since we're removing the current store.
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                    MemInst.getPointerOperand() ||
                MSSA) &&
               "can't have an intervening store if not using MemorySSA!");
        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
        // ...
      }
    }

    // Okay, this isn't something we can CSE at all. Check to see if it is
    // something that could modify memory. If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst.mayWriteToMemory()) {
      ++CurrentGeneration;
      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
        if (LastStore) {
          if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
            LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                              << "  due to: " << Inst << '\n');
            // ...
            removeMSSA(*LastStore);
            LastStore->eraseFromParent();
            Changed = true;
            ++NumDSE;
            LastStore = nullptr;
          }
        }

        // Okay, we just invalidated anything we knew about loaded values. Try
        // to salvage *something* by remembering that the stored value is also
        // available under this pointer.
        AvailableLoads.insert(MemInst.getPointerOperand(),
                              LoadValue(&Inst, CurrentGeneration,
                                        MemInst.getMatchingId(),
                                        MemInst.isAtomic(),
                                        MemInst.isLoad()));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = &Inst;
        else
          LastStore = nullptr;
      }
    }
bool EarlyCSE::run() {
  // Note, deque is being used here because there is significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns.
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      AvailableGEPs, CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initially, the node is the root, and we visit it first.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *Child = NodeToProcess->nextChild();
      nodesToProcess.push_back(new StackNode(
          AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
          AvailableGEPs, NodeToProcess->childGeneration(), Child,
          Child->begin(), Child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  }

  return Changed;
}
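// The explicit StackNode stack replaces recursion so that very deep dominator
// trees cannot overflow the call stack; scoped hash-table scopes are closed
// exactly when a node is deleted after all of its children are processed.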
  EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
      OS, MapClassName2PassName);
template <bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
  // ...

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;

    EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
char EarlyCSELegacyPass::ID = 0;

// ...

using EarlyCSEMemSSALegacyPass =
    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;

char EarlyCSEMemSSALegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
  if (UseMemorySSA)
    return new EarlyCSEMemSSALegacyPass();
  return new EarlyCSELegacyPass();
}
1940 "Early CSE w/ MemorySSA",
false,
false)