#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to a LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Don't prefer any addressing mode"),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode")));

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to replace primary IV with other IV."));

    cl::desc("Attempt to drop solution if it is less profitable"));

          "Number of terminating condition fold recognized and performed");

    cl::desc("Stress test LSR IV chains"));
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

                                 unsigned AS = UnknownAddressSpace) {

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "[NumUses=" << UsedByIndices.count() << ']';
  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
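// RegUseTracker maps each register (a SCEV) to the set of LSRUse indices
// that reference it, so later phases can tell whether a register is shared
// by several uses or is private to a single one.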
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
      RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);

RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));

RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
  if (i == -1)
    return false;
  if ((size_t)i != LUIdx)
    return true;

  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;

void RegUseTracker::clear() {
  int64_t BaseOffset = 0;

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  int64_t UnfoldedOffset = 0;

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;
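// A Formula describes one candidate way to compute the value a use needs:
// BaseGV + BaseOffset + sum(BaseRegs) + Scale * ScaledReg, plus an
// UnfoldedOffset that could not be folded into the addressing mode.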
    for (const SCEV *S : Add->operands())

    if (!AR->getStart()->isZero() && AR->isAffine()) {
                              AR->getStepRecurrence(SE),

  const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
  for (const SCEV *S : MyGood)
  for (const SCEV *S : MyBad)

    BaseRegs.push_back(Sum);

    BaseRegs.push_back(Sum);

  return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);

bool Formula::isCanonical(const Loop &L) const {
    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())

void Formula::canonicalize(const Loop &L) {
  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);

    ScaledReg = BaseRegs.pop_back_val();

    if (I != BaseRegs.end())

bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
  if (BaseRegs.size() != 1 || ScaledReg)

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :

void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    BaseGV->printAsOperand(OS, false);
  if (BaseOffset != 0) {
  for (const SCEV *BaseReg : BaseRegs) {
    OS << "reg(" << *BaseReg << ')';
  if (HasBaseReg && BaseRegs.empty()) {
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    OS << "**error: !HasBaseReg**";
    OS << Scale << "*reg(";
  if (UnfoldedOffset != 0) {
    OS << "imm(" << UnfoldedOffset << ')';
                               bool IgnoreSignificantBits = false) {
    if (RA.isAllOnes()) {
    const APInt &LA = C->getAPInt();
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                      IgnoreSignificantBits);
      if (!Step)
        return nullptr;
                                       IgnoreSignificantBits);
      if (!Start)
        return nullptr;

      for (const SCEV *S : Add->operands()) {
        if (!Op)
          return nullptr;

          dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
                                           IgnoreSignificantBits)) {

    if (C->getAPInt().getSignificantBits() <= 64) {
      return C->getValue()->getSExtValue();
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getPointerOperand() == OperandVal)
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
      if (IntrInfo.PtrVal == OperandVal)
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::masked_load:
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
    case Intrinsic::masked_store:
          II->getArgOperand(1)->getType()->getPointerAddressSpace();

  if (!Processed.insert(S).second)
    for (const SCEV *S : Add->operands()) {
    Value *UVal = U->getValue();
    if (UI && UI->getOpcode() == Instruction::Mul &&
                                 const LSRUse &LU, const Formula &F);
                                 const LSRUse &LU, const Formula &F,

  const Loop *L = nullptr;

      : L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;
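// A cost that has been poisoned by Lose() has every component saturated, so
// isLoser() only needs to test NumRegs, and isValid() guards against a cost
// where some but not all of the components were saturated.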
  void RateFormula(const Formula &F,
  void RateRegister(const Formula &F, const SCEV *Reg,
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,

  Value *OperandValToReplace = nullptr;

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

struct UniquifierDenseMapInfo {
    V.push_back(reinterpret_cast<const SCEV *>(-1));
    V.push_back(reinterpret_cast<const SCEV *>(-2));

  MemAccessTy AccessTy;

  int64_t MinOffset = std::numeric_limits<int64_t>::max();
  int64_t MaxOffset = std::numeric_limits<int64_t>::min();

  bool AllFixupsOutsideLoop = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (f.Offset > MaxOffset)
      MaxOffset = f.Offset;
    if (f.Offset < MinOffset)
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
                                 int64_t ScalableOffset = 0);

  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
  if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
  if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
                   [&](unsigned i, const SCEV *Reg) {
                     return i + getSetupCost(Reg, Depth - 1);
  if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
    if (AR->getLoop() != L) {
      if (!AR->getLoop()->contains(L)) {

    unsigned LoopCost = 1;
      if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
        if (Step->getAPInt() == F.BaseOffset)
        const SCEV *LoopStep = AR->getStepRecurrence(*SE);
        if (isa<SCEVConstant>(LoopStep)) {
          const SCEV *LoopStart = AR->getStart();
          if (!isa<SCEVConstant>(LoopStart) &&
    C.AddRecCost += LoopCost;

    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(F, AR->getOperand(1), Regs);

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);

  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&

void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
  if (LoserRegs && LoserRegs->count(Reg)) {
  if (Regs.insert(Reg).second) {
    RateRegister(F, Reg, Regs);
    if (LoserRegs && isLoser())

void Cost::RateFormula(const Formula &F,
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  C.NumBaseAdds += (F.UnfoldedOffset != 0);

  for (const LSRFixup &Fixup : LU.Fixups) {
    int64_t O = Fixup.Offset;
    if (LU.Kind == LSRUse::Address && Offset != 0 &&

  if (C.NumRegs > TTIRegNum) {
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
      C.Insns += (C.NumRegs - TTIRegNum);

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
  C.Insns += (C.AddRecCost - PrevAddRecCost);

  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
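// Lose() saturates every cost component, so a losing formula compares worse
// than any real candidate, and RatePrimaryRegister can bail out early when a
// register is already known to be a loser.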
bool Cost::isLess(const Cost &Other) const {
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))

  return !L->contains(UserInst);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
    UserInst->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  return Uniquifier.count(Key);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();

bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);

  if (!Uniquifier.insert(Key).second)

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
                                 int64_t ScalableOffset) {
  case LSRUse::Address:
                                    HasBaseReg, Scale, AccessTy.AddrSpace,
                                    Fixup, ScalableOffset);

  case LSRUse::ICmpZero:
    if (BaseGV || ScalableOffset != 0)
    if (Scale != 0 && HasBaseReg && BaseOffset != 0)
    if (Scale != 0 && Scale != -1)
    if (BaseOffset != 0) {
      BaseOffset = -(uint64_t)BaseOffset;
    return !BaseGV && Scale == 0 && BaseOffset == 0 && ScalableOffset == 0;

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0 &&
           ScalableOffset == 0;
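// isAMCompletelyFolded answers whether the whole formula can be absorbed by
// the use: Address uses defer to the target's addressing-mode query, while
// an ICmpZero use can only fold an offset by comparing against its negation,
// hence the restrictions on BaseGV, Scale and BaseOffset above.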
                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
  MinOffset = (uint64_t)BaseOffset + MinOffset;
  if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
  MaxOffset = (uint64_t)BaseOffset + MaxOffset;

                              HasBaseReg, Scale) &&

                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 const Formula &F, const Loop &L) {
  assert((F.isCanonical(L) || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                       int64_t MaxOffset, LSRUse::KindType Kind,
                       int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
                              BaseOffset, HasBaseReg, Scale) ||
                                    BaseGV, BaseOffset, true, 0));

                       int64_t MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);

                                 const LSRUse &LU, const Formula &F) {
  for (const LSRFixup &Fixup : LU.Fixups)
                               (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                               F.Scale, Fixup.UserInst))
                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F,
    return F.Scale != 1;

  case LSRUse::Address: {
                                    LU.AccessTy.MemTy, F.BaseGV,
                                    F.Scale, LU.AccessTy.AddrSpace);
                                    LU.AccessTy.MemTy, F.BaseGV,
                                    F.Scale, LU.AccessTy.AddrSpace);
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
  case LSRUse::ICmpZero:
  case LSRUse::Special:

                             LSRUse::KindType Kind, MemAccessTy AccessTy,
                             bool HasBaseReg, int64_t ScalableOffset = 0) {
  if (BaseOffset == 0 && !BaseGV)
    return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  if (!HasBaseReg && Scale == 1) {

                              HasBaseReg, Scale, nullptr, ScalableOffset);

                             int64_t MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
  if (S->isZero())
    return true;

  if (!S->isZero())
    return false;

  if (BaseOffset == 0 && !BaseGV)
    return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
                              BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

    return std::next(Incs.begin());

  bool hasIncs() const { return Incs.size() >= 2; }
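  // isProfitableIncrement reports whether adding IncExpr on top of OperExpr
  // is cheap enough (e.g. a constant or otherwise foldable step) to keep the
  // user on this chain rather than giving it its own LSR use.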
  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,
  bool Changed = false;

  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  void OptimizeShadowIV();
  void OptimizeLoopTermCond();

  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                    MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

                    const Cost &CurCost,

                     const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

  bool getChanged() const { return Changed; }
    return ScalarEvolutionIVs;
void LSRInstance::OptimizeShadowIV() {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    Type *DestTy = nullptr;
    bool IsSigned = false;

    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      DestTy = UCast->getDestTy();
    } else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
                                        (double)Init->getSExtValue() :
                                        (double)Init->getZExtValue());

    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)

    if (!C->getValue().isStrictlyPositive())

    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,

    if (U.getUser() == Cond) {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;

  if (Max->getNumOperands() != 2)

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))

         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();

                     Cond->getOperand(0), NewRHS, "scmp");

  Cond->replaceAllUsesWith(NewCond);

  Cond->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
LSRInstance::OptimizeLoopTermCond() {

  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());

    if (!FindIVUserForCond(Cond, CondUse))

    if (!DT.dominates(ExitingBlock, LatchBlock))

    if (LatchBlock != ExitingBlock)
        if (&*UI != CondUse &&
            !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {

          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(*UI, L);
          if (!A || !B) continue;

            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;
            if (C->getValue().getSignificantBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;
                TTI, UI->getUser(), UI->getOperandValToReplace());
            int64_t Scale = C->getSExtValue();
                                        AccessTy.AddrSpace))
              goto decline_post_inc;
                                        AccessTy.AddrSpace))
              goto decline_post_inc;

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

    if (Cond->getNextNonDebugInstruction() != TermBr) {
      if (Cond->hasOneUse()) {
        Cond->moveBefore(TermBr);

        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");

    IVIncInsertPos = L->getLoopLatch()->getTerminator();
      IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  int64_t NewMinOffset = LU.MinOffset;
  int64_t NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)

  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);

  if (NewOffset < LU.MinOffset) {
                          LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (NewOffset > LU.MaxOffset) {
                          NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
                                               LSRUse::KindType Kind,
                                               MemAccessTy AccessTy) {

  std::pair<UseMapTy::iterator, bool> P =
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);
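// getUse peels a constant offset off Expr and then either reuses an existing
// LSRUse whose offset range can be widened to cover it (via
// reconcileNewOffset) or creates a fresh use; the returned pair is the use
// index plus the offset that was peeled off.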
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())

  RegUses.swapAndDropUse(LUIdx, Uses.size());

LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (LSRUse &LU : Uses) {
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset == 0)

void LSRInstance::CollectInterestingTypesAndFactors() {
    const SCEV *Expr = IU.getExpr(U);
    } while (!Worklist.empty());

       I = Strides.begin(), E = Strides.end(); I != E; ++I)
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

              dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
          if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
            Factors.insert(Factor->getAPInt().getSExtValue());
          if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
            Factors.insert(Factor->getAPInt().getSExtValue());

  if (Types.size() == 1)

  for (; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
            dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {

  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);

    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
      if (SubExpr->getSCEVType() == scAddExpr)
      if (SubExpr->getSCEVType() != scMulExpr)
    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
  if (!isa<SCEVConstant>(IncExpr)) {
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))

  if (!Chain.hasIncs())

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst : Users) {
                 dbgs() << "  " << *Inst << "\n";
               });
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  for (const IVInc &Inc : Chain) {
    if (Inc.IncExpr->isZero())

    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;

    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))

    if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))

    LastIncExpr = OperExpr;

    if (!isa<SCEVAddRecExpr>(LastIncExpr))

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");

    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),

    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for ( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)

        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {

    NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);

void LSRInstance::CollectChains() {
  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))

      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  for (PHINode &PN : L->getHeader()->phis()) {
        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
      ChainInstruction(&PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
  IVChainVec.resize(ChainIdx);
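// FinalizeChain records every increment of a kept chain in IVIncSet so that
// CollectFixupsAndInitialFormulae can skip those operands later instead of
// building separate LSR uses for them.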
void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);

  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
  int64_t IncOffset = 0;
  int64_t ScalableOffset = 0;
    auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
    if (!IncVScale || IncVScale->getNumOperands() != 2 ||
        !isa<SCEVVScale>(IncVScale->getOperand(1)))
    auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
    if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
    ScalableOffset = Scale->getValue()->getSExtValue();

                       IncOffset, false, ScalableOffset))
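// GenerateIVChain rewrites every user on a chain in terms of a single
// incremented value (IVSrc), expanding only the left-over difference between
// consecutive increments instead of a full address computation per user.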
void LSRInstance::GenerateIVChain(const IVChain &Chain,
  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
  assert(IVSrc && "Failed to find IV chain source");

  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

      bool FoundBase = false;
      for (auto [MapScev, MapIVOper] : reverse(Bases)) {
        if (!Remainder->isZero()) {
          Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
          const SCEV *IVOperExpr =
          IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
        Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
        IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
      LeftOverExpr = nullptr;

    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))

  if (isa<PHINode>(Chain.tailUserInst())) {
    for (PHINode &Phi : L->getHeader()->phis()) {
          Phi.getIncomingValueForBlock(L->getLoopLatch()));
      Value *IVOper = IVSrc;
      if (IVTy != PostIncTy) {
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      Phi.replaceUsesOfWith(PostIncV, IVOper);
void LSRInstance::CollectFixupsAndInitialFormulae() {
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
      Kind = LSRUse::Address;

    const SCEV *S = IU.getExpr(U);

    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
            (!NV->getType()->isPointerTy() ||
          Kind = LSRUse::ICmpZero;
        } else if (L->isLoopInvariant(NV) &&
                   (!isa<Instruction>(NV) ||
                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
                   !NV->getType()->isPointerTy()) {
          Kind = LSRUse::ICmpZero;
          assert(!isa<SCEVCouldNotCompute>(S));

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
      VisitedLSRUse.insert(LUIdx);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
    LU.RigidFormula = true;

  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))

  CountRegisters(F, LUIdx);
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  while (!Worklist.empty()) {
      if (!Visited.insert(S).second)

    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        if (L->contains(Inst)) continue;
      } else if (isa<Constant>(V))

      for (const Use &U : V->uses()) {
        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
            cast<PHINode>(UserInst)->getIncomingBlock(
        if (!DT.dominates(L->getHeader(), UseBB))

        if (isa<PHINode>(UserInst)) {
          const auto *PhiNode = cast<PHINode>(UserInst);
          bool HasIncompatibleEHPTerminatedBlock = false;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
          if (HasIncompatibleEHPTerminatedBlock) {

        if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))

          if (!isa<SCEVUnknown>(UserS))

        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));

        std::pair<size_t, int64_t> P = getUse(
            S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                             unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Remainder = nullptr;

  const SCEV *Remainder =

                               LSRUse &LU, const SCEV *S, const Loop *L,
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())

  if (!isa<SCEVConstant>(LoopStep))

  if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (AddOps.size() == 1)

                                    LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J),

    if (InnerAddOps.size() == 1 &&
                                    LU.AccessTy, InnerAddOps[0],
                                    Base.getNumRegs() > 1))

    const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
        F.ScaledReg = nullptr;
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
      F.BaseRegs[Idx] = InnerSum;

                                 SC->getValue()->getZExtValue()))
            (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
        F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))

      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),

void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
      (Base.UnfoldedOffset != 0) <= 1)

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
      if (!CombinedIntegerType)
      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
    NewBase.UnfoldedOffset = 0;
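// GenerateSymbolicOffsets tries to pull a global value out of a base register
// and re-express it as the formula's BaseGV, so targets that can fold symbols
// into their addressing mode get a chance to do so.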
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  if (G->isZero() || !GV)
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,

void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
  auto GenerateOffset = [&](const SCEV *G, int64_t Offset) {
    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
        F.ScaledReg = nullptr;
        F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

  if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
            dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
      const APInt &StepInt = StepRec->getAPInt();

      for (int64_t Offset : Worklist) {

  for (int64_t Offset : Worklist)

  if (G->isZero() || Imm == 0)
    F.BaseRegs[Idx] = G;

  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  if (LU.MinOffset != LU.MaxOffset) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
    int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset / Factor != Base.BaseOffset)

    int64_t Offset = LU.MinOffset;
    if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
    if (Offset / Factor != LU.MinOffset)

    F.BaseOffset = NewBaseOffset;

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

    if (F.UnfoldedOffset != 0) {
      if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
      F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
      if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)

    (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not do its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero &&
        !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        if (!Quotient->isZero()) {
          F.ScaledReg = Quotient;
          F.deleteBaseReg(F.BaseRegs[i]);

          if (F.Scale == 1 && (F.BaseRegs.empty() ||
                               (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))

          if (F.Scale == 1 && LU.AllFixupsOutsideLoop)

          (void)InsertFormula(LU, LUIdx, F);
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    if (!New || (Result && New != Result))
  assert(Result && "failed to create expression");

void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
          [](const SCEV *S) { return S->getType()->isPointerTy(); }))

  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
      const SCEV *NewScaledReg =
      if (!NewScaledReg || NewScaledReg->isZero())
      F.ScaledReg = NewScaledReg;

    bool HasZeroBaseReg = false;
    for (const SCEV *&BaseReg : F.BaseRegs) {
      const SCEV *NewBaseReg =
      if (!NewBaseReg || NewBaseReg->isZero()) {
        HasZeroBaseReg = true;
      BaseReg = NewBaseReg;

    if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

    (void)InsertFormula(LU, LUIdx, F);
  const SCEV *OrigReg;

      : LUIdx(LI), Imm(I), OrigReg(R) {}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
void LSRInstance::GenerateCrossUseConstantOffsets() {
  using ImmMapTy = std::map<int64_t, const SCEV *>;

  for (const SCEV *Use : RegUses) {
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    if (Imms.size() == 1)

    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry
                      << ' ' << Entry.first;

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;

      int64_t JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {

      int64_t First = Imms.begin()->first;
      int64_t Last = std::prev(Imms.end())->first;

      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Avg)};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE) continue;

          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)

  UsedByIndicesMap.clear();
  UniqueItems.clear();

  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    int64_t Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];

      if (F.ScaledReg == OrigReg) {
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
          if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
                  .ule(std::abs(NewF.BaseOffset)))

        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);

        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          const SCEV *BaseReg = F.BaseRegs[N];
          if (BaseReg != OrigReg)
          NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
                          LU.Kind, LU.AccessTy, NewF)) {
            NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          for (const SCEV *NewReg : NewF.BaseRegs)
              if ((C->getAPInt() + NewF.BaseOffset)
                      .slt(std::abs(NewF.BaseOffset)) &&
                  (unsigned)llvm::countr_zero<uint64_t>(NewF.BaseOffset))

          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
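// GenerateAllReuseFormulae runs the Generate* passes in rounds: structural
// rewrites (reassociation, combination) first, then symbolic/constant-offset
// and scale variants, then truncates, and finally cross-use constant offsets
// over the whole candidate set.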
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  GenerateCrossUseConstantOffsets();

             "After generating reuse formulae:\n";
             print_uses(dbgs()));
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  bool ChangedFormulae = false;

  using BestFormulaeTy =

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
      if (CostF.isLoser()) {

        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);

        std::pair<BestFormulaeTy::const_iterator, bool> P =
            BestFormulae.insert(std::make_pair(Key, FIdx));

          Formula &Best = LU.Formulae[P.first->second];

          Cost CostBest(L, SE, TTI, AMK);
          CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
          if (CostF.isLess(CostBest))
                              " in favor of formula ";
                     Best.print(dbgs()); dbgs() << '\n');

        ChangedFormulae = true;

        LU.DeleteFormula(F);

    LU.RecomputeRegs(LUIdx, RegUses);

    BestFormulae.clear();

             "After filtering out undesirable candidates:\n";
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();

void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                       "which use a superset of registers used by other "

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
           I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue();
          NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                              (I - F.BaseRegs.begin()));
          if (LU.HasFormulaWithSameRegs(NewF)) {
            LU.DeleteFormula(F);
        } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
          if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
              NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                  (I - F.BaseRegs.begin()));
              if (LU.HasFormulaWithSameRegs(NewF)) {
                LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  // ...
  LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by assuming that uses separated "
                       "by a constant offset will use the same registers.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
        continue;

      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
      // ...
      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
                              LU.Kind, LU.AccessTy))
        continue;
      // ...
      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
        // ...
      }

      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          // ...
          LUThatHas->DeleteFormula(F);
          // ...
        }
      }

      LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      DeleteUse(LU, LUIdx);
      // ...
    }
  }
  // ...
}
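// If the structural pruning above is still not enough, the pass simply
// re-runs the dedicated-register filter: deleting whole uses changes which
// registers are shared between uses, so a second filtering pass can retire
// additional formulae.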
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {
  // ...
  LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                       "undesirable dedicated registers.\n");

  FilterOutUndesirableDedicatedRegisters();
  // ...
}
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
  // ...
  LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by choosing the best Formula "
                       "from the Formulae with the same Scale and ScaledReg.\n");

  // ...
  BestFormulaeTy BestFormulae;
  // ...
  bool ChangedFormulae = false;
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // ...
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      // ...
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      }
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      }
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      // ...
      CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
      // ...
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
      return CostFA.isLess(CostFB);
    };

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      // ...
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      if (!P.second) {
        Formula &Best = LU.Formulae[P.first->second];
        if (IsBetterThan(F, Best))
          std::swap(F, Best);
        LLVM_DEBUG(/* ... */ dbgs() << " in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');
        ChangedFormulae = true;
        // ...
        LU.DeleteFormula(F);
        // ...
      }
    }
    // ...
    LU.RecomputeRegs(LUIdx, RegUses);
    // ...
    BestFormulae.clear();
  }

  LLVM_DEBUG(dbgs() << "After filtering out undesirable candidates:\n";
             /* ... */);
}
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
  // ...
  LLVM_DEBUG(dbgs() << "Narrowing the search space by choosing the lowest "
                       "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (LU.Kind != LSRUse::Address)
      continue;
    // ...

    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    // ...
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        // ...
        LU.DeleteFormula(F);
        // ...
      }
    }
    // ...
    LU.RecomputeRegs(LUIdx, RegUses);
  }
}
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  // ...
  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    if (UniqRegs.count(Reg))
      continue;
    float PNotSel = 1;
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
        continue;
      float P = LU.getNotSelectedProbability(Reg);
      // ...
    }
    RegNumMap.insert(std::make_pair(Reg, PNotSel));
  }

  LLVM_DEBUG(
      dbgs() << "Narrowing the search space by deleting costly formulas\n");

  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // ...
    if (LU.Formulae.size() < 2)
      continue;
    // ...
    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    size_t MinIdx = 0;
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      float FRegNum = 0;
      float FARegNum = 0;
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
          continue;
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
        if (isa<SCEVAddRecExpr>(BaseReg))
          FARegNum +=
              RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      }
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
          FRegNum +=
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
          if (isa<SCEVAddRecExpr>(ScaledReg))
            FARegNum +=
                RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
        }
      }
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;
        MinIdx = i;
      }
    }
    LLVM_DEBUG(/* ... */ dbgs() << " with min reg num " << FMinRegNum << '\n');
    std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      // ...
      LU.Formulae.pop_back();
    }
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
    // ...
    UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    // ...
  }
}
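// Helper used by NarrowSearchSpaceByPickingWinnerRegs below: when two
// candidate registers tie on use count, prefer the one whose constant
// distance from the current best can be folded into the target's addressing
// mode in both directions.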
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
                                       ScalarEvolution &SE, const SCEV *Best,
                                       const SCEV *Reg,
                                       MemAccessTy AccessType) {
  if (Best->getType() != Reg->getType() ||
      (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
       cast<SCEVAddRecExpr>(Best)->getLoop() !=
           cast<SCEVAddRecExpr>(Reg)->getLoop()))
    return false;
  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Best, Reg));
  if (!Diff)
    return false;

  return TTI.isLegalAddressingMode(AccessType.MemTy, nullptr,
                                   Diff->getAPInt().getSExtValue(),
                                   true, 0, AccessType.AddrSpace) &&
         TTI.isLegalAddressingMode(AccessType.MemTy, nullptr,
                                   -Diff->getAPInt().getSExtValue(),
                                   true, 0, AccessType.AddrSpace);
}
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  // ...
  SmallPtrSet<const SCEV *, 4> Taken;
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        continue;
      if (!Best) {
        Best = Reg;
        BestNum = RegUses.getUsedByIndices(Reg).count();
      } else {
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
          Best = Reg;
          BestNum = Count;
        }
        // ...
        if (Count == BestNum) {
          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
              IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
                                         Uses[LUIdx].AccessTy)) {
            Best = Reg;
            BestNum = Count;
          }
        }
      }
    }
    assert(Best && "Failed to find best LSRUse candidate");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    Taken.insert(Best);

    // ...
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best)) continue;

      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          // ...
          LU.DeleteFormula(F);
          // ...
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
          // ...
        }
      }
      // ...
      LU.RecomputeRegs(LUIdx, RegUses);
    }
  }
}
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  if (FilterSameScaledReg)
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
  if (LSRExpNarrow)
    NarrowSearchSpaceByDeletingCostlyFormulas();
  else
    NarrowSearchSpaceByPickingWinnerRegs();
}
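// The solver below walks the uses depth-first, carrying the set of registers
// already committed to. At each level it only tries formulae that cover every
// required register inherited from earlier choices, rates the partial
// solution, and prunes any branch whose running cost already exceeds the best
// complete solution found so far.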
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               Cost &SolutionCost,
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  // ...
  const LSRUse &LU = Uses[Workspace.size()];

  // ...
  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))
      ReqRegs.insert(S);

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
    // ...
    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
    for (const SCEV *Reg : ReqRegs) {
      if ((F.ScaledReg && F.ScaledReg == Reg) ||
          is_contained(F.BaseRegs, Reg)) {
        --NumReqRegsToFind;
        if (NumReqRegsToFind == 0)
          break;
      }
    }
    if (NumReqRegsToFind != 0) {
      // ...
      continue;
    }

    // ...
    NewCost = CurCost;
    NewRegs = CurRegs;
    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
    if (NewCost.isLess(SolutionCost)) {
      Workspace.push_back(&F);
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
      } else {
        LLVM_DEBUG(/* ... */ dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs) dbgs()
                       << "- " << *S << "\n");

        SolutionCost = NewCost;
        Solution = Workspace;
      }
      Workspace.pop_back();
    }
  }
}
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  // ...
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    // ...
    return;
  }

  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               // ...
               Solution[i]->print(dbgs());
               // ...
             });

  const bool EnableDropUnprofitableSolution = [&] {
    // ...
  }();

  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
      LLVM_DEBUG(
          dbgs() << "Baseline is more profitable than chosen solution, "
                    "add option 'lsr-drop-solution' to drop LSR solution.\n");
    else {
      LLVM_DEBUG(/* ... */ dbgs() << "solution, dropping LSR solution.\n";);
      Solution.clear();
    }
  }
}
    bool AllDominate = true;
    Instruction *BetterPos = nullptr;
    // ...
    if (isa<CatchSwitchInst>(Tentative))
      continue;
    for (Instruction *Inst : Inputs) {
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      // ...
      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))
        BetterPos = &*std::next(BasicBlock::iterator(Inst));
    }
    // ...

    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;

    BasicBlock *IDom;
    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
      if (!Rung) return IP;
      Rung = Rung->getIDom();
      if (!Rung) return IP;
      IDom = Rung->getBlock();

      // ...
      const Loop *IDomLoop = LI.getLoopFor(IDom);
      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
      if (IDomDepth <= IPLoopDepth &&
          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
        break;
    }
    // ...

  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
    Inputs.push_back(I);
  if (LU.Kind == LSRUse::ICmpZero)
    if (Instruction *I =
            dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
    // ...
  }
  // ...
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L) continue;

    // ...
    if (!ExitingBlocks.empty()) {
      BasicBlock *BB = ExitingBlocks[0];
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
        BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
      Inputs.push_back(BB->getTerminator());
    }
  }

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
         && !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  // ...
  while (isa<PHINode>(IP)) ++IP;

  while (IP->isEHPad()) ++IP;

  while (isa<DbgInfoIntrinsic>(IP)) ++IP;

  // ...
  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
    ++IP;

  return IP;
}
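// Expand() materialises one chosen formula for one fixup: base registers, the
// scaled register, and the various offsets are expanded through the
// SCEVExpander and summed, with special handling for ICmpZero uses where the
// scale is folded into the comparison itself.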
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
                           const Formula &F, BasicBlock::iterator IP,
                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  // ...
  IP = AdjustInsertPositionForExpand(IP, LF, LU);
  // ...
  Rewriter.setPostInc(LF.PostIncLoops);

  // ...
  Type *OpTy = LF.OperandValToReplace->getType();
  // ...
  Type *Ty = F.getType();
  // ...

  SmallVector<const SCEV *, 8> Ops;

  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");
    // ...
  }

  Value *ICmpScaledV = nullptr;
  if (F.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;
    // ...
    if (LU.Kind == LSRUse::ICmpZero) {
      // ...
      assert(F.Scale == -1 &&
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
    } else {
      // ...
    }
  }

  // ...
  if (!Ops.empty() && LU.Kind == LSRUse::Address /* && ... */) {
    // ...
  }

  // ...
  if (LU.Kind == LSRUse::ICmpZero) {
    // ...
    ICmpScaledV = ConstantInt::get(IntTy, Offset);
    // ...
  }

  int64_t UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset != 0) {
    // ...
  }

  // ...
  if (LU.Kind == LSRUse::ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
      DeadInsts.emplace_back(OperandIsInstr);
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                        "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        // ...
      }
      // ...
    } else {
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      // ...
      if (C->getType() != OpTy) {
        // ...
        assert(C && "Cast of ConstantInt should have folded");
        // ...
      }
      // ...
    }
  }
  // ...
}
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
                                const LSRFixup &LF, const Formula &F,
                                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  DenseMap<BasicBlock *, Value *> Inserted;

  // ...
  bool needUpdateFixups = false;
  // ...

      Loop *PNLoop = LI.getLoopFor(Parent);
      if (!PNLoop || Parent != PNLoop->getHeader()) {
        // ...
        NewBB = SplitCriticalEdge(BB, Parent,
                                  CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
                                      .setMergeIdenticalEdges()
                                      .setKeepOneInputPHIs());
        // ...
        if (L->contains(BB) && !L->contains(PN)) {
          // ...
        }
        // ...
        needUpdateFixups = true;
      }
      // ...

      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
      if (!Pair.second) {
        // ...
      } else {
        // ...
        Type *OpTy = LF.OperandValToReplace->getType();
        if (FullV->getType() != OpTy)
          FullV = CastInst::Create(
              CastInst::getCastOpcode(FullV, false, OpTy, false), FullV,
              LF.OperandValToReplace->getType(), "tmp",
              BB->getTerminator()->getIterator());

        // ...
        if (auto *I = dyn_cast<Instruction>(FullV))
          if (L->contains(I) && !L->contains(BB)) {
            // ...
          }
        // ...
        Pair.first->second = FullV;
      }
  // ...

  if (needUpdateFixups) {
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
      for (LSRFixup &Fixup : Uses[LUIdx].Fixups)
        // ...
        if (Fixup.UserInst == PN) {
          // ...
          bool foundInOriginalPHI = false;
          for (const auto &val : PN->incoming_values())
            if (val == Fixup.OperandValToReplace) {
              foundInOriginalPHI = true;
              break;
            }
          if (foundInOriginalPHI)
            continue;

          // ...
            if (val == Fixup.OperandValToReplace)
              Fixup.UserInst = NewPN;
          // ...
        }
  }
}
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                          const Formula &F,
                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LU, LF, F, DeadInsts);
  } else {
    Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);

    // ...
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
      FullV = CastInst::Create(
          CastInst::getCastOpcode(FullV, false, OpTy, false),
          FullV, OpTy, "tmp", LF.UserInst->getIterator());
    }

    // ...
    if (LU.Kind == LSRUse::ICmpZero)
      LF.UserInst->setOperand(0, FullV);
    else
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
  }

  // ...
  if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
    DeadInsts.emplace_back(OperandIsInstr);
}

// canHoistIVInc (shown only in part):
  if (LU.Kind != LSRUse::Address)
    return false;
  // ...
  if (IVIncInsertPos->getParent() == LHeader)
    return false;
  // ...
  if (!Fixup.OperandValToReplace ||
      any_of(Fixup.OperandValToReplace->users(), [&](User *U) {
        Instruction *UI = cast<Instruction>(U);
        return UI->getParent() != LHeader;
      }))
    return false;
  // ...
  Type *Ty = I->getType();
  // ...
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // ...
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);
  }

  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      Instruction *InsertPos =
          canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
              ? L->getHeader()->getTerminator()
              : IVIncInsertPos;
      Rewriter.setIVIncInsertPos(L, InsertPos);
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
      // ...
    }

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, DeadInsts);
    // ...
  }

  // ...
      ScalarEvolutionIVs.push_back(IV);
  // ...
}
  for (PHINode &PN : L->getHeader()->phis()) {
    // ...
    Value *Start = nullptr, *Step = nullptr;
    // ...
    switch (BO->getOpcode()) {
    case Instruction::Sub:
      // ...
      break;
    case Instruction::Add:
      // ...
      break;
    // ...
    }
    // ...
    if (!isa<Constant>(Step))
      continue;
    // ...
    if (BO->getParent() == IVIncInsertPos->getParent())
      continue;
    // ...
    if (all_of(BO->uses(),
               [&](Use &U) { return DT.dominates(IVIncInsertPos, U); }))
      // ...
  }
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI, AssumptionCache &AC,
                         TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
      // ...
      AMK(PreferredAddresingMode.getNumOccurrences() > 0
              ? PreferredAddresingMode
              : TTI.getPreferredAddressingMode(L, &SE)),
      // ...
      BaselineCost(L, SE, TTI, AMK) {
  // ...
  if (!L->isLoopSimplifyForm())
    return;

  if (IU.empty()) return;

  // ...
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
                        << "\n");
      return;
    }
    // ...
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
      auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
      if (isa<FuncletPadInst>(FirstNonPHI) ||
          isa<CatchSwitchInst>(FirstNonPHI))
        return;
      // ...
      if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
        return;
      // ...
    }
  }

  LLVM_DEBUG(/* ... */ L->getHeader()->printAsOperand(dbgs(), false);
             /* ... */);

  // ...
  OptimizeLoopTermCond();

  if (IU.empty()) return;

  // ...
  if (!L->isInnermost()) {
    // ...
    return;
  }
  // ...
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();
  // ...
  LLVM_DEBUG(/* ... */ print_uses(dbgs()));
  LLVM_DEBUG(/* ... */ BaselineCost.print(dbgs()); dbgs() << "\n");

  // ...
  GenerateAllReuseFormulae();

  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);
  // ...

  if (Solution.empty())
    return;

  // ...
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) && "Illegal formula generated!");
  }
  // ...

  ImplementSolution(Solution);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty()) return;

  OS << "LSR has identified the following interesting factors and types: ";
  // ...
  for (int64_t Factor : Factors) {
    // ...
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    // ...
    OS << '(' << *Ty << ')';
  }
  // ...
}

void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      // ...
    }
}

void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    // ...
    for (const Formula &F : LU.Formulae) {
      // ...
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  // ...
}
class LoopStrengthReduce : public LoopPass {
public:
  static char ID; // Pass identification, replacement for typeid

  LoopStrengthReduce();

  // ...
};

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  // ...
}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // ...
}

// ...
  return {Begin, End};
}
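// Everything below this point is debug-info salvaging rather than the
// transformation itself. SCEVDbgValueBuilder translates SCEVs that described
// a now-rewritten IV into DIExpression stack operations (DW_OP_*), so that
// dbg.value users of the old IV can be rewritten in terms of a surviving
// induction variable instead of going undef.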
struct SCEVDbgValueBuilder {
  SCEVDbgValueBuilder() = default;
  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

  void clone(const SCEVDbgValueBuilder &Base) {
    LocationOps = Base.LocationOps;
    Expr = Base.Expr;
  }

  void clear() {
    LocationOps.clear();
    Expr.clear();
  }

  SmallVector<Value *, 2> LocationOps;
  SmallVector<uint64_t, 6> Expr;

  void pushOperator(uint64_t Op) { Expr.push_back(Op); }

  bool pushLocation(Value *V) {
    // ...
    unsigned ArgIndex = 0;
    if (It != LocationOps.end()) {
      ArgIndex = std::distance(LocationOps.begin(), It);
    } else {
      ArgIndex = LocationOps.size();
      LocationOps.push_back(V);
    }
    Expr.push_back(ArgIndex);
    return true;
  }

  bool pushConst(const SCEVConstant *C) {
    if (C->getAPInt().getSignificantBits() > 64)
      return false;
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(C->getAPInt().getSExtValue());
    return true;
  }

  // ...
  auto expr_ops() { return ToDwarfOpIter(Expr); }

  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
                          uint64_t DwarfOp) {
    assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
           "Expected arithmetic SCEV type");
    // ...
    unsigned EmitOperator = 0;
    for (const auto &Op : CommExpr->operands()) {
      // ...
      if (EmitOperator >= 1)
        pushOperator(DwarfOp);
      // ...
    }
    // ...
  }

  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
    // ...
    bool Success = pushSCEV(Inner);
    uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
                          IsSigned ? llvm::dwarf::DW_ATE_signed
                                   : llvm::dwarf::DW_ATE_unsigned};
    for (const auto &Op : CastOps)
      pushOperator(Op);
    return Success;
  }

  bool pushSCEV(const llvm::SCEV *S) {
    bool Success = true;
    if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
      Success &= pushConst(StartInt);
    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      // ...
      pushLocation(U->getValue());
    } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
    } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
      Success &= pushSCEV(UDiv->getLHS());
      Success &= pushSCEV(UDiv->getRHS());
      pushOperator(llvm::dwarf::DW_OP_div);
    } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
      assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
              isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
             "Unexpected cast type in SCEV.");
      Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
    } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
    } else if (isa<SCEVAddRecExpr>(S)) {
      // ...
      return false;
    } else {
      return false;
    }
    return Success;
  }

  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
      if (C->getAPInt().getSignificantBits() > 64)
        return false;
      int64_t I = C->getAPInt().getSExtValue();
      switch (Op) {
      case llvm::dwarf::DW_OP_plus:
      case llvm::dwarf::DW_OP_minus:
        return I == 0;
      case llvm::dwarf::DW_OP_mul:
      case llvm::dwarf::DW_OP_div:
        return I == 1;
      }
    }
    return false;
  }

  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
    if (isa<SCEVAddRecExpr>(SAR.getStart()))
      return false;
    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // ...
    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_mul);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_plus);
    }
    return true;
  }

  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
    pushLocation(OffsetValue);
    // ...
    LLVM_DEBUG(
        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
               << std::to_string(Offset) << "\n");
  }

  bool createIterCountExpr(const SCEV *S,
                           const SCEVDbgValueBuilder &IterationCount,
                           ScalarEvolution &SE) {
    // ...
    if (!isa<SCEVAddRecExpr>(S))
      return false;

    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
                      << '\n');

    const auto *Rec = cast<SCEVAddRecExpr>(S);
    if (!Rec->isAffine())
      return false;
    // ...
    clone(IterationCount);
    if (!SCEVToValueExpr(*Rec, SE))
      return false;
    return true;
  }

  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
                           ScalarEvolution &SE) {
    if (isa<SCEVAddRecExpr>(SAR.getStart())) {
      LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
                        << SAR << '\n');
      return false;
    }
    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // ...
    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_minus);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_div);
    }
    return true;
  }

  void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
                       SmallVectorImpl<Value *> &DestLocations) {
    assert(!DestLocations.empty() &&
           "Expected the locations vector to contain the IV");
    assert(!LocationOps.empty() &&
           "Expected the location ops to contain the IV.");

    SmallVector<uint64_t, 2> DestIndexMap;
    for (const auto &Op : LocationOps) {
      auto It = find(DestLocations, Op);
      if (It != DestLocations.end()) {
        DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
        // ...
      } else {
        // ...
      }
    }

    for (const auto &Op : expr_ops()) {
      if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
        Op.appendToVector(DestExpr);
        continue;
      }
      // ...
      uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
      // ...
    }
  }
};
struct DVIRecoveryRec {
  DVIRecoveryRec(DbgValueInst *DbgValue)
      : DbgRef(DbgValue), Expr(DbgValue->getExpression()),
        HadLocationArgList(false) {}
  DVIRecoveryRec(DbgVariableRecord *DVR)
      : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}

  PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgRef;
  DIExpression *Expr;
  bool HadLocationArgList;
  SmallVector<WeakVH, 2> LocationOps;
  SmallVector<const llvm::SCEV *, 2> SCEVs;
  SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;

  void clear() {
    for (auto &RE : RecoveryExprs)
      RE.reset();
    RecoveryExprs.clear();
  }

  ~DVIRecoveryRec() { clear(); }
};
static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
  unsigned Count = 0;
  auto expr_ops = ToDwarfOpIter(Expr);
  for (auto Op : expr_ops)
    if (Op.getOp() == llvm::dwarf::DW_OP_LLVM_arg)
      Count++;
  return Count;
}

template <typename T>
static void updateDVIWithLocation(T &DbgVal, Value *Location,
                                  SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) == 0 && "Expected expression that does not "
                                    "contain any DW_OP_llvm_arg operands.");
  // ...
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
}

template <typename T>
static void updateDVIWithLocations(T &DbgVal,
                                   SmallVectorImpl<Value *> &Locations,
                                   SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) != 0 &&
         "Expected expression that references DIArglist locations using "
         "DW_OP_llvm_arg operands.");
  SmallVector<ValueAsMetadata *, 3> MetadataLocs;
  for (Value *V : Locations)
    MetadataLocs.push_back(ValueAsMetadata::get(V));
  // ...
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
}

static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
                               SmallVectorImpl<Value *> &NewLocationOps,
                               SmallVectorImpl<uint64_t> &NewExpr) {
  auto UpdateDbgValueInstImpl = [&](auto *DbgVal) {
    unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
    if (NumLLVMArgs == 0) {
      // ...
    } else if (NumLLVMArgs == 1 && NewExpr[0] == llvm::dwarf::DW_OP_LLVM_arg) {
      assert(NewExpr[1] == 0 &&
             "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
      // ...
    } else {
      // ...
    }

    // ...
    if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
      // ...
      DbgVal->setExpression(SalvageExpr);
    }
  };
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
  else
    UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}

static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
  auto RestorePreTransformStateImpl = [&](auto *DbgVal) {
    LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                      << "scev-salvage: post-LSR: " << *DbgVal << '\n');
    assert(DVIRec.Expr && "Expected an expression");
    DbgVal->setExpression(DVIRec.Expr);

    // ...
    if (!DVIRec.HadLocationArgList) {
      assert(DVIRec.LocationOps.size() == 1 &&
             "Unexpected number of location ops.");
      // ...
      Value *CachedValue =
          getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext());
      DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue));
    } else {
      SmallVector<ValueAsMetadata *, 3> MetadataLocs;
      for (WeakVH VH : DVIRec.LocationOps) {
        Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
        MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
      }
      // ...
      DbgVal->setRawLocation(
          llvm::DIArgList::get(DbgVal->getContext(), MetadataLocs));
    }
    LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
  };
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
  else
    RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
                       llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {
  if (isa<DbgValueInst *>(DVIRec.DbgRef)
          ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
          : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
    return false;

  // ...
  SmallVector<Value *, 3> NewLocationOps;
  SmallVector<uint64_t, 6> NewExpr;
  SmallVector<int64_t, 2> LocationOpIndexMap;
  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  NewLocationOps.push_back(LSRInductionVar);

  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
    // ...
    if (VH && !isa<UndefValue>(VH)) {
      NewLocationOps.push_back(VH);
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
      LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
                        << " now at index " << LocationOpIndexMap[i] << "\n");
      continue;
    }

    // ...
    if (SE.containsUndefs(DVIRec.SCEVs[i]) ||
        SE.containsErasedValue(DVIRec.SCEVs[i])) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or erased. "
                           "Salvage abandoned.\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");

    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

    // ...
    if (std::optional<APInt> Offset =
            SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
      if (Offset->getSignificantBits() <= 64)
        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
      else
        return false;
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
                                                 SE))
      return false;
  }

  // ...
  if (DVIRec.Expr->getNumElements() == 0) {
    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
           "DIExpression.");
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);
  }
  for (const auto &Op : DVIRec.Expr->expr_ops()) {
    // ...
    uint64_t LocationArgIndex = Op.getArg(0);
    SCEVDbgValueBuilder *DbgBuilder =
        DVIRec.RecoveryExprs[LocationArgIndex].get();
    if (!DbgBuilder) {
      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
             "Expected a positive index for the location-op position.");
      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
      continue;
    }
    // ...
    DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
  }

  UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    LLVM_DEBUG(/* ... */ dbgs()
               << *cast<DbgValueInst *>(DVIRec.DbgRef) << "\n");
  else
    LLVM_DEBUG(/* ... */ dbgs()
               << *cast<DbgVariableRecord *>(DVIRec.DbgRef) << "\n");
  return true;
}
static void DbgRewriteSalvageableDVIs(
    llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
  if (DVIToUpdate.empty())
    return;

  const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
  assert(SCEVInductionVar &&
         "Anticipated a SCEV for the post-LSR induction variable");

  if (const SCEVAddRecExpr *IVAddRec =
          dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
    if (!IVAddRec->isAffine())
      return;
    // ...
    SCEVDbgValueBuilder IterCountExpr;
    IterCountExpr.pushLocation(LSRInductionVar);
    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
      return;

    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
                      << '\n');

    for (auto &DVIRec : DVIToUpdate) {
      SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
                 IterCountExpr);
    }
  }
}
static void DbgGatherSalvagableDVI(
    Loop *L, ScalarEvolution &SE,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
    SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
  for (const auto &B : L->getBlocks()) {
    for (auto &I : *B) {
      auto ProcessDbgValue = [&](auto *DbgVal) -> bool {
        // ...
        if (DbgVal->isKillLocation())
          return false;

        // ...
        const auto &HasTranslatableLocationOps =
            [&](const auto *DbgValToTranslate) -> bool {
          for (const auto LocOp : DbgValToTranslate->location_ops()) {
            // ...
          }
          return true;
        };

        if (!HasTranslatableLocationOps(DbgVal))
          return false;

        std::unique_ptr<DVIRecoveryRec> NewRec =
            std::make_unique<DVIRecoveryRec>(DbgVal);
        // ...
        NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
        for (const auto LocOp : DbgVal->location_ops()) {
          NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
          NewRec->LocationOps.push_back(LocOp);
          NewRec->HadLocationArgList = DbgVal->hasArgList();
        }
        SalvageableDVISCEVs.push_back(std::move(NewRec));
        return true;
      };
      for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
        if (DVR.isDbgValue() || DVR.isDbgAssign())
          ProcessDbgValue(&DVR);
      }
      auto DVI = dyn_cast<DbgValueInst>(&I);
      if (!DVI)
        continue;
      if (ProcessDbgValue(DVI))
        DVIHandles.insert(DVI);
    }
  }
}
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
                                           const LSRInstance &LSR) {
  auto IsSuitableIV = [&](PHINode *P) {
    if (!SE.isSCEVable(P->getType()))
      return false;
    // ...
    return false;
  };

  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    if (!IV)
      continue;
    // ...
    if (IsSuitableIV(P))
      return P;
  }

  for (PHINode &P : L.getHeader()->phis()) {
    if (IsSuitableIV(&P))
      return &P;
  }
  return nullptr;
}
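// Terminating-condition folding (lsr-term-fold): if the latch exit compares a
// dying IV against a loop-invariant bound, the routine below looks for
// another affine AddRec PHI whose value at the backedge-taken count can be
// expanded cheaply, so the exit test can be rewritten against that IV and the
// original one can be removed.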
static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                      const LoopInfo &LI, const TargetTransformInfo &TTI) {
  if (!L->isInnermost()) {
    // ...
    return std::nullopt;
  }

  if (!L->isLoopSimplifyForm()) {
    // ...
    return std::nullopt;
  }

  if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
    LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n");
    return std::nullopt;
  }

  BasicBlock *LoopLatch = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
  if (!BI || BI->isUnconditional())
    return std::nullopt;
  auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!TermCond) {
    LLVM_DEBUG(
        dbgs() << "Cannot fold on branching condition that is not an ICmpInst");
    return std::nullopt;
  }
  if (!TermCond->hasOneUse()) {
    LLVM_DEBUG(
        dbgs()
        << "Cannot replace terminating condition with more than one use\n");
    return std::nullopt;
  }

  // ...
  Value *RHS = TermCond->getOperand(1);
  if (!LHS || !L->isLoopInvariant(RHS))
    // ...
    return std::nullopt;

  // ...
  Value *ToFoldStart, *ToFoldStep;
  if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
    return std::nullopt;

  if (ToFold->getParent() != L->getHeader())
    return std::nullopt;

  // ...
    return std::nullopt;

  const unsigned ExpansionBudget = [&]() {
    // ...
      return std::min(Budget, SmallTC);
    // ...
      return std::min(Budget, *SmallTC);
    // ...
  }();

  const SCEV *BECount = SE.getBackedgeTakenCount(L);
  const DataLayout &DL = L->getHeader()->getDataLayout();
  SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");

  PHINode *ToHelpFold = nullptr;
  const SCEV *TermValueS = nullptr;
  bool MustDropPoison = false;
  auto InsertPt = L->getLoopPreheader()->getTerminator();
  for (PHINode &PN : L->getHeader()->phis()) {
    if (ToFold == &PN)
      continue;

    if (!SE.isSCEVable(PN.getType())) {
      LLVM_DEBUG(dbgs() << "IV of phi '" << PN
                        << "' is not SCEV-able, not qualified for the "
                           "terminating condition folding.\n");
      continue;
    }

    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
    if (!AddRec || !AddRec->isAffine()) {
      LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
                        << "' is not an affine add recursion, not qualified "
                           "for the terminating condition folding.\n");
      continue;
    }

    // ...
    const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
    const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
    if (!Expander.isSafeToExpand(TermValueSLocal)) {
      LLVM_DEBUG(
          dbgs() << "Is not safe to expand terminating value for phi node" << PN
                 << "\n");
      continue;
    }

    if (Expander.isHighCostExpansion({TermValueSLocal}, L, ExpansionBudget,
                                     &TTI, InsertPt)) {
      LLVM_DEBUG(
          dbgs() << "Is too expensive to expand terminating value for phi node"
                 << PN << "\n");
      continue;
    }

    // ...
    bool MustDropPoisonLocal = false;
    // ...

    ToHelpFold = &PN;
    TermValueS = TermValueSLocal;
    MustDropPoison = MustDropPoisonLocal;
  }

  LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
                 << "Cannot find other AddRec IV to help folding\n";);

  LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
             << "\nFound loop that can fold terminating condition\n"
             << "  TermCond: " << *TermCond << "\n"
             << "  BrandInst: " << *BI << "\n"
             << "  ToFold: " << *ToFold << "\n"
             << "  ToHelpFold: " << *ToHelpFold << "\n");

  if (!ToFold || !ToHelpFold)
    return std::nullopt;
  return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
}
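// ReduceLoopStrength() is the driver shared by the legacy and new pass
// managers: it records salvageable dbg.values, runs LSRInstance, optionally
// folds redundant IVs via SCEVExpander::replaceCongruentIVs, performs the
// terminating-condition fold when enabled, and finally rewrites the cached
// dbg.values against a surviving induction variable.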
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI,
                               AssumptionCache &AC, TargetLibraryInfo &TLI,
                               MemorySSA *MSSA) {
  // ...
  SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
  SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
  DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);

  bool Changed = false;
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Run the main LSR transformation.
  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();

  // ...
  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    // ...
    const DataLayout &DL = L->getHeader()->getDataLayout();
    // ...
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    if (numFolded) {
      Changed = true;
      // ...
    }
  }
  // ...
  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    // ...
    const DataLayout &DL = L->getHeader()->getDataLayout();
    // ...
  }

  const bool EnableFormTerm = [&] {
    // ...
  }();

  if (EnableFormTerm) {
    if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI, TTI)) {
      auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;

      Changed = true;
      // ...

      BasicBlock *LoopPreheader = L->getLoopPreheader();
      BasicBlock *LoopLatch = L->getLoopLatch();

      LLVM_DEBUG(/* ... */ dbgs() << "New term-cond phi-node:\n"
                                  << *ToHelpFold << "\n");

      Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
      Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);

      // ...
      if (MustDrop)
        cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();

      const DataLayout &DL = L->getHeader()->getDataLayout();
      SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
      assert(Expander.isSafeToExpand(TermValueS) &&
             "Terminating value was checked safe in canFoldTerminatingCondition");

      // ...
      LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
                        << *StartValue << "\n"
                        << "Terminating value of new term-cond phi-node:\n"
                        << *TermValue << "\n");

      // ...
      Value *NewTermCond =
          LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
                                  "lsr_fold_term_cond.replaced_term_cond");
      // ...
      LLVM_DEBUG(dbgs() << "Old term-cond:\n"
                        << *OldTermCond << "\n"
                        << "New term-cond:\n"
                        << *NewTermCond << "\n");
      // ...
    }
  }

  if (SalvageableDVIRecords.empty())
    return Changed;

  // ...
  for (const auto &L : LI) {
    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
    else {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
    }
  }

  for (auto &Rec : SalvageableDVIRecords)
    Rec->clear();
  SalvageableDVIRecords.clear();
  // ...
  return Changed;
}
7161 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7162 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7163 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7164 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7165 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7166 *
L->getHeader()->getParent());
7167 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7168 *
L->getHeader()->getParent());
7169 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7170 *
L->getHeader()->getParent());
7171 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7174 MSSA = &MSSAAnalysis->getMSSA();
7191char LoopStrengthReduce::ID = 0;
7194 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
iv Induction Variable Users
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static std::optional< std::tuple< PHINode *, PHINode *, const SCEV *, bool > > canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, const LoopInfo &LI, const TargetTransformInfo &TTI)
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static cl::opt< cl::boolOrDefault > AllowTerminatingConditionFoldingAfterLSR("lsr-term-fold", cl::Hidden, cl::desc("Attempt to replace primary IV with other IV."))
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t ScalableOffset=0)
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs, SmallSet< AssertingVH< DbgValueInst >, 2 > &DVIHandles)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This defines the Use class.
Virtual Register Rewriter
static const uint32_t IV[8]
Class recording the (high level) value of a variable.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Value handle that asserts if the Value is deleted.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
This is the shared class of boolean and integer constants.
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
An iterator for expression operands.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
void print(raw_ostream &OS) const
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
bool hasPoisonGeneratingFlags() const LLVM_READONLY
Return true if this operator has flags which may cause this instruction to evaluate to poison despite...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
PointerIntPair - This class implements a pair of a pointer and small integer.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
const Loop * getLoop() const
This is the base class for unary cast operator classes.
This node is the base class for n'ary commutative operators.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
bool hasNoUnsignedWrap() const
bool hasNoSelfWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This class represents a signed maximum selection.
This class represents a binary unsigned division operation.
This class represents an unsigned maximum selection.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the given type, for which isSCEVable must return true.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the given type.
bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a single pointer operand.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and return the result as an APInt if it is a constant, and std::nullopt if it isn't.
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if the elements that make up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
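A sketch of the typical ScalarEvolution queries an induction-variable pass makes; the function and its structure are illustrative only:
static void inspect(Loop *L, Value *Ptr, ScalarEvolution &SE) {
  if (!SE.hasLoopInvariantBackedgeTakenCount(L))
    return;                                        // trip count not analyzable
  const SCEV *BTC = SE.getBackedgeTakenCount(L);   // SCEVCouldNotCompute otherwise
  if (!SE.isSCEVable(Ptr->getType()))
    return;                                        // Ptr not representable in SCEV
  const SCEV *S = SE.getSCEV(Ptr);                 // analyzed form of Ptr
  if (SE.isLoopInvariant(S, L))
    return;                                        // nothing loop-varying to reduce
  const SCEV *Base = SE.getPointerBase(S);         // strip the pointer arithmetic
  (void)BTC; (void)Base;
}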
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
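A SetVector rejects duplicates while preserving first-insertion iteration order, which keeps worklists deterministic. Illustrative sketch:
static unsigned collectUnique(ArrayRef<const SCEV *> In,
                              SmallSetVector<const SCEV *, 8> &Out) {
  for (const SCEV *S : In)
    Out.insert(S);         // returns false and stores nothing for repeats
  return Out.size();       // distinct entries, in first-insertion order
}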
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is small.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
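Two equivalent ways of walking the set bits of such a bitvector, e.g. a per-register set of use indices; illustrative only:
static void printSetBits(const SmallBitVector &Bits) {
  for (int I = Bits.find_first(); I != -1; I = Bits.find_next(I))
    dbgs() << I << ' ';                   // explicit find_first/find_next loop
  for (unsigned I : Bits.set_bits())      // range form over the same bits
    dbgs() << I << ' ';
  dbgs() << '(' << Bits.count() << " of " << Bits.size() << " set)\n";
}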
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
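The 'small' containers keep their elements in inline storage until they outgrow the template capacity; a typical visited-set plus worklist pairing, illustrative only:
static void seed(ArrayRef<Instruction *> Insts,
                 SmallVectorImpl<Instruction *> &Worklist) {
  SmallPtrSet<Instruction *, 16> Seen;   // no heap allocation below 16 entries
  for (Instruction *I : Insts)
    if (Seen.insert(I).second)           // .second is true on first insertion
      Worklist.push_back(I);             // keep each instruction exactly once
}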
int64_t getFixed() const
Returns the fixed component of the stack.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static Type * getVoidTy(LLVMContext &C)
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
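A sketch of how these queries distinguish the access types this pass cares about (purely illustrative):
static void describe(Type *Ty, raw_ostream &OS) {
  if (Ty->isPointerTy())
    OS << "ptr addrspace(" << Ty->getPointerAddressSpace() << ")\n";
  else if (Ty->isIntegerTy())
    OS << 'i' << Ty->getIntegerBitWidth() << '\n';
  else
    OS << "neither integer nor pointer\n";
}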
This class represents a cast from unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
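The def-use API works at two granularities: rewriting one user's operands versus rewriting every use of a value at once. A minimal sketch with illustrative names:
static void rewrite(Instruction *Old, Value *New, User *OneUser) {
  OneUser->replaceUsesOfWith(Old, New);   // only OneUser's operands change
  if (Old->hasOneUse())                   // a single use left?
    Old->replaceAllUsesWith(New);         // retire Old entirely
}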
A nullable Value handle.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream; it does not support seeking or rewinding.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provably be executed on the path to OnPathTo if Root produced a poison result.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
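For a power of two these two bit-math helpers agree; a tiny illustrative check:
static void bitMathExample() {
  uint32_t V = 8;                        // 0b1000
  unsigned FloorLog = Log2_32(V);        // 3
  unsigned Trailing = countr_zero(V);    // also 3, since V is a power of two
  assert(FloorLog == Trailing);
  (void)FloorLog; (void)Trailing;
}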
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function.
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
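normalizeForPostIncUse and denormalizeForPostIncUse are intended as inverses for a given loop set; a round-trip sketch, assuming (per the CheckInvertible parameter) that normalization can fail and return null:
static const SCEV *roundTrip(const SCEV *S, const PostIncLoopSet &Loops,
                             ScalarEvolution &SE) {
  const SCEV *Normalized = normalizeForPostIncUse(S, Loops, SE);
  if (!Normalized)
    return nullptr;          // assumed: not invertible for these loops
  return denormalizeForPostIncUse(Normalized, Loops, SE);  // should equal S
}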
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Pass * createLoopStrengthReducePass()
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not trivially dead.
constexpr unsigned BitWidth
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
void initializeLoopStrengthReducePass(PassRegistry &)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the exit test Cond were removed.
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed, substitute the exit values from the loop into any instructions outside of the loop that use the final values of the current expressions.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
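The range wrappers above (all_of, any_of, none_of, find, find_if, count, is_contained) take whole containers rather than iterator pairs; two illustrative predicates over a formula's registers:
static bool allLoopInvariant(ArrayRef<const SCEV *> Regs, const Loop *L,
                             ScalarEvolution &SE) {
  return all_of(Regs, [&](const SCEV *R) { return SE.isLoopInvariant(R, L); });
}
static bool mentionsReg(ArrayRef<const SCEV *> Regs, const SCEV *R) {
  return is_contained(Regs, R);   // same as: find(Regs, R) != Regs.end()
}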
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Option class for critical edge splitting.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.