#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to an LSR cost model"));

    cl::desc("Narrow LSR complex solution using the expected number of registers"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Don't prefer any addressing mode"),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode")));

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSR's setup cost"));

    cl::desc("Attempt to replace primary IV with other IV."));

    cl::desc("Attempt to drop solution if it is less profitable"));

          "Number of terminating condition folds recognized and performed");

    cl::desc("Stress test LSR IV chains"));
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

                              unsigned AS = UnknownAddressSpace) {
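  // MemAccessTy pairs the memory type of an access with its address space so
  // that addressing-mode legality can be queried per address space. The
  // getUnknown() factory above appears to build the "memory type not known"
  // variant: a placeholder type in the given (or unknown) address space.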
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "[NumUses=" << UsedByIndices.count() << ']';

  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
      RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;

  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);

RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));

RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())

  if (i == -1)
    return false;
  if ((size_t)i != LUIdx)
    return true;

  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
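// RegUseTracker's bookkeeping is a map from candidate register (a SCEV) to a
// bit vector indexed by LSRUse number: bit LUIdx is set iff use LUIdx has a
// formula mentioning that register. isRegUsedByUsesOtherThan() then reduces
// to "is any bit other than LUIdx set"; the i == -1 branch above appears to
// come from find_first() on the bit vector reporting an empty set.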
void RegUseTracker::clear() {

  int64_t BaseOffset = 0;

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  int64_t UnfoldedOffset = 0;

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

    for (const SCEV *S : Add->operands())

    if (!AR->getStart()->isZero() && AR->isAffine()) {
                      AR->getStepRecurrence(SE),

    const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
    for (const SCEV *S : MyGood)
    for (const SCEV *S : MyBad)

      BaseRegs.push_back(Sum);

      BaseRegs.push_back(Sum);

  return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);

bool Formula::isCanonical(const Loop &L) const {
    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())

void Formula::canonicalize(const Loop &L) {
  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);

    ScaledReg = BaseRegs.pop_back_val();

  if (I != BaseRegs.end())
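// A Formula denotes the affine expression
//   BaseGV + BaseOffset + sum(BaseRegs) + Scale * ScaledReg
// and the canonical form maintained here (sketched from the invariants the
// asserts above enforce) keeps a ScaledReg only when there is a real scale:
// a trivial 1*reg product is folded back into BaseRegs, and when Scale == 1
// the ScaledReg is expected to be an addrec of the current loop.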
bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
  if (BaseRegs.size() != 1 || ScaledReg)

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :

void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    BaseGV->printAsOperand(OS, /*PrintType=*/false);
  if (BaseOffset != 0) {
  for (const SCEV *BaseReg : BaseRegs) {
    OS << "reg(" << *BaseReg << ')';
  if (HasBaseReg && BaseRegs.empty()) {
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    OS << "**error: !HasBaseReg**";
    OS << Scale << "*reg(";
  if (UnfoldedOffset != 0) {
    OS << "imm(" << UnfoldedOffset << ')';
                            bool IgnoreSignificantBits = false) {

  if (RA.isAllOnes()) {

    const APInt &LA = C->getAPInt();

  if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                    IgnoreSignificantBits);
    if (!Step)
      return nullptr;
                                     IgnoreSignificantBits);
    if (!Start)
      return nullptr;

    for (const SCEV *S : Add->operands()) {
      if (!Op)
        return nullptr;

          dyn_cast<SCEVConstant>(MulRHS->getOperand(0));

                         IgnoreSignificantBits)) {

  if (C->getAPInt().getSignificantBits() <= 64) {
    return C->getValue()->getSExtValue();
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getPointerOperand() == OperandVal)
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)

      if (IntrInfo.PtrVal == OperandVal)
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::masked_load:
    case Intrinsic::masked_store:
          II->getArgOperand(1)->getType()->getPointerAddressSpace();
  if (!Processed.insert(S).second)

    for (const SCEV *S : Add->operands()) {

    Value *UVal = U->getValue();
    if (UI && UI->getOpcode() == Instruction::Mul &&

                                 const LSRUse &LU, const Formula &F);
                                 const LSRUse &LU, const Formula &F,

  const Loop *L = nullptr;

        L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;
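  // The two expressions above encode one invariant: either no cost component
  // is the ~0u sentinel, or every component is. Loser() (further down) sets
  // all fields to the saturated maximum at once, so checking a single field
  // (NumRegs here) suffices to detect a formula already marked as a loser.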
  void RateFormula(const Formula &F,

  void RateRegister(const Formula &F, const SCEV *Reg,
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,

  Value *OperandValToReplace = nullptr;

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

struct UniquifierDenseMapInfo {
    V.push_back(reinterpret_cast<const SCEV *>(-1));
    V.push_back(reinterpret_cast<const SCEV *>(-2));
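  // The -1 and -2 pointer casts are the conventional DenseMap "empty" and
  // "tombstone" sentinel keys: they only need to compare unequal to any real
  // const SCEV * and to each other, and are never dereferenced.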
  MemAccessTy AccessTy;

  int64_t MinOffset = std::numeric_limits<int64_t>::max();
  int64_t MaxOffset = std::numeric_limits<int64_t>::min();

  bool AllFixupsOutsideLoop = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (f.Offset > MaxOffset)
      MaxOffset = f.Offset;
    if (f.Offset < MinOffset)
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses);
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,

  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))

  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
  if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
  if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
                    [&](unsigned i, const SCEV *Reg) {
                      return i + getSetupCost(Reg, Depth - 1);
  if (auto S = dyn_cast<SCEVUDivExpr>(Reg))

void Cost::RateRegister(const Formula &F, const SCEV *Reg,

    if (AR->getLoop() != L) {

      if (!AR->getLoop()->contains(L)) {

    unsigned LoopCost = 1;

      if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
        if (Step->getAPInt() == F.BaseOffset)

      const SCEV *LoopStep = AR->getStepRecurrence(*SE);
      if (isa<SCEVConstant>(LoopStep)) {
        const SCEV *LoopStart = AR->getStart();
        if (!isa<SCEVConstant>(LoopStart) &&

    C.AddRecCost += LoopCost;

    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(F, AR->getOperand(1), Regs);

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);

  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&

void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
  if (LoserRegs && LoserRegs->count(Reg)) {
  if (Regs.insert(Reg).second) {
    RateRegister(F, Reg, Regs);
    if (LoserRegs && isLoser())
void Cost::RateFormula(const Formula &F,

  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");

  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  C.NumBaseAdds += (F.UnfoldedOffset != 0);

  for (const LSRFixup &Fixup : LU.Fixups) {
    int64_t O = Fixup.Offset;

    if (LU.Kind == LSRUse::Address && Offset != 0 &&

  if (C.NumRegs > TTIRegNum) {
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
      C.Insns += (C.NumRegs - TTIRegNum);

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&

  C.Insns += (C.AddRecCost - PrevAddRecCost);

  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
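// The saturating block above is Cost::Loser(): a formula rejected for any
// reason is given the maximum value in every component so it compares worse
// than any real cost. Roughly, RateFormula's components mean: NumRegs counts
// distinct registers the formula keeps live, AddRecCost counts recurrences
// needing an update each iteration, NumBaseAdds counts adds required to
// combine base parts, ImmCost penalizes immediates that cannot fold into the
// addressing mode, and Insns approximates the net instruction count.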
bool Cost::isLess(const Cost &Other) const {
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))

  return !L->contains(UserInst);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
    UserInst->printAsOperand(OS, /*PrintType=*/false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);

bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  return Uniquifier.count(Key);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();
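// With FNum counting the formulae of this use that reference Reg, the value
// returned is (size - FNum) / size: the fraction of this use's formulae that
// do NOT mention Reg, i.e. the probability that Reg goes unused if a formula
// is picked uniformly at random. E.g. 2 referencing formulae out of 8 gives
// 0.75.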
bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);

  if (!Uniquifier.insert(Key).second)

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");

  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {

  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())

      OS << *AccessTy.MemTy;

    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,

  case LSRUse::Address:
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup);

  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset != 0)

    if (Scale != 0 && Scale != -1)

    if (BaseOffset != 0) {

      BaseOffset = -(uint64_t)BaseOffset;

    return !BaseGV && Scale == 0 && BaseOffset == 0;

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
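// The ICmpZero case is the interesting one: the use is an "icmp(expr, 0)",
// so a formula only folds if the whole expression can be materialized in the
// compare itself. The BaseOffset negation above reflects rewriting
// "reg + offset == 0" as "reg == -offset", which is why the offset must also
// survive negation without overflow.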
                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {

  if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
  MinOffset = (uint64_t)BaseOffset + MinOffset;
  if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
  MaxOffset = (uint64_t)BaseOffset + MaxOffset;

                              HasBaseReg, Scale) &&

                       int64_t MinOffset, int64_t MaxOffset,
                       LSRUse::KindType Kind, MemAccessTy AccessTy,
                       const Formula &F, const Loop &L) {

  assert((F.isCanonical(L) || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                          int64_t MaxOffset, LSRUse::KindType Kind,
                          int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
                              BaseOffset, HasBaseReg, Scale) ||
                                    BaseGV, BaseOffset, true, 0));

                          int64_t MaxOffset, LSRUse::KindType Kind,
                          MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);

                                 const LSRUse &LU, const Formula &F) {

  for (const LSRFixup &Fixup : LU.Fixups)
                              (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                              F.Scale, Fixup.UserInst))

                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F,

    return F.Scale != 1;

  case LSRUse::Address: {
        LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
        F.Scale, LU.AccessTy.AddrSpace);
        LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
        F.Scale, LU.AccessTy.AddrSpace);

           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);

  case LSRUse::ICmpZero:
  case LSRUse::Special:
                             LSRUse::KindType Kind, MemAccessTy AccessTy,

  if (BaseOffset == 0 && !BaseGV)
    return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  if (!HasBaseReg && Scale == 1) {

                             int64_t MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,

  if (S->isZero())
    return true;

  if (!S->isZero())
    return false;

  if (BaseOffset == 0 && !BaseGV)
    return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

                              BaseOffset, HasBaseReg, Scale);

  const SCEV *IncExpr;

      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

    return std::next(Incs.begin());

  bool hasIncs() const { return Incs.size() >= 2; }
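  // An IVChain records a head user plus the IVInc increments that derive each
  // subsequent IV value from the previous one, so the whole chain can be
  // materialized as a single live induction variable bumped between users.
  // hasIncs() asks whether the chain actually chains anything: with fewer
  // than two increments there is nothing to connect.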
  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,

  bool Changed = false;

  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  void OptimizeShadowIV();
  void OptimizeLoopTermCond();

  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                    MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

                    const Cost &CurCost,

                         const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

  bool getChanged() const { return Changed; }
    return ScalarEvolutionIVs;
void LSRInstance::OptimizeShadowIV() {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    Type *DestTy = nullptr;
    bool IsSigned = false;

    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      DestTy = UCast->getDestTy();
    } else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;
                                 (double)Init->getSExtValue() :
                                 (double)Init->getZExtValue());

    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)

    if (!C->getValue().isStrictlyPositive()) continue;

                                 Instruction::FAdd : Instruction::FSub,
                             NewPH, CFP, "IV.S.next.", Incr);

    if (U.getUser() == Cond) {

  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;

  if (Max->getNumOperands() != 2)

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))

         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
    if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
    if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();

  Cond->replaceAllUsesWith(NewCond);

  Cond->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
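// This routine (OptimizeMax) targets the "smax(n, 1)" trip-count guard that
// loop canonicalization introduces for loops like "do { } while (--n)":
// an equality compare of the IV against smax(n, 1) can be rewritten as a
// direct slt/sle (or ult/ule for umax) comparison against n itself, removing
// the max from the exit condition so the stride can be reused.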
LSRInstance::OptimizeLoopTermCond() {

  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *ExitingBlock : ExitingBlocks) {

    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());

    if (!FindIVUserForCond(Cond, CondUse))

    if (!DT.dominates(ExitingBlock, LatchBlock))

    if (LatchBlock != ExitingBlock)
        if (&*UI != CondUse &&
            !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {

          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(*UI, L);
          if (!A || !B) continue;

            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;

            if (C->getValue().getSignificantBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;

                TTI, UI->getUser(), UI->getOperandValToReplace());
            int64_t Scale = C->getSExtValue();
                                          AccessTy.AddrSpace))
              goto decline_post_inc;
                                          AccessTy.AddrSpace))
              goto decline_post_inc;

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

    if (Cond->getNextNonDebugInstruction() != TermBr) {
      if (Cond->hasOneUse()) {
        Cond->moveBefore(TermBr);

        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");

      IVIncInsertPos = L->getLoopLatch()->getTerminator();
      IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
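// Preferring the post-incremented IV in the exiting compare lets targets
// with post-increment addressing fold the IV bump into a memory access and
// keeps the compare next to the branch. The decline_post_inc paths fire when
// some other IV user might profitably reuse the pre-increment stride, for
// example when the stride ratio is one or minus one or when the target could
// fold that ratio into an addressing mode, since rewriting would then add
// instructions rather than remove them.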
bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  int64_t NewMinOffset = LU.MinOffset;
  int64_t NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)

  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);

  if (NewOffset < LU.MinOffset) {
                          LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (NewOffset > LU.MaxOffset) {
                          NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;

std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
                                               LSRUse::KindType Kind,
                                               MemAccessTy AccessTy) {

  std::pair<UseMapTy::iterator, bool> P =

    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);

void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())

  RegUses.swapAndDropUse(LUIdx, Uses.size());

LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {

  for (LSRUse &LU : Uses) {
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {

      for (const Formula &F : LU.Formulae) {

        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset == 0)
void LSRInstance::CollectInterestingTypesAndFactors() {

    const SCEV *Expr = IU.getExpr(U);

    } while (!Worklist.empty());

         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

              dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());
        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());

  if (Types.size() == 1)

  for (; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
              dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {

      if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
        return Trunc->getOperand(0);

    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());

      if (SubExpr->getSCEVType() == scAddExpr)

      if (SubExpr->getSCEVType() != scMulExpr)

    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());

bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,

  if (!isa<SCEVConstant>(IncExpr)) {
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))

  if (!Chain.hasIncs())

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst : Users) {
                 dbgs() << "  " << *Inst << "\n";
               });

  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  for (const IVInc &Inc : Chain) {

    if (Inc.IncExpr->isZero())

    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;

    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))

    if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))

    LastIncExpr = OperExpr;

    if (!isa<SCEVAddRecExpr>(LastIncExpr))

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),

    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for (; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)

        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {

    NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);

void LSRInstance::CollectChains() {

  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))

      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  for (PHINode &PN : L->getHeader()->phis()) {
        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
      ChainInstruction(&PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                            ChainUsersVec[UsersIdx].FarUsers, SE, TTI))

    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);

  IVChainVec.resize(ChainIdx);

void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);

void LSRInstance::GenerateIVChain(const IVChain &Chain,

  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {

    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {

    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {

    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");

  assert(IVSrc && "Failed to find IV chain source");

  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {

      LeftOverExpr = LeftOverExpr ?
          SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
    if (LeftOverExpr && !LeftOverExpr->isZero()) {

      Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);

      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      assert(IVTy == IVOper->getType() && "inconsistent IV increment type");

      LeftOverExpr = nullptr;

    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))

  if (isa<PHINode>(Chain.tailUserInst())) {
    for (PHINode &Phi : L->getHeader()->phis()) {
          Phi.getIncomingValueForBlock(L->getLoopLatch()));

      Value *IVOper = IVSrc;
      if (IVTy != PostIncTy) {
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      Phi.replaceUsesOfWith(PostIncV, IVOper);
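// Materialization walks the chain in order, accumulating not-yet-emitted
// increments in LeftOverExpr and expanding an add only when a user actually
// needs the next IV value; a chain of n address users thus becomes one IV
// plus cheap bumps instead of n independent address computations. The final
// loop over header phis rewires any phi that consumed the original
// post-incremented value to use the chain's equivalent value instead.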
void LSRInstance::CollectFixupsAndInitialFormulae() {

  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
      Kind = LSRUse::Address;

    const SCEV *S = IU.getExpr(U);

    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {

      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))

      if (CI->isEquality()) {

        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);

            (!NV->getType()->isPointerTy() ||

          Kind = LSRUse::ICmpZero;
        } else if (L->isLoopInvariant(NV) &&
                   (!isa<Instruction>(NV) ||
                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
                   !NV->getType()->isPointerTy()) {

          Kind = LSRUse::ICmpZero;
          assert(!isa<SCEVCouldNotCompute>(S));

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;

    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;

    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
      VisitedLSRUse.insert(LUIdx);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);

void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,

    LU.RigidFormula = true;

  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))

  CountRegisters(F, LUIdx);
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {

  while (!Worklist.empty()) {

    if (!Visited.insert(S).second)

    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        if (L->contains(Inst)) continue;
      } else if (isa<Constant>(V))

      for (const Use &U : V->uses()) {
        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());

        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
            cast<PHINode>(UserInst)->getIncomingBlock(
        if (!DT.dominates(L->getHeader(), UseBB))

        if (isa<PHINode>(UserInst)) {
          const auto *PhiNode = cast<PHINode>(UserInst);
          bool HasIncompatibleEHPTerminatedBlock = false;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
          if (HasIncompatibleEHPTerminatedBlock) {

        if (!isa<SCEVUnknown>(UserS))

        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));

        std::pair<size_t, int64_t> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;

        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;

        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);

                            unsigned Depth = 0) {

    for (const SCEV *S : Add->operands()) {
    } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

      if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
        Remainder = nullptr;

      const SCEV *Remainder =

                           LSRUse &LU, const SCEV *S, const Loop *L,
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())

  if (!isa<SCEVConstant>(LoopStep))

  if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,

  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (AddOps.size() == 1)

                                     LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J),

    if (InnerAddOps.size() == 1 &&
                                   LU.AccessTy, InnerAddOps[0],
                                   Base.getNumRegs() > 1))

    const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);

      F.ScaledReg = nullptr;

      F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
    else
      F.BaseRegs[Idx] = InnerSum;

                                SC->getValue()->getZExtValue()))
          (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
      F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))

      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),

void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,

void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,

  if (Base.BaseRegs.size() + (Base.Scale == 1) +
      (Base.UnfoldedOffset != 0) <= 1)

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {

    if (!CombinedIntegerType)

      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;

    F.BaseRegs.push_back(Sum);

    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");

    NewBase.UnfoldedOffset = 0;
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,

  if (G->isZero() || !GV)

  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,

  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,

void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,

  auto GenerateOffset = [&](const SCEV *G, int64_t Offset) {

    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {

        F.ScaledReg = nullptr;

        F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)

        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

  if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
            dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
      const APInt &StepInt = StepRec->getAPInt();

      for (int64_t Offset : Worklist) {

  for (int64_t Offset : Worklist)

  if (G->isZero() || Imm == 0)

    F.BaseRegs[Idx] = G;

  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,

  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  if (LU.MinOffset != LU.MaxOffset) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)

  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {

    if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
    int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset / Factor != Base.BaseOffset)

    int64_t Offset = LU.MinOffset;
    if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
    if (Offset / Factor != LU.MinOffset)

    F.BaseOffset = NewBaseOffset;

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

    if (F.UnfoldedOffset != 0) {
      if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
      F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
      if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)

    (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {

  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not do its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,

      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero &&
        !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {

        if (!Quotient->isZero()) {

          F.ScaledReg = Quotient;
          F.deleteBaseReg(F.BaseRegs[i]);

          if (F.Scale == 1 && (F.BaseRegs.empty() ||
                               (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))

          if (F.Scale == 1 && LU.AllFixupsOutsideLoop)

          (void)InsertFormula(LU, LUIdx, F);

  const SCEV *Result = nullptr;
  for (auto &L : Loops) {

    if (!New || (Result && New != Result))

  assert(Result && "failed to create expression");
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {

  if (Base.BaseGV) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
                 [](const SCEV *S) { return S->getType()->isPointerTy(); }))

  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {

      const SCEV *NewScaledReg =
      if (!NewScaledReg || NewScaledReg->isZero())
      F.ScaledReg = NewScaledReg;

      bool HasZeroBaseReg = false;
      for (const SCEV *&BaseReg : F.BaseRegs) {
        const SCEV *NewBaseReg =
        if (!NewBaseReg || NewBaseReg->isZero()) {
          HasZeroBaseReg = true;
        BaseReg = NewBaseReg;

      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

      (void)InsertFormula(LU, LUIdx, F);

  const SCEV *OrigReg;

      : LUIdx(LI), Imm(I), OrigReg(R) {}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << ", add offset " << Imm;
void LSRInstance::GenerateCrossUseConstantOffsets() {

  using ImmMapTy = std::map<int64_t, const SCEV *>;

  for (const SCEV *Use : RegUses) {
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    if (Imms.size() == 1)

    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry
                      << ' ' << Entry.first;

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;

      int64_t JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {

      int64_t First = Imms.begin()->first;
      int64_t Last = std::prev(Imms.end())->first;

      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Avg)};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE) continue;

        if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)

  UsedByIndicesMap.clear();
  UniqueItems.clear();

  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    int64_t Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];

      if (F.ScaledReg == OrigReg) {

        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
          if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
                  .ule(std::abs(NewF.BaseOffset)))

        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);

        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          const SCEV *BaseReg = F.BaseRegs[N];
          if (BaseReg != OrigReg)

          NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
                           LU.Kind, LU.AccessTy, NewF)) {

            NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;

          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          for (const SCEV *NewReg : NewF.BaseRegs)
              if ((C->getAPInt() + NewF.BaseOffset)
                      .slt(std::abs(NewF.BaseOffset)) &&
                  (unsigned)llvm::countr_zero<uint64_t>(NewF.BaseOffset))

          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
LSRInstance::GenerateAllReuseFormulae() {

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  GenerateCrossUseConstantOffsets();

             "After generating reuse formulae:\n";
             print_uses(dbgs()));
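// The staging here looks deliberate: reassociation and combination first
// restructure the base registers, the offset/scale generators then mutate
// each resulting formula, and truncates run last so they apply to every
// variant produced so far. Cross-use constant offsets come at the very end
// since they need the register-use table populated by all the per-use
// generators.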
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {

  bool ChangedFormulae = false;

  using BestFormulaeTy =

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
      if (CostF.isLoser()) {

        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);

      std::pair<BestFormulaeTy::const_iterator, bool> P =
          BestFormulae.insert(std::make_pair(Key, FIdx));

        Formula &Best = LU.Formulae[P.first->second];

        Cost CostBest(L, SE, TTI, AMK);
        CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
        if (CostF.isLess(CostBest))
                          "  in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');

        ChangedFormulae = true;

        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

    BestFormulae.clear();

             "After filtering out undesirable candidates:\n";

size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {

    LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                         "which use a superset of registers used by other "

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];

      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];

                 I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {

            NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue();
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {

              LU.DeleteFormula(F);

          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))

                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                    (I - F.BaseRegs.begin()));
                if (LU.HasFormulaWithSameRegs(NewF)) {

                  LU.DeleteFormula(F);

        LU.RecomputeRegs(LUIdx, RegUses);

void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {

    LLVM_DEBUG(
        dbgs() << "The search space is too complex.\n"
                  "Narrowing the search space by assuming that uses separated "
                  "by a constant offset will use the same registers.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      for (const Formula &F : LU.Formulae) {
        if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))

        LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);

        if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/false,
                                LU.Kind, LU.AccessTy))

        LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

        for (LSRFixup &Fixup : LU.Fixups) {
          Fixup.Offset += F.BaseOffset;
          LUThatHas->pushFixup(Fixup);

        for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
          Formula &F = LUThatHas->Formulae[i];
          if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                          LUThatHas->Kind, LUThatHas->AccessTy, F)) {

            LUThatHas->DeleteFormula(F);

        LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

        DeleteUse(LU, LUIdx);

void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {

    LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                         "undesirable dedicated registers.\n");

    FilterOutUndesirableDedicatedRegisters();
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by choosing the best Formula "
                "from the Formulae with the same Scale and ScaledReg.\n");

  BestFormulaeTy BestFormulae;

  bool ChangedFormulae = false;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    auto IsBetterThan = [&](Formula &FA, Formula &FB) {

      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
      return CostFA.isLess(CostFB);

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
      Formula &F = LU.Formulae[FIdx];

      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});

        Formula &Best = LU.Formulae[P.first->second];
        if (IsBetterThan(F, Best))
                          "  in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');
        ChangedFormulae = true;

        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

    BestFormulae.clear();

             "After filtering out undesirable candidates:\n";

void LSRInstance::NarrowSearchSpaceByFilterPostInc() {

             "Narrowing the search space by choosing the lowest "
             "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (LU.Kind != LSRUse::Address)

    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {

        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {

  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    if (UniqRegs.count(Reg))
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
      float P = LU.getNotSelectedProbability(Reg);

    RegNumMap.insert(std::make_pair(Reg, PNotSel));