83#include "llvm/Config/llvm-config.h"
132#define DEBUG_TYPE "loop-reduce"
149 cl::desc(
"Enable LSR phi elimination"));
154 cl::desc(
"Add instruction count to a LSR cost model"));
159 cl::desc(
"Narrow LSR complex solution using"
160 " expectation of registers number"));
166 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
167 " with the same ScaledReg and Scale"));
171 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
174 "Don't prefer any addressing mode"),
177 "Prefer pre-indexed addressing mode"),
180 "Prefer post-indexed addressing mode")));
184 cl::init(std::numeric_limits<uint16_t>::max()),
185 cl::desc(
"LSR search space complexity limit"));
189 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
193 cl::desc(
"Attempt to replace primary IV with other IV."));
197 cl::desc(
"Attempt to drop solution if it is less profitable"));
200 "Number of terminating condition fold recognized and performed");
206 cl::desc(
"Stress test LSR IV chains"));
// Sentinel address-space value meaning "the address space is not known".
215 static const unsigned UnknownAddressSpace =
216 std::numeric_limits<unsigned>::max();
// The type of the memory being accessed, or nullptr if unknown.
218 Type *MemTy =
nullptr;
// Address space of the access; defaults to the unknown-sentinel above.
219 unsigned AddrSpace = UnknownAddressSpace;
// Default: no memory type, unknown address space.
221 MemAccessTy() =
default;
222 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
225 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
231 unsigned AS = UnknownAddressSpace) {
251#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
253 OS <<
"[NumUses=" << UsedByIndices.count() <<
']';
267 RegUsesTy RegUsesMap;
271 void countRegister(
const SCEV *Reg,
size_t LUIdx);
272 void dropRegister(
const SCEV *Reg,
size_t LUIdx);
273 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
275 bool isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const;
293RegUseTracker::countRegister(
const SCEV *Reg,
size_t LUIdx) {
294 std::pair<RegUsesTy::iterator, bool> Pair =
295 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
296 RegSortData &RSD = Pair.first->second;
299 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
300 RSD.UsedByIndices.set(LUIdx);
304RegUseTracker::dropRegister(
const SCEV *Reg,
size_t LUIdx) {
305 RegUsesTy::iterator It = RegUsesMap.find(Reg);
306 assert(It != RegUsesMap.end());
307 RegSortData &RSD = It->second;
308 assert(RSD.UsedByIndices.size() > LUIdx);
309 RSD.UsedByIndices.reset(LUIdx);
313RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
314 assert(LUIdx <= LastLUIdx);
318 for (
auto &Pair : RegUsesMap) {
320 if (LUIdx < UsedByIndices.
size())
321 UsedByIndices[LUIdx] =
322 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
323 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
328RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const {
329 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
330 if (
I == RegUsesMap.end())
334 if (i == -1)
return false;
335 if ((
size_t)i != LUIdx)
return true;
340 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
341 assert(
I != RegUsesMap.end() &&
"Unknown register!");
342 return I->second.UsedByIndices;
345void RegUseTracker::clear() {
359 int64_t BaseOffset = 0;
362 bool HasBaseReg =
false;
385 const SCEV *ScaledReg =
nullptr;
390 int64_t UnfoldedOffset = 0;
398 void canonicalize(
const Loop &L);
402 bool hasZeroEnd()
const;
404 size_t getNumRegs()
const;
407 void deleteBaseReg(
const SCEV *&S);
409 bool referencesReg(
const SCEV *S)
const;
410 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
411 const RegUseTracker &RegUses)
const;
432 for (
const SCEV *S :
Add->operands())
439 if (!AR->getStart()->isZero() && AR->isAffine()) {
442 AR->getStepRecurrence(SE),
458 const SCEV *NegOne = SE.
getSCEV(ConstantInt::getAllOnesValue(
460 for (
const SCEV *S : MyGood)
462 for (
const SCEV *S : MyBad)
481 BaseRegs.push_back(Sum);
487 BaseRegs.push_back(Sum);
495 return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
502bool Formula::isCanonical(
const Loop &L)
const {
504 return BaseRegs.size() <= 1;
509 if (Scale == 1 && BaseRegs.empty())
529void Formula::canonicalize(
const Loop &L) {
533 if (BaseRegs.empty()) {
535 assert(ScaledReg &&
"Expected 1*reg => reg");
536 assert(Scale == 1 &&
"Expected 1*reg => reg");
537 BaseRegs.push_back(ScaledReg);
545 ScaledReg = BaseRegs.pop_back_val();
556 if (
I != BaseRegs.end())
566bool Formula::unscale() {
570 BaseRegs.push_back(ScaledReg);
575bool Formula::hasZeroEnd()
const {
576 if (UnfoldedOffset || BaseOffset)
578 if (BaseRegs.size() != 1 || ScaledReg)
585size_t Formula::getNumRegs()
const {
586 return !!ScaledReg + BaseRegs.size();
591Type *Formula::getType()
const {
592 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
593 ScaledReg ? ScaledReg->getType() :
594 BaseGV ? BaseGV->getType() :
599void Formula::deleteBaseReg(
const SCEV *&S) {
600 if (&S != &BaseRegs.back())
606bool Formula::referencesReg(
const SCEV *S)
const {
612bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
613 const RegUseTracker &RegUses)
const {
615 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
617 for (
const SCEV *BaseReg : BaseRegs)
618 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
623#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
627 if (!First)
OS <<
" + ";
else First =
false;
628 BaseGV->printAsOperand(
OS,
false);
630 if (BaseOffset != 0) {
631 if (!First)
OS <<
" + ";
else First =
false;
634 for (
const SCEV *BaseReg : BaseRegs) {
635 if (!First)
OS <<
" + ";
else First =
false;
636 OS <<
"reg(" << *BaseReg <<
')';
638 if (HasBaseReg && BaseRegs.empty()) {
639 if (!First)
OS <<
" + ";
else First =
false;
640 OS <<
"**error: HasBaseReg**";
641 }
else if (!HasBaseReg && !BaseRegs.empty()) {
642 if (!First)
OS <<
" + ";
else First =
false;
643 OS <<
"**error: !HasBaseReg**";
646 if (!First)
OS <<
" + ";
else First =
false;
647 OS << Scale <<
"*reg(";
654 if (UnfoldedOffset != 0) {
655 if (!First)
OS <<
" + ";
656 OS <<
"imm(" << UnfoldedOffset <<
')';
697 bool IgnoreSignificantBits =
false) {
708 if (
RA.isAllOnes()) {
722 const APInt &LA =
C->getAPInt();
724 if (LA.srem(
RA) != 0)
731 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
733 IgnoreSignificantBits);
734 if (!Step)
return nullptr;
736 IgnoreSignificantBits);
737 if (!Start)
return nullptr;
750 for (
const SCEV *S :
Add->operands()) {
752 if (!Op)
return nullptr;
768 dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
783 IgnoreSignificantBits)) {
802 if (
C->getAPInt().getSignificantBits() <= 64) {
804 return C->getValue()->getSExtValue();
812 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
827 if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
828 if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
838 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
854 bool isAddress = isa<LoadInst>(Inst);
856 if (
SI->getPointerOperand() == OperandVal)
858 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
861 switch (II->getIntrinsicID()) {
862 case Intrinsic::memset:
863 case Intrinsic::prefetch:
864 case Intrinsic::masked_load:
865 if (II->getArgOperand(0) == OperandVal)
868 case Intrinsic::masked_store:
869 if (II->getArgOperand(1) == OperandVal)
872 case Intrinsic::memmove:
873 case Intrinsic::memcpy:
874 if (II->getArgOperand(0) == OperandVal ||
875 II->getArgOperand(1) == OperandVal)
881 if (IntrInfo.
PtrVal == OperandVal)
886 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
887 if (RMW->getPointerOperand() == OperandVal)
890 if (CmpX->getPointerOperand() == OperandVal)
899 MemAccessTy AccessTy(Inst->
getType(), MemAccessTy::UnknownAddressSpace);
900 if (
const StoreInst *
SI = dyn_cast<StoreInst>(Inst)) {
901 AccessTy.MemTy =
SI->getOperand(0)->getType();
902 AccessTy.AddrSpace =
SI->getPointerAddressSpace();
903 }
else if (
const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
904 AccessTy.AddrSpace = LI->getPointerAddressSpace();
905 }
else if (
const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
906 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
908 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
909 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
910 switch (II->getIntrinsicID()) {
911 case Intrinsic::prefetch:
912 case Intrinsic::memset:
913 AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
914 AccessTy.MemTy = OperandVal->
getType();
916 case Intrinsic::memmove:
917 case Intrinsic::memcpy:
919 AccessTy.MemTy = OperandVal->
getType();
921 case Intrinsic::masked_load:
925 case Intrinsic::masked_store:
926 AccessTy.MemTy = II->getOperand(0)->getType();
928 II->getArgOperand(1)->getType()->getPointerAddressSpace();
944 if (
PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
946 PTy->getAddressSpace());
994 if (!Processed.
insert(S).second)
998 for (
const SCEV *S :
Add->operands()) {
1014 Value *UVal = U->getValue();
1018 if (UI && UI->
getOpcode() == Instruction::Mul &&
1052 const LSRUse &LU,
const Formula &
F);
1056 const LSRUse &LU,
const Formula &
F,
1063 const Loop *
L =
nullptr;
1073 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1084 bool isLess(
const Cost &
Other)
const;
1091 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1092 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1093 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1094 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0u);
1100 return C.NumRegs == ~0
u;
1103 void RateFormula(
const Formula &
F,
1113 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1115 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1128 Value *OperandValToReplace =
nullptr;
1140 LSRFixup() =
default;
1142 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1150struct UniquifierDenseMapInfo {
1153 V.push_back(
reinterpret_cast<const SCEV *
>(-1));
1159 V.push_back(
reinterpret_cast<const SCEV *
>(-2));
1195 MemAccessTy AccessTy;
1201 int64_t MinOffset = std::numeric_limits<int64_t>::max();
1202 int64_t MaxOffset = std::numeric_limits<int64_t>::min();
1206 bool AllFixupsOutsideLoop =
true;
1213 bool RigidFormula =
false;
1219 Type *WidestFixupType =
nullptr;
1229 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1231 LSRFixup &getNewFixup() {
1232 Fixups.push_back(LSRFixup());
1236 void pushFixup(LSRFixup &f) {
1238 if (
f.Offset > MaxOffset)
1239 MaxOffset =
f.Offset;
1240 if (
f.Offset < MinOffset)
1241 MinOffset =
f.Offset;
1244 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1245 float getNotSelectedProbability(
const SCEV *Reg)
const;
1246 bool InsertFormula(
const Formula &
F,
const Loop &L);
1247 void DeleteFormula(Formula &
F);
1248 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1257 LSRUse::KindType Kind, MemAccessTy AccessTy,
1259 bool HasBaseReg, int64_t Scale,
1263 if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1267 if (
const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1269 if (
auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1271 if (
auto S = dyn_cast<SCEVNAryExpr>(Reg))
1273 [&](
unsigned i,
const SCEV *Reg) {
1274 return i + getSetupCost(Reg, Depth - 1);
1276 if (
auto S = dyn_cast<SCEVUDivExpr>(Reg))
1283void Cost::RateRegister(
const Formula &
F,
const SCEV *Reg,
1289 if (AR->getLoop() != L) {
1296 if (!AR->getLoop()->contains(L)) {
1306 unsigned LoopCost = 1;
1313 if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1314 if (Step->getAPInt() ==
F.BaseOffset)
1317 const SCEV *LoopStep = AR->getStepRecurrence(*SE);
1318 if (isa<SCEVConstant>(LoopStep)) {
1319 const SCEV *LoopStart = AR->getStart();
1320 if (!isa<SCEVConstant>(LoopStart) &&
1326 C.AddRecCost += LoopCost;
1330 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1331 if (!Regs.
count(AR->getOperand(1))) {
1332 RateRegister(
F, AR->getOperand(1), Regs);
1344 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1346 C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1353void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *Reg,
1356 if (LoserRegs && LoserRegs->
count(Reg)) {
1360 if (Regs.
insert(Reg).second) {
1361 RateRegister(
F, Reg, Regs);
1362 if (LoserRegs && isLoser())
1367void Cost::RateFormula(
const Formula &
F,
1374 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1376 unsigned PrevAddRecCost =
C.AddRecCost;
1377 unsigned PrevNumRegs =
C.NumRegs;
1378 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1379 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1380 if (VisitedRegs.
count(ScaledReg)) {
1384 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1388 for (
const SCEV *BaseReg :
F.BaseRegs) {
1389 if (VisitedRegs.
count(BaseReg)) {
1393 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1399 size_t NumBaseParts =
F.getNumRegs();
1400 if (NumBaseParts > 1)
1405 C.NumBaseAdds += (
F.UnfoldedOffset != 0);
1411 for (
const LSRFixup &
Fixup : LU.Fixups) {
1412 int64_t
O =
Fixup.Offset;
1422 if (LU.Kind == LSRUse::Address &&
Offset != 0 &&
1439 if (
C.NumRegs > TTIRegNum) {
1442 if (PrevNumRegs > TTIRegNum)
1443 C.Insns += (
C.NumRegs - PrevNumRegs);
1445 C.Insns += (
C.NumRegs - TTIRegNum);
1458 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1462 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1465 if (LU.Kind != LSRUse::ICmpZero)
1466 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1472 C.Insns = std::numeric_limits<unsigned>::max();
1473 C.NumRegs = std::numeric_limits<unsigned>::max();
1474 C.AddRecCost = std::numeric_limits<unsigned>::max();
1475 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1476 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1477 C.ImmCost = std::numeric_limits<unsigned>::max();
1478 C.SetupCost = std::numeric_limits<unsigned>::max();
1479 C.ScaleCost = std::numeric_limits<unsigned>::max();
1483bool Cost::isLess(
const Cost &
Other)
const {
1485 C.Insns !=
Other.C.Insns)
1486 return C.Insns <
Other.C.Insns;
1490#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1493 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1494 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1495 if (
C.AddRecCost != 0)
1496 OS <<
", with addrec cost " <<
C.AddRecCost;
1497 if (
C.NumIVMuls != 0)
1498 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1499 << (
C.NumIVMuls == 1 ?
"" :
"s");
1500 if (
C.NumBaseAdds != 0)
1501 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1502 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1503 if (
C.ScaleCost != 0)
1504 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1506 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1507 if (
C.SetupCost != 0)
1508 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1517bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1519 if (
const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1520 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1521 if (PN->getIncomingValue(i) == OperandValToReplace &&
1522 L->contains(PN->getIncomingBlock(i)))
1527 return !
L->contains(UserInst);
1530#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1534 if (
StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1536 Store->getOperand(0)->printAsOperand(
OS,
false);
1537 }
else if (UserInst->getType()->isVoidTy())
1538 OS << UserInst->getOpcodeName();
1540 UserInst->printAsOperand(
OS,
false);
1542 OS <<
", OperandValToReplace=";
1543 OperandValToReplace->printAsOperand(
OS,
false);
1545 for (
const Loop *PIL : PostIncLoops) {
1546 OS <<
", PostIncLoop=";
1547 PIL->getHeader()->printAsOperand(
OS,
false);
1561bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1563 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1566 return Uniquifier.
count(Key);
1570float LSRUse::getNotSelectedProbability(
const SCEV *Reg)
const {
1572 for (
const Formula &
F : Formulae)
1573 if (
F.referencesReg(Reg))
1575 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1580bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1581 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1583 if (!Formulae.empty() && RigidFormula)
1587 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1591 if (!Uniquifier.
insert(Key).second)
1595 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1596 "Zero allocated in a scaled register!");
1598 for (
const SCEV *BaseReg :
F.BaseRegs)
1599 assert(!BaseReg->
isZero() &&
"Zero allocated in a base register!");
1603 Formulae.push_back(
F);
1606 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1614void LSRUse::DeleteFormula(Formula &
F) {
1615 if (&
F != &Formulae.back())
1617 Formulae.pop_back();
1621void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1625 for (
const Formula &
F : Formulae) {
1626 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1627 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1631 for (
const SCEV *S : OldRegs)
1633 RegUses.dropRegister(S, LUIdx);
1636#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1638 OS <<
"LSR Use: Kind=";
1640 case Basic:
OS <<
"Basic";
break;
1641 case Special:
OS <<
"Special";
break;
1642 case ICmpZero:
OS <<
"ICmpZero";
break;
1644 OS <<
"Address of ";
1645 if (AccessTy.MemTy->isPointerTy())
1648 OS << *AccessTy.MemTy;
1651 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1654 OS <<
", Offsets={";
1655 bool NeedComma =
false;
1656 for (
const LSRFixup &
Fixup : Fixups) {
1657 if (NeedComma)
OS <<
',';
1663 if (AllFixupsOutsideLoop)
1664 OS <<
", all-fixups-outside-loop";
1666 if (WidestFixupType)
1667 OS <<
", widest fixup type: " << *WidestFixupType;
1676 LSRUse::KindType Kind, MemAccessTy AccessTy,
1678 bool HasBaseReg, int64_t Scale,
1681 case LSRUse::Address:
1683 HasBaseReg, Scale, AccessTy.AddrSpace,
Fixup);
1685 case LSRUse::ICmpZero:
1692 if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1697 if (Scale != 0 && Scale != -1)
1702 if (BaseOffset != 0) {
1710 BaseOffset = -(
uint64_t)BaseOffset;
1719 return !BaseGV && Scale == 0 && BaseOffset == 0;
1721 case LSRUse::Special:
1723 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1730 int64_t MinOffset, int64_t MaxOffset,
1731 LSRUse::KindType Kind, MemAccessTy AccessTy,
1733 bool HasBaseReg, int64_t Scale) {
1735 if (((int64_t)((
uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1738 MinOffset = (
uint64_t)BaseOffset + MinOffset;
1739 if (((int64_t)((
uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1742 MaxOffset = (
uint64_t)BaseOffset + MaxOffset;
1745 HasBaseReg, Scale) &&
1751 int64_t MinOffset, int64_t MaxOffset,
1752 LSRUse::KindType Kind, MemAccessTy AccessTy,
1753 const Formula &
F,
const Loop &L) {
1761 assert((
F.isCanonical(L) ||
F.Scale != 0));
1763 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1768 int64_t MaxOffset, LSRUse::KindType Kind,
1770 int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
1773 BaseOffset, HasBaseReg, Scale) ||
1778 BaseGV, BaseOffset,
true, 0));
1782 int64_t MaxOffset, LSRUse::KindType Kind,
1783 MemAccessTy AccessTy,
const Formula &
F) {
1784 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1785 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1789 const LSRUse &LU,
const Formula &
F) {
1792 for (
const LSRFixup &
Fixup : LU.Fixups)
1794 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1795 F.Scale,
Fixup.UserInst))
1801 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1806 const LSRUse &LU,
const Formula &
F,
1815 return F.Scale != 1;
1818 case LSRUse::Address: {
1821 LU.AccessTy.MemTy,
F.BaseGV,
F.BaseOffset + LU.MinOffset,
F.HasBaseReg,
1822 F.Scale, LU.AccessTy.AddrSpace);
1824 LU.AccessTy.MemTy,
F.BaseGV,
F.BaseOffset + LU.MaxOffset,
F.HasBaseReg,
1825 F.Scale, LU.AccessTy.AddrSpace);
1828 "Legal addressing mode has an illegal cost!");
1829 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1831 case LSRUse::ICmpZero:
1833 case LSRUse::Special:
1843 LSRUse::KindType Kind, MemAccessTy AccessTy,
1847 if (BaseOffset == 0 && !BaseGV)
return true;
1851 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1855 if (!HasBaseReg && Scale == 1) {
1866 int64_t MaxOffset, LSRUse::KindType Kind,
1867 MemAccessTy AccessTy,
const SCEV *S,
1870 if (S->
isZero())
return true;
1878 if (!S->
isZero())
return false;
1881 if (BaseOffset == 0 && !BaseGV)
return true;
1885 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1888 BaseOffset, HasBaseReg, Scale);
1905 const SCEV *IncExpr;
1908 : UserInst(
U), IVOperand(
O), IncExpr(
E) {}
1915 const SCEV *ExprBase =
nullptr;
1917 IVChain() =
default;
1918 IVChain(
const IVInc &Head,
const SCEV *
Base)
1919 : Incs(1, Head), ExprBase(
Base) {}
1926 return std::next(Incs.
begin());
1933 bool hasIncs()
const {
return Incs.
size() >= 2; }
1942 bool isProfitableIncrement(
const SCEV *OperExpr,
1943 const SCEV *IncExpr,
1968 bool Changed =
false;
1995 RegUseTracker RegUses;
2000 static const unsigned MaxChains = 8;
2011 void OptimizeShadowIV();
2014 void OptimizeLoopTermCond();
2018 void FinalizeChain(IVChain &Chain);
2019 void CollectChains();
2020 void GenerateIVChain(
const IVChain &Chain,
2023 void CollectInterestingTypesAndFactors();
2024 void CollectFixupsAndInitialFormulae();
2030 bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
bool HasBaseReg,
2031 LSRUse::KindType Kind, MemAccessTy AccessTy);
2033 std::pair<size_t, int64_t> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2034 MemAccessTy AccessTy);
2036 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2038 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2040 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2041 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2042 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2043 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2045 void CollectLoopInvariantFixupsAndFormulae();
2047 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2048 unsigned Depth = 0);
2050 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2052 size_t Idx,
bool IsScaledReg =
false);
2053 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2054 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2055 const Formula &
Base,
size_t Idx,
2056 bool IsScaledReg =
false);
2057 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2058 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2059 const Formula &
Base,
2061 size_t Idx,
bool IsScaledReg =
false);
2062 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2063 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2064 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2065 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2066 void GenerateCrossUseConstantOffsets();
2067 void GenerateAllReuseFormulae();
2069 void FilterOutUndesirableDedicatedRegisters();
2071 size_t EstimateSearchSpaceComplexity()
const;
2072 void NarrowSearchSpaceByDetectingSupersets();
2073 void NarrowSearchSpaceByCollapsingUnrolledCode();
2074 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2075 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2076 void NarrowSearchSpaceByFilterPostInc();
2077 void NarrowSearchSpaceByDeletingCostlyFormulas();
2078 void NarrowSearchSpaceByPickingWinnerRegs();
2079 void NarrowSearchSpaceUsingHeuristics();
2084 const Cost &CurCost,
2094 const LSRUse &LU)
const;
2096 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2099 void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2102 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2111 bool getChanged()
const {
return Changed; }
2113 return ScalarEvolutionIVs;
2127void LSRInstance::OptimizeShadowIV() {
2129 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2137 Type *DestTy =
nullptr;
2138 bool IsSigned =
false;
2152 if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2154 DestTy = UCast->getDestTy();
2156 else if (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2158 DestTy = SCast->getDestTy();
2160 if (!DestTy)
continue;
2180 if (Mantissa == -1)
continue;
2184 unsigned Entry, Latch;
2194 if (!
Init)
continue;
2196 (
double)
Init->getSExtValue() :
2197 (
double)
Init->getZExtValue());
2201 if (!Incr)
continue;
2202 if (Incr->
getOpcode() != Instruction::Add
2203 && Incr->
getOpcode() != Instruction::Sub)
2219 if (!
C->getValue().isStrictlyPositive())
continue;
2228 Instruction::FAdd : Instruction::FSub,
2229 NewPH, CFP,
"IV.S.next.", Incr);
2246 if (
U.getUser() ==
Cond) {
2314 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2319 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2320 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2327 if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2328 Pred = ICmpInst::ICMP_SLE;
2330 }
else if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2331 Pred = ICmpInst::ICMP_SLT;
2333 }
else if (
const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2334 Pred = ICmpInst::ICMP_ULT;
2343 if (
Max->getNumOperands() != 2)
2346 const SCEV *MaxLHS =
Max->getOperand(0);
2347 const SCEV *MaxRHS =
Max->getOperand(1);
2352 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2365 "Loop condition operand is an addrec in a different loop!");
2369 Value *NewRHS =
nullptr;
2370 if (ICmpInst::isTrueWhenEqual(Pred)) {
2373 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2374 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2375 NewRHS = BO->getOperand(0);
2377 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2378 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2379 NewRHS = BO->getOperand(0);
2386 else if (
const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2387 NewRHS = SU->getValue();
2404 Cond->replaceAllUsesWith(NewCond);
2407 Cond->eraseFromParent();
2409 if (
Cmp->use_empty())
2410 Cmp->eraseFromParent();
2416LSRInstance::OptimizeLoopTermCond() {
2433 L->getExitingBlocks(ExitingBlocks);
2441 for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2447 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2457 if (!FindIVUserForCond(
Cond, CondUse))
2471 if (!DT.dominates(ExitingBlock, LatchBlock))
2476 if (LatchBlock != ExitingBlock)
2480 if (&*UI != CondUse &&
2481 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2484 const SCEV *
A = IU.getStride(*CondUse, L);
2485 const SCEV *
B = IU.getStride(*UI, L);
2486 if (!
A || !
B)
continue;
2499 if (
C->isOne() ||
C->isMinusOne())
2500 goto decline_post_inc;
2502 if (
C->getValue().getSignificantBits() >= 64 ||
2503 C->getValue().isMinSignedValue())
2504 goto decline_post_inc;
2508 TTI, UI->getUser(), UI->getOperandValToReplace());
2509 int64_t Scale =
C->getSExtValue();
2513 AccessTy.AddrSpace))
2514 goto decline_post_inc;
2519 AccessTy.AddrSpace))
2520 goto decline_post_inc;
2525 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2531 if (
Cond->getNextNonDebugInstruction() != TermBr) {
2532 if (
Cond->hasOneUse()) {
2533 Cond->moveBefore(TermBr);
2537 Cond = cast<ICmpInst>(
Cond->clone());
2538 Cond->setName(
L->getHeader()->getName() +
".termcond");
2560 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2562 IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
2567bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
2568 bool HasBaseReg, LSRUse::KindType Kind,
2569 MemAccessTy AccessTy) {
2570 int64_t NewMinOffset = LU.MinOffset;
2571 int64_t NewMaxOffset = LU.MaxOffset;
2572 MemAccessTy NewAccessTy = AccessTy;
2577 if (LU.Kind != Kind)
2583 if (Kind == LSRUse::Address) {
2584 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2585 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2586 AccessTy.AddrSpace);
2591 if (NewOffset < LU.MinOffset) {
2593 LU.MaxOffset - NewOffset, HasBaseReg))
2595 NewMinOffset = NewOffset;
2596 }
else if (NewOffset > LU.MaxOffset) {
2598 NewOffset - LU.MinOffset, HasBaseReg))
2600 NewMaxOffset = NewOffset;
2604 LU.MinOffset = NewMinOffset;
2605 LU.MaxOffset = NewMaxOffset;
2606 LU.AccessTy = NewAccessTy;
2613std::pair<size_t, int64_t> LSRInstance::getUse(
const SCEV *&Expr,
2614 LSRUse::KindType Kind,
2615 MemAccessTy AccessTy) {
2626 std::pair<UseMapTy::iterator, bool>
P =
2630 size_t LUIdx =
P.first->second;
2631 LSRUse &LU =
Uses[LUIdx];
2632 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2634 return std::make_pair(LUIdx,
Offset);
2638 size_t LUIdx =
Uses.size();
2639 P.first->second = LUIdx;
2640 Uses.push_back(LSRUse(Kind, AccessTy));
2641 LSRUse &LU =
Uses[LUIdx];
2645 return std::make_pair(LUIdx,
Offset);
2649void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2650 if (&LU != &
Uses.back())
2655 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2661LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2662 const LSRUse &OrigLU) {
2664 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
2665 LSRUse &LU =
Uses[LUIdx];
2671 if (&LU != &OrigLU &&
2672 LU.Kind != LSRUse::ICmpZero &&
2673 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2674 LU.WidestFixupType == OrigLU.WidestFixupType &&
2675 LU.HasFormulaWithSameRegs(OrigF)) {
2677 for (
const Formula &
F : LU.Formulae) {
2680 if (
F.BaseRegs == OrigF.BaseRegs &&
2681 F.ScaledReg == OrigF.ScaledReg &&
2682 F.BaseGV == OrigF.BaseGV &&
2683 F.Scale == OrigF.Scale &&
2684 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2685 if (
F.BaseOffset == 0)
2700void LSRInstance::CollectInterestingTypesAndFactors() {
2706 const SCEV *Expr = IU.getExpr(U);
2722 }
while (!Worklist.
empty());
2729 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2730 const SCEV *OldStride = *
I;
2731 const SCEV *NewStride = *NewStrideIter;
2742 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2744 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2745 Factors.insert(Factor->getAPInt().getSExtValue());
2750 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2751 Factors.insert(Factor->getAPInt().getSExtValue());
2757 if (
Types.size() == 1)
2769 for(; OI != OE; ++OI) {
2770 if (
Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2775 dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(Oper))) {
2787 if (
TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2788 return Trunc->getOperand(0);
2822 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2824 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2826 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2833 if (SubExpr->getSCEVType() ==
scAddExpr)
2836 if (SubExpr->getSCEVType() !=
scMulExpr)
2842 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2852bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
2853 const SCEV *IncExpr,
2861 if (!isa<SCEVConstant>(IncExpr)) {
2863 if (isa<SCEVConstant>(SE.
getMinusSCEV(OperExpr, HeadExpr)))
2888 if (!Chain.hasIncs())
2891 if (!
Users.empty()) {
2892 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
2894 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
2897 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
2905 if (isa<PHINode>(Chain.tailUserInst())
2906 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2909 const SCEV *LastIncExpr =
nullptr;
2910 unsigned NumConstIncrements = 0;
2911 unsigned NumVarIncrements = 0;
2912 unsigned NumReusedIncrements = 0;
2917 for (
const IVInc &Inc : Chain) {
2920 if (Inc.IncExpr->isZero())
2925 if (isa<SCEVConstant>(Inc.IncExpr)) {
2926 ++NumConstIncrements;
2930 if (Inc.IncExpr == LastIncExpr)
2931 ++NumReusedIncrements;
2935 LastIncExpr = Inc.IncExpr;
2940 if (NumConstIncrements > 1)
2947 cost += NumVarIncrements;
2951 cost -= NumReusedIncrements;
2953 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
2970 unsigned ChainIdx = 0, NChains = IVChainVec.size();
2971 const SCEV *LastIncExpr =
nullptr;
2972 for (; ChainIdx < NChains; ++ChainIdx) {
2973 IVChain &Chain = IVChainVec[ChainIdx];
2987 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2993 if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.
isLoopInvariant(IncExpr, L))
2996 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2997 LastIncExpr = IncExpr;
3003 if (ChainIdx == NChains) {
3004 if (isa<PHINode>(UserInst))
3010 LastIncExpr = OperExpr;
3014 if (!isa<SCEVAddRecExpr>(LastIncExpr))
3017 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3019 ChainUsersVec.
resize(NChains);
3020 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3021 <<
") IV=" << *LastIncExpr <<
"\n");
3023 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3024 <<
") IV+" << *LastIncExpr <<
"\n");
3026 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3028 IVChain &Chain = IVChainVec[ChainIdx];
3032 if (!LastIncExpr->
isZero()) {
3033 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
3049 IVChain::const_iterator IncIter = Chain.Incs.begin();
3050 IVChain::const_iterator IncEnd = Chain.Incs.end();
3051 for( ; IncIter != IncEnd; ++IncIter) {
3052 if (IncIter->UserInst == OtherUse)
3055 if (IncIter != IncEnd)
3059 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3060 && IU.isIVUserOrOperand(OtherUse)) {
3063 NearUsers.
insert(OtherUse);
3068 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
// Walk the loop body (latch-to-header along the dominator tree) visiting IV
// users, grow candidate IV chains via ChainInstruction, then keep only the
// profitable chains and finalize them.
// NOTE(review): code-browser dump with missing interior lines; structure is
// only partially visible here.
3093void LSRInstance::CollectChains() {
// Visit blocks from the latch up to (but excluding) the header via idoms.
3099 for (
DomTreeNode *Rung = DT.getNode(
L->getLoopLatch());
3100 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
// Skip PHIs and instructions that are not IV users/operands.
3109 if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
// A visited instruction can no longer be a "near" user of any chain.
3119 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3120 ChainIdx < NChains; ++ChainIdx) {
3121 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
// Try to chain this instruction through each unique IV operand.
3127 while (IVOpIter != IVOpEnd) {
3128 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3129 if (UniqueOperands.
insert(IVOpInst).second)
3130 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3131 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
// Header PHIs with a latch incoming value can terminate a chain.
3136 for (
PHINode &PN :
L->getHeader()->phis()) {
3141 dyn_cast<Instruction>(PN.getIncomingValueForBlock(
L->getLoopLatch()));
3143 ChainInstruction(&PN, IncV, ChainUsersVec);
// Compact IVChainVec in place, keeping only profitable chains.
3146 unsigned ChainIdx = 0;
3147 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3148 UsersIdx < NChains; ++UsersIdx) {
3150 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3153 if (ChainIdx != UsersIdx)
3154 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3155 FinalizeChain(IVChainVec[ChainIdx]);
3158 IVChainVec.resize(ChainIdx);
// Commit a chain: record every (UserInst, IVOperand) use-edge of the chain in
// IVIncSet so later fixup collection can recognize and skip chained uses.
3161void LSRInstance::FinalizeChain(IVChain &Chain) {
3162 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3163 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3165 for (
const IVInc &Inc : Chain) {
// Locate the operand of UserInst that is the chained IV operand.
3167 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3168 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3169 IVIncSet.insert(UseI);
3176 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
// Materialize one IV chain: find the chain-head IV source, then for each
// increment expand only the accumulated "left-over" expression with the
// SCEV expander and rewrite each user to reference the chained value.
// NOTE(review): code-browser dump with missing interior lines.
3194void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3198 const IVInc &Head = Chain.Incs[0];
// Search the head instruction's IV operands for the chain source.
3203 Value *IVSrc =
nullptr;
3204 while (IVOpIter != IVOpEnd) {
3215 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3216 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3219 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3221 if (IVOpIter == IVOpEnd) {
// Give up if the source is concealed (only reachable through casts etc.).
3223 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3226 assert(IVSrc &&
"Failed to find IV chain source");
// LeftOverExpr accumulates increments not yet materialized as IR.
3231 const SCEV *LeftOverExpr =
nullptr;
3232 for (
const IVInc &Inc : Chain) {
// PHI users are rewritten at the latch terminator, not at the PHI itself.
3234 if (isa<PHINode>(InsertPt))
3235 InsertPt =
L->getLoopLatch()->getTerminator();
3239 Value *IVOper = IVSrc;
3240 if (!Inc.IncExpr->isZero()) {
3244 LeftOverExpr = LeftOverExpr ?
3245 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3247 if (LeftOverExpr && !LeftOverExpr->
isZero()) {
// Expand the pending increment and fold it into the chained IV value.
3250 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3253 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3257 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3259 LeftOverExpr =
nullptr;
// The user may expect a narrower type; truncation is allowed, extension not.
3262 Type *OperTy = Inc.IVOperand->getType();
3263 if (IVTy != OperTy) {
3265 "cannot extend a chained IV");
3267 IVOper =
Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3269 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3270 if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
// If the chain ends in a PHI, also rewrite the header PHIs' latch values.
3275 if (isa<PHINode>(Chain.tailUserInst())) {
3276 for (
PHINode &Phi :
L->getHeader()->phis()) {
3280 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3283 Value *IVOper = IVSrc;
3285 if (IVTy != PostIncTy) {
3289 IVOper =
Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3291 Phi.replaceUsesOfWith(PostIncV, IVOper);
// For every IV user: classify it as an LSRUse (Address/ICmpZero/Basic), skip
// uses already covered by a profitable chain, create the fixup record, and
// seed the use with its initial formula.
// NOTE(review): code-browser dump with missing interior lines.
3297void LSRInstance::CollectFixupsAndInitialFormulae() {
// Ask the target whether the loop-exit compare can be saved (not rewritten).
3299 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3311 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
// Operands recorded by FinalizeChain are handled via the chain, not here.
3312 if (IVIncSet.count(UseI)) {
3313 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3317 LSRUse::KindType
Kind = LSRUse::Basic;
3318 MemAccessTy AccessTy;
3320 Kind = LSRUse::Address;
3324 const SCEV *S = IU.getExpr(U);
// Equality compares against a loop-invariant value can become ICmpZero uses
// by folding the RHS into the IV expression.
3333 if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3336 if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3338 if (CI->isEquality()) {
// Canonicalize so the IV operand is on the LHS.
3341 Value *
NV = CI->getOperand(1);
3342 if (NV ==
U.getOperandValToReplace()) {
3343 CI->setOperand(1, CI->getOperand(0));
3344 CI->setOperand(0, NV);
3345 NV = CI->getOperand(1);
3352 (!
NV->getType()->isPointerTy() ||
3357 Kind = LSRUse::ICmpZero;
3359 }
else if (
L->isLoopInvariant(NV) &&
3360 (!isa<Instruction>(NV) ||
3361 DT.dominates(cast<Instruction>(NV),
L->getHeader())) &&
3362 !
NV->getType()->isPointerTy()) {
3371 Kind = LSRUse::ICmpZero;
3373 assert(!isa<SCEVCouldNotCompute>(S));
// ICmpZero -x == y folds as x == -y, so record negated factors too.
3378 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3379 if (Factors[i] != -1)
3380 Factors.insert(-(
uint64_t)Factors[i]);
// Find or create the LSRUse for this (expr, kind, access-type) triple.
3386 std::pair<size_t, int64_t>
P = getUse(S, Kind, AccessTy);
3387 size_t LUIdx =
P.first;
3389 LSRUse &LU =
Uses[LUIdx];
3392 LSRFixup &LF = LU.getNewFixup();
3393 LF.UserInst = UserInst;
3394 LF.OperandValToReplace =
U.getOperandValToReplace();
3395 LF.PostIncLoops = TmpPostIncLoops;
3397 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
// Rate the unmodified formula once per use to establish the baseline cost.
3400 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3402 F.initialMatch(S, L, SE);
3403 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU);
3404 VisitedLSRUse.
insert(LUIdx);
3407 if (!LU.WidestFixupType ||
3410 LU.WidestFixupType = LF.OperandValToReplace->getType();
3413 if (LU.Formulae.empty()) {
3414 InsertInitialFormula(S, LU, LUIdx);
3415 CountRegisters(LU.Formulae.back(), LUIdx);
// Seed use LU with the canonical initial formula for expression S.
// Marks the use rigid (per the visible RigidFormula assignment) so LSR will
// not attempt alternatives — presumably when S is not analyzable; the guard
// condition is missing from this dump.
3424void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3428 LU.RigidFormula =
true;
3431 F.initialMatch(S, L, SE);
3432 bool Inserted = InsertFormula(LU, LUIdx,
F);
// The first formula of a use must always be novel.
3433 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
// Insert a trivial formula that only holds S as a single base register —
// used for loop-invariant values picked up outside normal IV-user scanning.
3439LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3440 LSRUse &LU,
size_t LUIdx) {
3442 F.BaseRegs.push_back(S);
3443 F.HasBaseReg =
true;
3444 bool Inserted = InsertFormula(LU, LUIdx,
F);
3445 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
// Note in RegUseTracker which registers formula F references on behalf of
// use LUIdx: the scaled register and every base register.
// NOTE(review): the dump omits the line between 3449 and 3451 — upstream
// guards the ScaledReg count with a null check; verify against the original.
3449void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3451 RegUses.countRegister(
F.ScaledReg, LUIdx);
3452 for (
const SCEV *BaseReg :
F.BaseRegs)
3453 RegUses.countRegister(BaseReg, LUIdx);
// Try to add formula F to use LU. If LU accepts it (it is new), register its
// registers with RegUses; returns whether it was inserted.
3458bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3461 "Formula is illegal");
// LU.InsertFormula deduplicates; bail out without counting on a duplicate.
3463 if (!LU.InsertFormula(
F, *L))
3466 CountRegisters(
F, LUIdx);
// Walk interesting SCEV expressions looking for loop-invariant values
// (SCEVUnknowns) with in-loop users; give each such user a Basic LSRUse with
// a supplemental formula so LSR accounts for the register pressure.
// NOTE(review): code-browser dump with missing interior lines.
3476LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3480 while (!Worklist.
empty()) {
3484 if (!Visited.
insert(S).second)
3491 else if (
const SCEVUDivExpr *
D = dyn_cast<SCEVUDivExpr>(S)) {
3494 }
else if (
const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3495 const Value *
V = US->getValue();
// Values defined inside the loop (and undef) are not loop-invariant inputs.
3496 if (
const Instruction *Inst = dyn_cast<Instruction>(V)) {
3498 if (
L->contains(Inst))
continue;
3499 }
else if (isa<UndefValue>(V))
3502 for (
const Use &U :
V->uses()) {
3503 const Instruction *UserInst = dyn_cast<Instruction>(
U.getUser());
// Only consider users dominated by the header (PHIs via incoming block).
3515 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3517 cast<PHINode>(UserInst)->getIncomingBlock(
3519 if (!DT.dominates(
L->getHeader(), UseBB))
// Rewriting a PHI whose matching incoming block ends in an EH pad would
// require inserting code there — reject such users.
3532 if (isa<PHINode>(UserInst)) {
3533 const auto *PhiNode = cast<PHINode>(UserInst);
3534 bool HasIncompatibleEHPTerminatedBlock =
false;
3536 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3537 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3538 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3539 HasIncompatibleEHPTerminatedBlock =
true;
3544 if (HasIncompatibleEHPTerminatedBlock) {
3557 if (!isa<SCEVUnknown>(UserS))
// For compares, also feed the other operand back into the worklist.
3566 if (
const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3567 unsigned OtherIdx = !
U.getOperandNo();
3568 Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
// Create (or reuse) a Basic use for this invariant and attach the fixup.
3573 std::pair<size_t, int64_t>
P = getUse(
3574 S, LSRUse::Basic, MemAccessTy());
3575 size_t LUIdx =
P.first;
3577 LSRUse &LU =
Uses[LUIdx];
3578 LSRFixup &LF = LU.getNewFixup();
3579 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3580 LF.OperandValToReplace =
U;
3582 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3583 if (!LU.WidestFixupType ||
3586 LU.WidestFixupType = LF.OperandValToReplace->getType();
3587 InsertSupplementalFormula(US, LU, LUIdx);
3588 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3604 unsigned Depth = 0) {
3611 for (
const SCEV *S :
Add->operands()) {
3617 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3626 if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3628 Remainder =
nullptr;
3646 const SCEV *Remainder =
3659 LSRUse &LU,
const SCEV *S,
const Loop *L,
3661 if (LU.Kind != LSRUse::Address ||
3662 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3668 if (!isa<SCEVConstant>(LoopStep))
3674 if (!isa<SCEVConstant>(LoopStart) && SE.
isLoopInvariant(LoopStart, L))
// Split one register of Base (the scaled register when IsScaledReg, else
// BaseRegs[Idx]) into an inner sum plus pulled-out loop-invariant/foldable
// addends, emitting each resulting formula; recurses via
// GenerateReassociations on newly inserted formulae.
// NOTE(review): code-browser dump with missing interior lines.
3681void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3682 const Formula &
Base,
3685 const SCEV *BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[
Idx];
// A single addend cannot be reassociated.
3697 if (AddOps.
size() == 1)
// Pull out addends that are legal as immediate/invariant parts of this use.
3711 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3717 InnerAddOps.append(std::next(J),
3722 if (InnerAddOps.size() == 1 &&
3724 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3733 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
// A fully-folded inner sum removes the register entirely.
3740 F.ScaledReg =
nullptr;
3742 F.BaseRegs.erase(
F.BaseRegs.begin() +
Idx);
3743 }
else if (IsScaledReg)
3744 F.ScaledReg = InnerSum;
3746 F.BaseRegs[
Idx] = InnerSum;
// Constant addends go into the unfolded offset when the target allows it.
3752 SC->getValue()->getZExtValue()))
3754 (
uint64_t)
F.UnfoldedOffset +
SC->getValue()->getZExtValue();
3756 F.BaseRegs.push_back(*J);
3761 if (InsertFormula(LU, LUIdx,
F))
3768 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
// Driver: apply GenerateReassociationsImpl to each base register of Base,
// and to the scaled register when Scale == 1 (it behaves like a base reg).
3774void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
3776 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
3781 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
3782 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
// Scale-1 scaled registers are reassociable too.
3784 if (
Base.Scale == 1)
3785 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
// Generate a formula that collapses Base's loop-invariant base registers
// (and optionally the unfolded offset) into a single summed register.
// NOTE(review): code-browser dump with missing interior lines.
3791void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
// Nothing to combine if there are at most one combinable component.
3794 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
3795 (
Base.UnfoldedOffset != 0) <= 1)
3802 Formula NewBase =
Base;
3803 NewBase.BaseRegs.clear();
3804 Type *CombinedIntegerType =
nullptr;
// Partition: combinable registers go to Ops, others stay in NewBase.
3805 for (
const SCEV *BaseReg :
Base.BaseRegs) {
3808 if (!CombinedIntegerType)
3813 NewBase.BaseRegs.push_back(BaseReg);
3817 if (Ops.
size() == 0)
// Emit NewBase plus the combined sum as one extra base register.
3822 auto GenerateFormula = [&](
const SCEV *Sum) {
3823 Formula
F = NewBase;
3831 F.BaseRegs.push_back(Sum);
3833 (void)InsertFormula(LU, LUIdx,
F);
3837 if (Ops.
size() > 1) {
// Optionally fold the unfolded offset into the combined register as well.
3844 if (NewBase.UnfoldedOffset) {
3845 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
3848 NewBase.UnfoldedOffset = 0;
// If the selected register of Base is "GV + G", try hoisting the global
// value into the formula's BaseGV field, keeping G as the register — only
// when the target considers the resulting use legal.
3854void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
3855 const Formula &
Base,
size_t Idx,
3859 if (
G->isZero() || !GV)
3863 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
3868 F.BaseRegs[
Idx] =
G;
3869 (void)InsertFormula(LU, LUIdx,
F);
// Driver: try GenerateSymbolicOffsetsImpl on every base register, and on the
// scaled register when Scale == 1. A formula that already carries a BaseGV
// cannot take another.
3873void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
3876 if (
Base.BaseGV)
return;
3878 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
3879 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
// Idx == -1 conventionally selects the scaled register.
3880 if (
Base.Scale == 1)
3881 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
// For the selected register of Base, try moving candidate constant offsets
// between the register expression and the formula's immediate field,
// inserting each target-legal variant.
// NOTE(review): code-browser dump with missing interior lines.
3886void LSRInstance::GenerateConstantOffsetsImpl(
3887 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
// Helper: fold Offset out of register G into the formula immediate.
3890 auto GenerateOffset = [&](
const SCEV *
G, int64_t
Offset) {
3894 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
// A register that folds away entirely is dropped from the formula.
3901 F.ScaledReg =
nullptr;
3903 F.deleteBaseReg(
F.BaseRegs[
Idx]);
3905 }
else if (IsScaledReg)
3908 F.BaseRegs[
Idx] = NewG;
3910 (void)InsertFormula(LU, LUIdx,
F);
// For an addrec with a constant step, the step (and per-offset variants) are
// natural offset candidates (enables pre/post-indexed addressing).
3925 if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
3927 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
3928 const APInt &StepInt = StepRec->getAPInt();
3932 for (int64_t
Offset : Worklist) {
3939 for (int64_t
Offset : Worklist)
3943 if (
G->isZero() || Imm == 0)
3952 F.BaseRegs[
Idx] =
G;
3957 (void)InsertFormula(LU, LUIdx,
F);
// Driver: run GenerateConstantOffsetsImpl over every base register and, when
// Scale == 1, over the scaled register (Idx == -1 selects it). The visible
// MaxOffset != MinOffset check gates extra offset candidates.
3961void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
3967 if (LU.MaxOffset != LU.MinOffset)
3970 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
3971 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
3972 if (
Base.Scale == 1)
3973 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
// For ICmpZero uses only: multiply an entire formula by each interesting
// factor (comparison with zero is scale-invariant), carefully checking every
// multiplication for signed overflow before emitting the scaled formula.
// NOTE(review): code-browser dump with missing interior lines.
3979void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
3981 if (LU.Kind != LSRUse::ICmpZero)
return;
// Varying offsets can't all be scaled consistently.
3989 if (LU.MinOffset != LU.MaxOffset)
return;
// Pointer-typed registers cannot be multiplied.
3992 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
3994 for (
const SCEV *BaseReg :
Base.BaseRegs)
3997 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4000 for (int64_t Factor : Factors) {
// INT64_MIN * -1 overflows; reject that combination up front.
4005 if (
Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
4007 int64_t NewBaseOffset = (
uint64_t)
Base.BaseOffset * Factor;
4008 assert(Factor != 0 &&
"Zero factor not expected!");
// Divide back to detect multiplication overflow.
4009 if (NewBaseOffset / Factor !=
Base.BaseOffset)
4017 int64_t
Offset = LU.MinOffset;
4018 if (
Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
4021 if (
Offset / Factor != LU.MinOffset)
4029 F.BaseOffset = NewBaseOffset;
// Each base register is multiplied by the factor as well.
4041 for (
size_t i = 0, e =
F.BaseRegs.size(); i !=
e; ++i) {
// The unfolded offset must also scale without overflowing.
4055 if (
F.UnfoldedOffset != 0) {
4056 if (
F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
4059 F.UnfoldedOffset = (
uint64_t)
F.UnfoldedOffset * Factor;
4060 if (
F.UnfoldedOffset / Factor !=
Base.UnfoldedOffset)
4069 (void)InsertFormula(LU, LUIdx,
F);
// Try turning one base register into a scaled register with each interesting
// factor, keeping only variants the target's addressing modes accept; Basic
// uses may be downgraded to Special when that makes the scale legal.
// NOTE(review): code-browser dump with missing interior lines.
4076void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
// Start from an unscaled canonical form.
4083 if (
Base.Scale != 0 && !
Base.unscale())
4086 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4089 for (int64_t Factor : Factors) {
4090 Base.Scale = Factor;
4091 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
// Skip factors the target cannot address...
4093 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
// ...unless reclassifying the use as Special makes it legal.
4097 if (LU.Kind == LSRUse::Basic &&
4098 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4099 LU.AccessTy,
Base) &&
4100 LU.AllFixupsOutsideLoop)
4101 LU.Kind = LSRUse::Special;
// For ICmpZero, a lone scale register would just be a multiply; skip.
4107 if (LU.Kind == LSRUse::ICmpZero &&
4108 !
Base.HasBaseReg &&
Base.BaseOffset == 0 && !
Base.BaseGV)
// Divide each addrec base register by the factor; an exact quotient becomes
// the scaled register.
4111 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4113 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4120 if (!Quotient->isZero()) {
4123 F.ScaledReg = Quotient;
4124 F.deleteBaseReg(
F.BaseRegs[i]);
// Scale-1 formulas must stay canonical (see the canonical-form invariants).
4128 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4129 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4133 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4135 (void)InsertFormula(LU, LUIdx,
F);
// Generate reuse formulae by truncating the registers of Base to each
// smaller interesting type; pointer-typed registers cannot be truncated, and
// variants producing a zero register are rejected.
// NOTE(review): code-browser dump with missing interior lines.
4143void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
// A symbolic base (GV) does not survive truncation.
4145 if (
Base.BaseGV)
return;
4155 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4158 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4161 for (
Type *SrcTy : Types) {
// A truncation that folds to zero would make the formula illegal/canonical-
// form-breaking; skip it.
4171 if (NewScaledReg->
isZero())
4173 F.ScaledReg = NewScaledReg;
4175 bool HasZeroBaseReg =
false;
4176 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4178 if (NewBaseReg->
isZero()) {
4179 HasZeroBaseReg =
true;
4182 BaseReg = NewBaseReg;
// Only worthwhile when the truncated registers are shared with other uses.
4189 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4193 (void)InsertFormula(LU, LUIdx,
F);
4206 const SCEV *OrigReg;
4208 WorkItem(
size_t LI, int64_t
I,
const SCEV *R)
4209 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4217#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4219 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4220 <<
" , add offset " <<
Imm;
// Look for registers used by different uses that differ only by a constant,
// and rewrite formulae so those uses can share one register, moving the
// constant difference into the formula immediates.
// NOTE(review): code-browser dump with missing interior lines.
4230void LSRInstance::GenerateCrossUseConstantOffsets() {
// Map each "stripped" register to the set of (immediate, original register)
// pairs seen across all uses.
4232 using ImmMapTy = std::map<int64_t, const SCEV *>;
4237 for (
const SCEV *
Use : RegUses) {
4240 auto Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
4243 Pair.first->second.insert(std::make_pair(Imm,
Use));
4244 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(
Use);
// Examine each stripped register that appears with more than one immediate.
4252 for (
const SCEV *Reg : Sequence) {
4253 const ImmMapTy &Imms =
Map.find(Reg)->second;
4256 if (Imms.size() == 1)
4259 LLVM_DEBUG(
dbgs() <<
"Generating cross-use offsets for " << *Reg <<
':';
4260 for (
const auto &Entry
4262 <<
' ' << Entry.first;
4266 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4268 const SCEV *OrigReg = J->second;
4270 int64_t JImm = J->first;
4271 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
// A non-constant register used by only one use offers no sharing.
4273 if (!isa<SCEVConstant>(OrigReg) &&
4274 UsedByIndicesMap[Reg].
count() == 1) {
// Limit candidates to the extremes and the value nearest the average, to
// bound the number of work items.
4282 int64_t
First = Imms.begin()->first;
4283 int64_t
Last = std::prev(Imms.end())->first;
4290 ImmMapTy::const_iterator OtherImms[] = {
4291 Imms.begin(), std::prev(Imms.end()),
4292 Imms.lower_bound(Avg)};
4293 for (
const auto &M : OtherImms) {
4294 if (M == J || M == JE)
continue;
4300 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4301 WorkItems.
push_back(WorkItem(LUIdx, Imm, OrigReg));
4308 UsedByIndicesMap.
clear();
4309 UniqueItems.
clear();
// Process each (use, immediate, register) work item: retrofit the use's
// formulae to reference OrigReg shifted by Imm.
4312 for (
const WorkItem &WI : WorkItems) {
4313 size_t LUIdx = WI.LUIdx;
4314 LSRUse &LU =
Uses[LUIdx];
4315 int64_t
Imm = WI.Imm;
4316 const SCEV *OrigReg = WI.OrigReg;
4323 for (
size_t L = 0, LE = LU.Formulae.size();
L !=
LE; ++
L) {
4324 Formula
F = LU.Formulae[
L];
// Case 1: OrigReg is the scaled register — fold Imm*Scale into the offset.
4331 if (
F.ScaledReg == OrigReg) {
4338 NewF.BaseOffset =
Offset;
4339 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4342 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
// Reject variants where the constant parts would partially cancel —
// such formulae tend to be pointless.
4347 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
4348 if (
C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
4350 .ule(std::abs(NewF.BaseOffset)))
4354 NewF.canonicalize(*this->L);
4355 (void)InsertFormula(LU, LUIdx, NewF);
// Case 2: OrigReg is a plain base register — shift it and the base offset.
4358 for (
size_t N = 0, NE =
F.BaseRegs.size();
N !=
NE; ++
N) {
4359 const SCEV *BaseReg =
F.BaseRegs[
N];
4360 if (BaseReg != OrigReg)
4363 NewF.BaseOffset = (
uint64_t)NewF.BaseOffset + Imm;
4365 LU.Kind, LU.AccessTy, NewF)) {
// If the plain offset is illegal, try the unfolded-offset field instead.
4372 NewF.UnfoldedOffset = (
uint64_t)NewF.UnfoldedOffset + Imm;
4374 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4379 for (
const SCEV *NewReg : NewF.BaseRegs)
4381 if ((
C->getAPInt() + NewF.BaseOffset)
4383 .slt(std::abs(NewF.BaseOffset)) &&
4385 (
unsigned)llvm::countr_zero<uint64_t>(NewF.BaseOffset))
4389 NewF.canonicalize(*this->L);
4390 (void)InsertFormula(LU, LUIdx, NewF);
// Top-level formula generation driver: run each Generate* pass over every
// formula of every use, in dependency-friendly order (reassociation and
// combination first, then offsets/scales, then truncates), and finish with
// cross-use constant-offset reuse.
4401LSRInstance::GenerateAllReuseFormulae() {
// Round 1: structural rewrites that later rounds build on.
4404 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4405 LSRUse &LU =
Uses[LUIdx];
4406 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4407 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4408 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4409 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
// Round 2: offset and scale variants of everything produced so far.
4411 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4412 LSRUse &LU =
Uses[LUIdx];
4413 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4414 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4415 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4416 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4417 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4418 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4419 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4420 GenerateScales(LU, LUIdx, LU.Formulae[i]);
// Round 3: truncation-based reuse across types.
4422 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4423 LSRUse &LU =
Uses[LUIdx];
4424 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4425 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4428 GenerateCrossUseConstantOffsets();
4431 "After generating reuse formulae:\n";
4432 print_uses(
dbgs()));
// Prune the formula sets: drop outright-loser formulae, then keep only the
// cheapest formula per key, where the key is the subset of a formula's
// registers that are shared with other uses (dedicated registers are ignored
// so formulae differing only in dedicated regs compete with each other).
// NOTE(review): code-browser dump with missing interior lines.
4437void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4442 bool ChangedFormulae =
false;
4447 using BestFormulaeTy =
4450 BestFormulaeTy BestFormulae;
4452 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4453 LSRUse &LU =
Uses[LUIdx];
4458 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4459 FIdx != NumForms; ++FIdx) {
4460 Formula &
F = LU.Formulae[FIdx];
// Rate the formula; losers are deleted immediately.
4471 CostF.RateFormula(
F, Regs, VisitedRegs, LU, &LoserRegs);
4472 if (CostF.isLoser()) {
// Build the key from registers shared with other uses only.
4484 for (
const SCEV *Reg :
F.BaseRegs) {
4485 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4489 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4490 Key.push_back(
F.ScaledReg);
4495 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4496 BestFormulae.insert(std::make_pair(Key, FIdx));
// A key collision means two formulae compete: keep the cheaper one.
4500 Formula &Best = LU.Formulae[
P.first->second];
4502 Cost CostBest(L, SE,
TTI, AMK);
4504 CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
4505 if (CostF.isLess(CostBest))
4509 " in favor of formula ";
4510 Best.print(
dbgs());
dbgs() <<
'\n');
4513 ChangedFormulae =
true;
4515 LU.DeleteFormula(
F);
// Deletions invalidate the register bookkeeping for this use.
4523 LU.RecomputeRegs(LUIdx, RegUses);
// Keys are only comparable within one use.
4526 BestFormulae.clear();
4531 "After filtering out undesirable candidates:\n";
// Estimate the size of the solver's search space as (roughly) the product of
// each use's formula count. Only the per-use formula-count read is visible
// here; the accumulation and saturation logic is missing from this dump.
4539size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4541 for (
const LSRUse &LU :
Uses) {
4542 size_t FSize = LU.Formulae.size();
// Search-space narrowing: delete a formula when folding one of its base
// registers (a constant, or a global value) into the immediate/BaseGV field
// yields a register set some other formula of the same use already has —
// i.e. the formula is a strict superset of an existing one.
// NOTE(review): code-browser dump with missing interior lines.
4557void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4561 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4562 "which use a superset of registers used by other "
4565 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4566 LSRUse &LU =
Uses[LUIdx];
4568 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4569 Formula &
F = LU.Formulae[i];
4574 I =
F.BaseRegs.begin(),
E =
F.BaseRegs.end();
I !=
E; ++
I) {
// Constant base register: fold its value into BaseOffset and compare.
4579 NewF.BaseOffset += (
uint64_t)
C->getValue()->getSExtValue();
4580 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4581 (
I -
F.BaseRegs.begin()));
4582 if (LU.HasFormulaWithSameRegs(NewF)) {
4585 LU.DeleteFormula(
F);
// GlobalValue base register: fold it into BaseGV and compare.
4591 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*
I)) {
4592 if (
GlobalValue *GV = dyn_cast<GlobalValue>(
U->getValue()))
4596 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4597 (
I -
F.BaseRegs.begin()));
4598 if (LU.HasFormulaWithSameRegs(NewF)) {
4601 LU.DeleteFormula(
F);
// Deletions require recomputing the use's register bookkeeping.
4612 LU.RecomputeRegs(LUIdx, RegUses);
// Search-space narrowing for unrolled code: when a use's formula differs
// from another use's only by a constant offset, merge the two uses — move
// this use's fixups (with adjusted offsets) into the similar use and delete
// the now-redundant use.
// NOTE(review): code-browser dump with missing interior lines.
4621void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4626 dbgs() <<
"The search space is too complex.\n"
4627 "Narrowing the search space by assuming that uses separated "
4628 "by a constant offset will use the same registers.\n");
4632 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4633 LSRUse &LU =
Uses[LUIdx];
// Only plain offset formulas (scale 0 or 1) are merge candidates.
4634 for (
const Formula &
F : LU.Formulae) {
4635 if (
F.BaseOffset == 0 || (
F.Scale != 0 &&
F.Scale != 1))
4638 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
// The target must accept the combined offset range for the merged use.
4642 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4643 LU.Kind, LU.AccessTy))
4648 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
// Transfer fixups, shifting each by the offset difference.
4651 for (LSRFixup &
Fixup : LU.Fixups) {
4652 Fixup.Offset +=
F.BaseOffset;
4653 LUThatHas->pushFixup(
Fixup);
// Formulas of the surviving use that became illegal under the widened
// offset range are dropped.
4659 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4660 Formula &
F = LUThatHas->Formulae[i];
4661 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4662 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4664 LUThatHas->DeleteFormula(
F);
4672 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
// The merged-away use is removed entirely.
4675 DeleteUse(LU, LUIdx);
// Re-run the dedicated-register filter after earlier narrowing steps have
// changed cross-use register sharing; the second pass can prune formulae the
// first pass had to keep.
4688void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4692 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
4693 "undesirable dedicated registers.\n");
4695 FilterOutUndesirableDedicatedRegisters();
// Search-space narrowing: among formulae of one use that share the same
// (ScaledReg, Scale) pair, keep only the best one. "Better" prefers fewer
// effectively-unshared base registers, then falls back to the cost model.
// NOTE(review): code-browser dump with missing interior lines.
4710void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
4715 dbgs() <<
"The search space is too complex.\n"
4716 "Narrowing the search space by choosing the best Formula "
4717 "from the Formulae with the same Scale and ScaledReg.\n");
4722 BestFormulaeTy BestFormulae;
4724 bool ChangedFormulae =
false;
4729 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4730 LSRUse &LU =
Uses[LUIdx];
// Comparator: weight each base register by how few uses share it.
4735 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
4740 size_t FARegNum = 0;
4741 for (
const SCEV *Reg : FA.BaseRegs) {
4742 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4743 FARegNum += (NumUses - UsedByIndices.
count() + 1);
4745 size_t FBRegNum = 0;
4746 for (
const SCEV *Reg : FB.BaseRegs) {
4747 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4748 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
4750 if (FARegNum != FBRegNum)
4751 return FARegNum < FBRegNum;
// Tie-break with the full cost model.
4758 CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
4760 CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
4761 return CostFA.isLess(CostFB);
4765 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
4767 Formula &
F = LU.Formulae[FIdx];
// Key collision => two formulae share (ScaledReg, Scale); keep the better.
4770 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
4774 Formula &Best = LU.Formulae[
P.first->second];
4775 if (IsBetterThan(
F, Best))
4779 " in favor of formula ";
4780 Best.print(
dbgs());
dbgs() <<
'\n');
4782 ChangedFormulae =
true;
4784 LU.DeleteFormula(
F);
4790 LU.RecomputeRegs(LUIdx, RegUses);
// Keys are per-use; reset for the next use.
4793 BestFormulae.clear();
4798 "After filtering out undesirable candidates:\n";
// Search-space narrowing for post-increment-friendly targets: for Address
// uses, keep only the formulae with the minimal register count (others are
// unlikely to enable post-inc addressing and just enlarge the search).
// NOTE(review): code-browser dump with missing interior lines.
4805void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
4812 "Narrowing the search space by choosing the lowest "
4813 "register Formula for PostInc Uses.\n");
4815 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4816 LSRUse &LU =
Uses[LUIdx];
4818 if (LU.Kind != LSRUse::Address)
// First pass: find the minimum register count among this use's formulae.
4824 size_t MinRegs = std::numeric_limits<size_t>::max();
4825 for (
const Formula &
F : LU.Formulae)
4826 MinRegs = std::min(
F.getNumRegs(), MinRegs);
// Second pass: delete every formula above the minimum.
4829 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
4831 Formula &
F = LU.Formulae[FIdx];
4832 if (
F.getNumRegs() > MinRegs) {
4835 LU.DeleteFormula(
F);
4842 LU.RecomputeRegs(LUIdx, RegUses);
4893void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
4906 DenseMap <const SCEV *, float> RegNumMap;
4907 for (
const SCEV *Reg : RegUses) {
4908 if (UniqRegs.
count(Reg))
4911 for (
const LSRUse &LU :
Uses) {
4912 if (!LU.Regs.count(Reg))
4914 float P = LU.getNotSelectedProbability(Reg);
4920 RegNumMap.
insert(std::make_pair(Reg, PNotSel));
4924 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
4927 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4928 LSRUse &LU =
Uses[LUIdx];
4930 if (LU.Formulae.size() < 2)
4935 float FMinRegNum = LU.Formulae[0].getNumRegs();
4936 float FMinARegNum = LU.Formulae[0].getNumRegs();
4938 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4939 Formula &
F = LU.Formulae[i];
4942 for (
const SCEV *BaseReg :
F.BaseRegs) {
4943 if (UniqRegs.
count(BaseReg))
4945 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4946 if (isa<SCEVAddRecExpr>(BaseReg))
4948 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4950 if (
const SCEV *ScaledReg =
F.ScaledReg) {
4951 if (!UniqRegs.
count(ScaledReg)) {
4953 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
4954 if (isa<SCEVAddRecExpr>(ScaledReg))
4956 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);