#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to a LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Don't prefer any addressing mode"),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode")));

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to drop solution if it is less profitable"));

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

    cl::desc("Stress test LSR IV chains"));
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

                                 unsigned AS = UnknownAddressSpace) {
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};

  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();

  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    return {Value, Scalable || RHS.isScalable()};

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    return {Value, Scalable || RHS.isScalable()};

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    return {Value, Scalable};
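  // Illustrative sketch (not part of the original source): Immediate models
  // either a fixed offset or a vscale-scaled one, and two immediates are only
  // "compatible" when either is zero or both have the same scalability:
  //   Immediate A = Immediate::getFixed(16);
  //   Immediate B = Immediate::getScalable(8);
  //   bool OK1 = A.isCompatibleImmediate(B);                    // false
  //   bool OK2 = Immediate::getZero().isCompatibleImmediate(B); // true
  // addUnsigned/subUnsigned assert compatibility and propagate the scalable
  // flag from either operand; mulUnsigned keeps the current scalability.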
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
    if (!LHS.isScalable() && RHS.isScalable())
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();

struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "[NumUses=" << UsedByIndices.count() << ']';
  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
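  // RegUseTracker maps each register (a SCEV) to the bit vector of LSR use
  // indices that reference it; countRegister/dropRegister set and clear those
  // bits, swapAndDropUse renumbers uses when one is deleted, and
  // isRegUsedByUsesOtherThan answers whether a register is shared across uses.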
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
      RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);

RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);
  for (auto &Pair : RegUsesMap) {
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));

RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;

  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;

void RegUseTracker::clear() {
  Immediate BaseOffset = Immediate::getZero();

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  Immediate UnfoldedOffset = Immediate::getZero();

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;
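  // A Formula describes one candidate expression for a use, roughly
  // BaseGV + BaseOffset + Scale*ScaledReg + sum(BaseRegs), with UnfoldedOffset
  // holding an immediate that could not be folded into the addressing mode.
  // (Summary inferred from the fields above and from Formula::print below.)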
    for (const SCEV *S : Add->operands())

  if (!AR->getStart()->isZero() && AR->isAffine()) {
                  AR->getStepRecurrence(SE),

    const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
    for (const SCEV *S : MyGood)
    for (const SCEV *S : MyBad)

      BaseRegs.push_back(Sum);

      BaseRegs.push_back(Sum);

  return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);

bool Formula::isCanonical(const Loop &L) const {
  assert((Scale == 0 || ScaledReg) &&
         "ScaledReg must be non-null if Scale is non-zero");
    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())

void Formula::canonicalize(const Loop &L) {
  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);

    ScaledReg = BaseRegs.pop_back_val();

    if (I != BaseRegs.end())

bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
  if (BaseRegs.size() != 1 || ScaledReg)

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :

void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    BaseGV->printAsOperand(OS, false);
  if (BaseOffset.isNonZero()) {
  for (const SCEV *BaseReg : BaseRegs) {
    OS << "reg(" << *BaseReg << ')';
  if (HasBaseReg && BaseRegs.empty()) {
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    OS << "**error: !HasBaseReg**";
    OS << Scale << "*reg(";
  if (UnfoldedOffset.isNonZero()) {
    OS << "imm(" << UnfoldedOffset << ')';
                              bool IgnoreSignificantBits = false) {
    if (RA.isAllOnes()) {
    const APInt &LA = C->getAPInt();

    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;

      for (const SCEV *S : Add->operands()) {
        if (!Op) return nullptr;

          dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
                                           IgnoreSignificantBits)) {

    if (C->getAPInt().getSignificantBits() <= 64) {
      return Immediate::getFixed(C->getValue()->getSExtValue());
    if (Result.isNonZero())
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (Result.isNonZero())
  } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
      if (isa<SCEVVScale>(M->getOperand(1))) {
        return Immediate::getScalable(C->getValue()->getSExtValue());
  return Immediate::getZero();

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getPointerOperand() == OperandVal)
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
      if (IntrInfo.PtrVal == OperandVal)
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
    AccessTy.MemTy = Ty;

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::masked_load:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
    case Intrinsic::masked_store:
      AccessTy.AddrSpace =
          II->getArgOperand(1)->getType()->getPointerAddressSpace();
  if (!Processed.insert(S).second)
    for (const SCEV *S : Add->operands()) {

    Value *UVal = U->getValue();
    if (UI && UI->getOpcode() == Instruction::Mul &&

                                 const LSRUse &LU, const Formula &F);
                                      const LSRUse &LU, const Formula &F,

  const Loop *L = nullptr;

      L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;
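  // Note on the validity check above: a cost is valid either when no
  // component has saturated to ~0u, or when every component is ~0u, which is
  // the canonical "Loser" state produced by RateFormula's failure path below.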
  void RateFormula(const Formula &F,

  void RateRegister(const Formula &F, const SCEV *Reg,
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,

  Value *OperandValToReplace = nullptr;

  Immediate Offset = Immediate::getZero();

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

struct UniquifierDenseMapInfo {
    V.push_back(reinterpret_cast<const SCEV *>(-1));
    V.push_back(reinterpret_cast<const SCEV *>(-2));

  MemAccessTy AccessTy;

  Immediate MinOffset = Immediate::getFixedMax();
  Immediate MaxOffset = Immediate::getFixedMin();

  bool AllFixupsOutsideLoop = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (Immediate::isKnownGT(f.Offset, MaxOffset))
      MaxOffset = f.Offset;
    if (Immediate::isKnownLT(f.Offset, MinOffset))
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,

  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
  if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
  if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
                     [&](unsigned i, const SCEV *Reg) {
                       return i + getSetupCost(Reg, Depth - 1);
  if (auto S = dyn_cast<SCEVUDivExpr>(Reg))

void Cost::RateRegister(const Formula &F, const SCEV *Reg,
    if (AR->getLoop() != L) {
      if (!AR->getLoop()->contains(L)) {

    unsigned LoopCost = 1;
      if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
        if (Step->getAPInt() == F.BaseOffset.getFixedValue())
      const SCEV *LoopStep = AR->getStepRecurrence(*SE);
      if (isa<SCEVConstant>(LoopStep)) {
        const SCEV *LoopStart = AR->getStart();
        if (!isa<SCEVConstant>(LoopStart) &&
    C.AddRecCost += LoopCost;

    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(F, AR->getOperand(1), Regs);

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);

  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&

void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
  if (LoserRegs && LoserRegs->count(Reg)) {
  if (Regs.insert(Reg).second) {
    RateRegister(F, Reg, Regs);
    if (LoserRegs && isLoser())
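// RateFormula (below) accumulates the cost of one canonical formula: it rates
// the scaled register and every base register, charges base adds when the
// formula has more than one register part or a non-zero unfolded offset,
// folds in the per-fixup immediate costs, and converts the register, addrec
// and base-add totals into an instruction-count estimate. On any illegal
// component it saturates every field, marking the cost a loser.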
void Cost::RateFormula(const Formula &F,
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");

  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());

  for (const LSRFixup &Fixup : LU.Fixups) {
    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
      else if (Offset.isNonZero())

    if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&

  if (C.NumRegs > TTIRegNum) {
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
      C.Insns += (C.NumRegs - TTIRegNum);

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&

  C.Insns += (C.AddRecCost - PrevAddRecCost);

  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
bool Cost::isLess(const Cost &Other) const {
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";

bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))

  return !L->contains(UserInst);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
    UserInst->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);

bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  return Uniquifier.count(Key);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();

bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  if (!Uniquifier.insert(Key).second)

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
  case LSRUse::Address: {
    int64_t FixedOffset =
        BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
    int64_t ScalableOffset =
        BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup, ScalableOffset);

  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
    if (Scale != 0 && Scale != -1)
    if (BaseOffset.isNonZero()) {
      if (BaseOffset.isScalable())
      BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());

    return !BaseGV && Scale == 0 && BaseOffset.isZero();

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();

                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  if (BaseOffset.isNonZero() &&
      (BaseOffset.isScalable() != MinOffset.isScalable() ||
       BaseOffset.isScalable() != MaxOffset.isScalable()))

  int64_t Base = BaseOffset.getKnownMinValue();
  int64_t Min = MinOffset.getKnownMinValue();
  int64_t Max = MaxOffset.getKnownMinValue();
  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());

                               HasBaseReg, Scale) &&

                       Immediate MinOffset, Immediate MaxOffset,
                       LSRUse::KindType Kind, MemAccessTy AccessTy,
                       const Formula &F, const Loop &L) {
  assert((F.isCanonical(L) || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                       Immediate MaxOffset, LSRUse::KindType Kind,
                       Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
                              BaseOffset, HasBaseReg, Scale) ||
                              BaseGV, BaseOffset, true, 0));

                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
                                 const LSRUse &LU, const Formula &F) {
  for (const LSRFixup &Fixup : LU.Fixups)
                             (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                             F.Scale, Fixup.UserInst))

                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F,
    return F.Scale != 1;

  case LSRUse::Address: {
    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
    if (F.BaseOffset.isScalable()) {
      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);

  case LSRUse::ICmpZero:
  case LSRUse::Special:

                             LSRUse::KindType Kind, MemAccessTy AccessTy,
  if (BaseOffset.isZero() && !BaseGV)

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
  if (!HasBaseReg && Scale == 1) {

  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&

                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
  if (S->isZero()) return true;
  if (!S->isZero()) return false;

  if (BaseOffset.isZero() && !BaseGV)
  if (BaseOffset.isScalable())

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
                              BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;
      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

    return std::next(Incs.begin());

  bool hasIncs() const { return Incs.size() >= 2; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,
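  // An IVChain records a head increment (IVInc: user instruction, IV operand,
  // increment expression) and the increments chained onto it; ExprBase caches
  // the chain's base expression, and hasIncs() is true only once the chain
  // has at least two increments.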
  bool Changed = false;

  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  void OptimizeShadowIV();
  void OptimizeLoopTermCond();

  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

                    const Cost &CurCost,

                                     const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

  bool getChanged() const { return Changed; }
    return ScalarEvolutionIVs;
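// OptimizeShadowIV (below) looks for integer IVs whose interesting use is a
// uitofp/sitofp cast and builds a parallel floating-point IV instead
// (ConstantFP initial value, FAdd/FSub step), so the cast can be removed from
// the loop.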
void LSRInstance::OptimizeShadowIV() {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    Type *DestTy = nullptr;
    bool IsSigned = false;

    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      DestTy = UCast->getDestTy();
    else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
                                        (double)Init->getSExtValue() :
                                        (double)Init->getZExtValue());

    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)

    if (!C->getValue().isStrictlyPositive())

    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,

    if (U.getUser() == Cond) {

  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;

  if (Max->getNumOperands() != 2)

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);
      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))

         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();

                     Cond->getOperand(0), NewRHS, "scmp");

  Cond->replaceAllUsesWith(NewCond);

  Cond->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();

LSRInstance::OptimizeLoopTermCond() {
  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());

    if (!FindIVUserForCond(Cond, CondUse))

      if (!DT.dominates(ExitingBlock, LatchBlock))
      if (LatchBlock != ExitingBlock)
        if (&UI != CondUse &&
          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(UI, L);
          if (!A || !B) continue;

              if (C->isOne() || C->isMinusOne())
                goto decline_post_inc;
              if (C->getValue().getSignificantBits() >= 64 ||
                  C->getValue().isMinSignedValue())
                goto decline_post_inc;
              MemAccessTy AccessTy =
              int64_t Scale = C->getSExtValue();
                                            AccessTy.AddrSpace))
                goto decline_post_inc;
                                            AccessTy.AddrSpace))
                goto decline_post_inc;

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

    if (Cond->getNextNonDebugInstruction() != TermBr) {
      if (Cond->hasOneUse()) {
        Cond->moveBefore(TermBr);
        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");

      IVIncInsertPos = L->getLoopLatch()->getTerminator();
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)

  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);

  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
                          LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
                          NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;

std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
    Offset = Immediate::getFixed(0);

  std::pair<UseMapTy::iterator, bool> P =
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);
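// Note: getUse first tries to fold the expression's offset into an existing
// LSRUse via reconcileNewOffset (widening that use's [MinOffset, MaxOffset]
// range); only if no compatible use exists does it append a new LSRUse.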
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())
  RegUses.swapAndDropUse(LUIdx, Uses.size());

LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (LSRUse &LU : Uses) {
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())

void LSRInstance::CollectInterestingTypesAndFactors() {
    const SCEV *Expr = IU.getExpr(U);
  } while (!Worklist.empty());

       I = Strides.begin(), E = Strides.end(); I != E; ++I)
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

          dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());
        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());

  if (Types.size() == 1)

  for(; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
            dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {

  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);

    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
      if (SubExpr->getSCEVType() == scAddExpr)
      if (SubExpr->getSCEVType() != scMulExpr)
    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
  if (!isa<SCEVConstant>(IncExpr)) {
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))

  if (!Chain.hasIncs())

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               : Users) { dbgs() << " " << *Inst << "\n"; });
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  for (const IVInc &Inc : Chain) {
    if (Inc.IncExpr->isZero())
    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;
    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;
    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))

    if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))
    LastIncExpr = OperExpr;
    if (!isa<SCEVAddRecExpr>(LastIncExpr))
    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),

    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)
        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {
    NearUsers.insert(OtherUse);
  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);

void LSRInstance::CollectChains() {
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);
      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  for (PHINode &PN : L->getHeader()->phis()) {
        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
      ChainInstruction(&PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                            ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
  IVChainVec.resize(ChainIdx);

void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
  Immediate IncOffset = Immediate::getZero();

    auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
    if (!IncVScale || IncVScale->getNumOperands() != 2 ||
        !isa<SCEVVScale>(IncVScale->getOperand(1)))
    auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
    if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
    IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());

void LSRInstance::GenerateIVChain(const IVChain &Chain,
  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
  assert(IVSrc && "Failed to find IV chain source");

  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

      bool FoundBase = false;
      for (auto [MapScev, MapIVOper] : reverse(Bases)) {
        if (!Remainder->isZero()) {
          Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
          const SCEV *IVOperExpr =
          IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
        Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
        IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
        LeftOverExpr = nullptr;

    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))

  if (isa<PHINode>(Chain.tailUserInst())) {
    for (PHINode &Phi : L->getHeader()->phis()) {
          Phi.getIncomingValueForBlock(L->getLoopLatch()));
      Value *IVOper = IVSrc;
      if (IVTy != PostIncTy) {
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      Phi.replaceUsesOfWith(PostIncV, IVOper);
void LSRInstance::CollectFixupsAndInitialFormulae() {
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
      Kind = LSRUse::Address;

    const SCEV *S = IU.getExpr(U);

    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
            (!NV->getType()->isPointerTy() ||
          Kind = LSRUse::ICmpZero;
        } else if (L->isLoopInvariant(NV) &&
                   (!isa<Instruction>(NV) ||
                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
                   !NV->getType()->isPointerTy()) {
          Kind = LSRUse::ICmpZero;
          assert(!isa<SCEVCouldNotCompute>(S));

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
      VisitedLSRUse.insert(LUIdx);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
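// Note: in the equality-compare handling above, the compared operand is
// swapped into position 0 when needed, and when the other operand is loop
// invariant the use becomes an ICmpZero use so the comparison can be
// rewritten against zero; each factor also gets its negation added to the
// Factors set for later scale generation.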
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
    LU.RigidFormula = true;

  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))

  CountRegisters(F, LUIdx);

LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  while (!Worklist.empty()) {
    if (!Visited.insert(S).second)

    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        if (L->contains(Inst)) continue;
      } else if (isa<Constant>(V))

      for (const Use &U : V->uses()) {
        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          cast<PHINode>(UserInst)->getIncomingBlock(

        if (isa<PHINode>(UserInst)) {
          const auto *PhiNode = cast<PHINode>(UserInst);
          bool HasIncompatibleEHPTerminatedBlock = false;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
          if (HasIncompatibleEHPTerminatedBlock) {

        if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))

          if (!isa<SCEVUnknown>(UserS))

        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));

        std::pair<size_t, Immediate> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                           unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Remainder = nullptr;

      const SCEV *Remainder =

                              LSRUse &LU, const SCEV *S, const Loop *L,
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())

  if (!isa<SCEVConstant>(LoopStep))
  if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))

void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (AddOps.size() == 1)

                                   LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J),

    if (InnerAddOps.size() == 1 &&
                                   LU.AccessTy, InnerAddOps[0],
                                   Base.getNumRegs() > 1))

    if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())

      const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
          F.ScaledReg = nullptr;
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
      } else if (IsScaledReg)
        F.ScaledReg = InnerSum;
        F.BaseRegs[Idx] = InnerSum;

              SC->getValue()->getZExtValue()))
            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
                                SC->getValue()->getZExtValue());
        F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),

void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
      (Base.UnfoldedOffset.isNonZero()) <=

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
      if (!CombinedIntegerType)
      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
        NewBase.UnfoldedOffset.getFixedValue(), true));
    NewBase.UnfoldedOffset = Immediate::getFixed(0);

void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  if (G->isZero() || !GV)
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,

void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);

    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
          F.ScaledReg = nullptr;
          F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

  if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
            dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
      const APInt &StepInt = StepRec->getAPInt();
      for (Immediate Offset : Worklist) {
        Offset = Immediate::getFixed(Offset.getFixedValue() - Step);

  for (Immediate Offset : Worklist)

  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))
  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  if (LU.MinOffset != LU.MaxOffset) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset.isMin() && Factor == -1)
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())

    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())

    F.BaseOffset = NewBaseOffset;

    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())
                              IntTy, F.UnfoldedOffset.getFixedValue()))

    (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not did its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        if (!Quotient->isZero()) {
          F.ScaledReg = Quotient;
          F.deleteBaseReg(F.BaseRegs[i]);
          if (F.Scale == 1 && (F.BaseRegs.empty() ||
              (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
          if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
          (void)InsertFormula(LU, LUIdx, F);
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    if (!New || (Result && New != Result))
  assert(Result && "failed to create expression");

void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
             [](const SCEV *S) { return S->getType()->isPointerTy(); }))

  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
      const SCEV *NewScaledReg =
      if (!NewScaledReg || NewScaledReg->isZero())
      F.ScaledReg = NewScaledReg;

    bool HasZeroBaseReg = false;
    for (const SCEV *&BaseReg : F.BaseRegs) {
      const SCEV *NewBaseReg =
      if (!NewBaseReg || NewBaseReg->isZero()) {
        HasZeroBaseReg = true;
      BaseReg = NewBaseReg;

    if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
    (void)InsertFormula(LU, LUIdx, F);

  const SCEV *OrigReg;
      : LUIdx(LI), Imm(I), OrigReg(R) {}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
void LSRInstance::GenerateCrossUseConstantOffsets() {
  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;

  for (const SCEV *Use : RegUses) {
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    if (Imms.size() == 1)

    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry
                 << ' ' << Entry.first;

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;

      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {

      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {

      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
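      // Note on the midpoint computation above: (FI & LI) + ((FI ^ LI) >> 1)
      // computes (FI + LI) / 2 without overflow (the shift rounds toward
      // -inf), and "Avg += (FI ^ LI) & ((uint64_t)Avg >> 63)" adds 1 back
      // when the result is negative and FI + LI is odd, rounding toward zero
      // instead. For example, FI = -7 and LI = 2 give -3 from the first step
      // and -2 after the correction. OtherImms then samples the smallest,
      // largest, and near-average immediates as candidate reuse bases.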
      for (const auto &M : OtherImms) {
        if (M == J || M == JE) continue;
        if (!JImm.isCompatibleImmediate(M->first))

        Immediate Imm = JImm.subUnsigned(M->first);
        for (unsigned LUIdx : UsedByIndices.set_bits())
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)

  UsedByIndicesMap.clear();
  UniqueItems.clear();

  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    Immediate Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];

      if (F.ScaledReg == OrigReg) {
        if (!F.BaseOffset.isCompatibleImmediate(Imm))
        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
          if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
          if (C->getValue()->isNegative() !=
                  (NewF.BaseOffset.isLessThanZero()) &&
                  .ule(std::abs(NewF.BaseOffset.getFixedValue())))

        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);
        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          const SCEV *BaseReg = F.BaseRegs[N];
          if (BaseReg != OrigReg)
          if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
              !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
              !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
          NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
                           LU.Kind, LU.AccessTy, NewF)) {
            Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
            NewF.UnfoldedOffset = NewUnfoldedOffset;
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          for (const SCEV *NewReg : NewF.BaseRegs)
            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
              if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
              if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                      .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                  (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                      (unsigned)llvm::countr_zero<uint64_t>(
                          NewF.BaseOffset.getFixedValue()))

          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  GenerateCrossUseConstantOffsets();

             "After generating reuse formulae:\n";
             print_uses(dbgs()));
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  bool ChangedFormulae = false;

  using BestFormulaeTy =

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
      if (CostF.isLoser()) {

        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);

      std::pair<BestFormulaeTy::const_iterator, bool> P =
          BestFormulae.insert(std::make_pair(Key, FIdx));

        Formula &Best = LU.Formulae[P.first->second];

        Cost CostBest(L, SE, TTI, AMK);
        CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
        if (CostF.isLess(CostBest))

                   " in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');

        ChangedFormulae = true;
        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

    BestFormulae.clear();

             "After filtering out undesirable candidates:\n";

size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                       "which use a superset of registers used by other "

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())

           I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
              Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                  (uint64_t)C->getValue()->getSExtValue());
          NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                              (I - F.BaseRegs.begin()));
          if (LU.HasFormulaWithSameRegs(NewF)) {
            LU.DeleteFormula(F);
        } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
          if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);