#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to a LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Don't prefer any addressing mode"),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode")));

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to replace primary IV with other IV."));

    cl::desc("Attempt to drop solution if it is less profitable"));

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

          "Number of terminating condition fold recognized and performed");

    cl::desc("Stress test LSR IV chains"));
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

                                unsigned AS = UnknownAddressSpace) {
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    return {Value, Scalable};
  }
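// The Immediate helpers above model both fixed offsets (getFixed) and
// vscale-scaled offsets (getScalable); addUnsigned/subUnsigned only combine
// the two flavors when one operand is zero (see isCompatibleImmediate). The
// comparator below gives Immediates a strict weak ordering so they can key a
// std::map: fixed offsets sort before scalable ones, then by known-min value.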
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
      return false;
    if (!LHS.isScalable() && RHS.isScalable())
      return true;
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();
  }
};

struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
    if (LSize != RSize)
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);
  }
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "[NumUses=" << UsedByIndices.count() << ']';

  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
      RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
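// swapAndDropUse: when use LUIdx is replaced by the (former) last use, move
// each register's use bit from LastLUIdx down into the vacated slot and shrink
// the bit vector, keeping RegUsesMap consistent with the compacted use list.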
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1)
    return false;
  if ((size_t)i != LUIdx)
    return true;

  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;

void RegUseTracker::clear() {
  Immediate BaseOffset = Immediate::getZero();

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  Immediate UnfoldedOffset = Immediate::getZero();

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

    for (const SCEV *S : Add->operands())

    if (!AR->getStart()->isZero() && AR->isAffine()) {
                                      AR->getStepRecurrence(SE),

    const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
    for (const SCEV *S : MyGood)
    for (const SCEV *S : MyBad)

    BaseRegs.push_back(Sum);

    BaseRegs.push_back(Sum);

  return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
bool Formula::isCanonical(const Loop &L) const {
    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())

void Formula::canonicalize(const Loop &L) {
  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);

    ScaledReg = BaseRegs.pop_back_val();

  if (I != BaseRegs.end())

bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
  if (BaseRegs.size() != 1 || ScaledReg)

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    BaseGV->printAsOperand(OS, false);
  if (BaseOffset.isNonZero()) {
  for (const SCEV *BaseReg : BaseRegs) {
    OS << "reg(" << *BaseReg << ')';
  if (HasBaseReg && BaseRegs.empty()) {
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    OS << "**error: !HasBaseReg**";
    OS << Scale << "*reg(";
  if (UnfoldedOffset.isNonZero()) {
    OS << "imm(" << UnfoldedOffset << ')';
                            bool IgnoreSignificantBits = false) {
    if (RA.isAllOnes()) {
      const APInt &LA = C->getAPInt();
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                      IgnoreSignificantBits);
      if (!Step)
        return nullptr;
                                       IgnoreSignificantBits);
      if (!Start)
        return nullptr;

    for (const SCEV *S : Add->operands()) {
      if (!Op)
        return nullptr;

        dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
                                IgnoreSignificantBits)) {

    if (C->getAPInt().getSignificantBits() <= 64) {
      return Immediate::getFixed(C->getValue()->getSExtValue());
    if (Result.isNonZero())
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (Result.isNonZero())
  } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
      if (isa<SCEVVScale>(M->getOperand(1))) {
        return Immediate::getScalable(C->getValue()->getSExtValue());
  return Immediate::getZero();
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getPointerOperand() == OperandVal)
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
      if (IntrInfo.PtrVal == OperandVal)
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)
  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
    AccessTy.MemTy = Ty;

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::masked_load:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
    case Intrinsic::masked_store:
      AccessTy.AddrSpace =
          II->getArgOperand(1)->getType()->getPointerAddressSpace();

  if (!Processed.insert(S).second)

    for (const SCEV *S : Add->operands()) {

    Value *UVal = U->getValue();
    if (UI && UI->getOpcode() == Instruction::Mul &&
                   const LSRUse &LU, const Formula &F);
                   const LSRUse &LU, const Formula &F,

  const Loop *L = nullptr;

      : L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds |
             C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u) ||
           ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds &
             C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;
  void RateFormula(const Formula &F,

  void RateRegister(const Formula &F, const SCEV *Reg,
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,

  Value *OperandValToReplace = nullptr;

  Immediate Offset = Immediate::getZero();

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

struct UniquifierDenseMapInfo {
    V.push_back(reinterpret_cast<const SCEV *>(-1));
    V.push_back(reinterpret_cast<const SCEV *>(-2));
  MemAccessTy AccessTy;

  Immediate MinOffset = Immediate::getFixedMax();
  Immediate MaxOffset = Immediate::getFixedMin();

  bool AllFixupsOutsideLoop = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (Immediate::isKnownGT(f.Offset, MaxOffset))
      MaxOffset = f.Offset;
    if (Immediate::isKnownLT(f.Offset, MinOffset))
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,

  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
  if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
  if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
                           [&](unsigned i, const SCEV *Reg) {
                             return i + getSetupCost(Reg, Depth - 1);
  if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
    if (AR->getLoop() != L) {
      if (!AR->getLoop()->contains(L)) {

    unsigned LoopCost = 1;
      if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
        if (Step->getAPInt() == F.BaseOffset.getFixedValue())
      const SCEV *LoopStep = AR->getStepRecurrence(*SE);
      if (isa<SCEVConstant>(LoopStep)) {
        const SCEV *LoopStart = AR->getStart();
        if (!isa<SCEVConstant>(LoopStart) &&
    C.AddRecCost += LoopCost;

    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(F, AR->getOperand(1), Regs);

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);

  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&

void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
  if (LoserRegs && LoserRegs->count(Reg)) {
  if (Regs.insert(Reg).second) {
    RateRegister(F, Reg, Regs);
    if (LoserRegs && isLoser())
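// RateFormula tallies the cost of materializing formula F for this use: it
// rates the scaled and base registers, then charges for extra base adds,
// immediate/offset costs, scale cost, and estimated register pressure,
// bailing out (Lose) when a rated register is already known to be a loser.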
void Cost::RateFormula(const Formula &F,
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");

  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());

  for (const LSRFixup &Fixup : LU.Fixups) {
    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
    else if (Offset.isNonZero())

    if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&

  if (C.NumRegs > TTIRegNum) {
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
      C.Insns += (C.NumRegs - TTIRegNum);

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&

  C.Insns += (C.AddRecCost - PrevAddRecCost);

  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
bool Cost::isLess(const Cost &Other) const {
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))

  return !L->contains(UserInst);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
    UserInst->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  return Uniquifier.count(Key);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();

bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);

  if (!Uniquifier.insert(Key).second)

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
      OS << "pointer";
    else
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
  case LSRUse::Address: {
    int64_t FixedOffset =
        BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
    int64_t ScalableOffset =
        BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup, ScalableOffset);

  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
    if (Scale != 0 && Scale != -1)
    if (BaseOffset.isNonZero()) {
      if (BaseOffset.isScalable())
      BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
    return !BaseGV && Scale == 0 && BaseOffset.isZero();

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();

                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  if (BaseOffset.isNonZero() &&
      (BaseOffset.isScalable() != MinOffset.isScalable() ||
       BaseOffset.isScalable() != MaxOffset.isScalable()))

  int64_t Base = BaseOffset.getKnownMinValue();
  int64_t Min = MinOffset.getKnownMinValue();
  int64_t Max = MaxOffset.getKnownMinValue();
  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());
                              HasBaseReg, Scale) &&

                       Immediate MinOffset, Immediate MaxOffset,
                       LSRUse::KindType Kind, MemAccessTy AccessTy,
                       const Formula &F, const Loop &L) {
  assert((F.isCanonical(L) || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                             Immediate MaxOffset, LSRUse::KindType Kind,
                             Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
                              BaseOffset, HasBaseReg, Scale) ||
                              BaseGV, BaseOffset, true, 0));

                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
                                     const LSRUse &LU, const Formula &F) {
  for (const LSRFixup &Fixup : LU.Fixups)
                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                                F.Scale, Fixup.UserInst))
                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F,
    return F.Scale != 1;

  case LSRUse::Address: {
    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
    if (F.BaseOffset.isScalable()) {
      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);

  case LSRUse::ICmpZero:
  case LSRUse::Special:

                             LSRUse::KindType Kind, MemAccessTy AccessTy,
  if (BaseOffset.isZero() && !BaseGV)

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  if (!HasBaseReg && Scale == 1) {

  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&

                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
  if (S->isZero()) return true;

  if (!S->isZero()) return false;

  if (BaseOffset.isZero() && !BaseGV)

  if (BaseOffset.isScalable())

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
                              BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

    return std::next(Incs.begin());

  bool hasIncs() const { return Incs.size() >= 2; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,

  bool Changed = false;
  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  void OptimizeShadowIV();
  void OptimizeLoopTermCond();

  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

                     const Cost &CurCost,

                   const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

  bool getChanged() const { return Changed; }
    return ScalarEvolutionIVs;
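// OptimizeShadowIV: if the induction variable is only consumed through
// int-to-float casts inside the loop, build a parallel floating-point
// ("shadow") induction variable so the casts can be removed.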
void LSRInstance::OptimizeShadowIV() {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    Type *DestTy = nullptr;
    bool IsSigned = false;

    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      DestTy = UCast->getDestTy();
    } else if (SIToFPInst *SCast =
                   dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
                                        (double)Init->getSExtValue() :
                                        (double)Init->getZExtValue());

    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)

    if (!C->getValue().isStrictlyPositive())

    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,

    if (U.getUser() == Cond) {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;

  if (Max->getNumOperands() != 2)

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))

         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();

                     Cond->getOperand(0), NewRHS, "scmp");

  Cond->replaceAllUsesWith(NewCond);

  Cond->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
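// OptimizeLoopTermCond: rewrite the loop's exiting compare to use the
// post-incremented induction variable where that is legal and profitable,
// and pick a safe insertion point for the IV increment.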
LSRInstance::OptimizeLoopTermCond() {
  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());

    if (!FindIVUserForCond(Cond, CondUse))

    if (!DT.dominates(ExitingBlock, LatchBlock))

    if (LatchBlock != ExitingBlock)
        if (&*UI != CondUse &&
            !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {

          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(*UI, L);
          if (!A || !B) continue;

            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;
            if (C->getValue().getSignificantBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;
                TTI, UI->getUser(), UI->getOperandValToReplace());
            int64_t Scale = C->getSExtValue();
                                          AccessTy.AddrSpace))
              goto decline_post_inc;
                                          AccessTy.AddrSpace))
              goto decline_post_inc;

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

    if (Cond->getNextNonDebugInstruction() != TermBr) {
      if (Cond->hasOneUse()) {
        Cond->moveBefore(TermBr);

        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");

    IVIncInsertPos = L->getLoopLatch()->getTerminator();
      IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
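// reconcileNewOffset: check whether a new fixup with offset NewOffset can be
// folded into the existing use; if so, widen the use's [MinOffset, MaxOffset]
// range and generalize its access type, otherwise report failure.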
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)

  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);

  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
                          LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
                          NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
    Offset = Immediate::getFixed(0);

  std::pair<UseMapTy::iterator, bool> P =
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);

void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())
  RegUses.swapAndDropUse(LUIdx, Uses.size());

LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (LSRUse &LU : Uses) {
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())
void LSRInstance::CollectInterestingTypesAndFactors() {
      const SCEV *Expr = IU.getExpr(U);
  } while (!Worklist.empty());

       I = Strides.begin(), E = Strides.end(); I != E; ++I)
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

              dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
          if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
            Factors.insert(Factor->getAPInt().getSExtValue());
          if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
            Factors.insert(Factor->getAPInt().getSExtValue());

  if (Types.size() == 1)

  for(; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
              dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {

  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);

    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
      if (SubExpr->getSCEVType() == scAddExpr)
      if (SubExpr->getSCEVType() != scMulExpr)
    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
  if (!isa<SCEVConstant>(IncExpr)) {
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))

  if (!Chain.hasIncs())

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst : Users) {
                 dbgs() << "  " << *Inst << "\n";
               });
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  for (const IVInc &Inc : Chain) {
    if (Inc.IncExpr->isZero())

    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;

    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))

    if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))

    LastIncExpr = OperExpr;

    if (!isa<SCEVAddRecExpr>(LastIncExpr))

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),

    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)
        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {
      NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
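// CollectChains visits IV users in the blocks between the loop header and
// latch, grows candidate increment chains via ChainInstruction, and then
// keeps (FinalizeChain) only the chains judged profitable.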
void LSRInstance::CollectChains() {
  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))

      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  for (PHINode &PN : L->getHeader()->phis()) {
        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
      ChainInstruction(&PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                            ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
  IVChainVec.resize(ChainIdx);

void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);

  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
  Immediate IncOffset = Immediate::getZero();
    auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
    if (!IncVScale || IncVScale->getNumOperands() != 2 ||
        !isa<SCEVVScale>(IncVScale->getOperand(1)))
    auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
    if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
    IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
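// GenerateIVChain materializes one profitable chain: it locates the chain's
// source IV, then walks the increments, expanding each user's value as the
// previous chain value plus the accumulated left-over increment expression.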
void LSRInstance::GenerateIVChain(const IVChain &Chain,
  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
  assert(IVSrc && "Failed to find IV chain source");

  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

      bool FoundBase = false;
      for (auto [MapScev, MapIVOper] : reverse(Bases)) {
        if (!Remainder->isZero()) {
          Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
          const SCEV *IVOperExpr =
          IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
        Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
        IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      assert(IVTy == IVOper->getType() && "inconsistent IV increment type");

      LeftOverExpr = nullptr;

    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))

  if (isa<PHINode>(Chain.tailUserInst())) {
    for (PHINode &Phi : L->getHeader()->phis()) {
          Phi.getIncomingValueForBlock(L->getLoopLatch()));
      Value *IVOper = IVSrc;
      if (IVTy != PostIncTy) {
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      Phi.replaceUsesOfWith(PostIncV, IVOper);
void LSRInstance::CollectFixupsAndInitialFormulae() {
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
      Kind = LSRUse::Address;

    const SCEV *S = IU.getExpr(U);

    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
            (!NV->getType()->isPointerTy() ||
          Kind = LSRUse::ICmpZero;
        } else if (L->isLoopInvariant(NV) &&
                   (!isa<Instruction>(NV) ||
                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
                   !NV->getType()->isPointerTy()) {
          Kind = LSRUse::ICmpZero;
        assert(!isa<SCEVCouldNotCompute>(S));

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
      VisitedLSRUse.insert(LUIdx);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
    LU.RigidFormula = true;

  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))

  CountRegisters(F, LUIdx);

LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  while (!Worklist.empty()) {
      if (!Visited.insert(S).second)

      else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
        const Value *V = US->getValue();
        if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
          if (L->contains(Inst)) continue;
        } else if (isa<Constant>(V))

        for (const Use &U : V->uses()) {
          const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());

          if (UserInst->getParent()->getParent() !=
              L->getHeader()->getParent())

          const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
            cast<PHINode>(UserInst)->getIncomingBlock(
          if (!DT.dominates(L->getHeader(), UseBB))

          if (isa<PHINode>(UserInst)) {
            const auto *PhiNode = cast<PHINode>(UserInst);
            bool HasIncompatibleEHPTerminatedBlock = false;

            for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
              if (PhiNode->getIncomingValue(I) == ExpectedValue) {
                if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                  HasIncompatibleEHPTerminatedBlock = true;
            if (HasIncompatibleEHPTerminatedBlock) {

          if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))

            if (!isa<SCEVUnknown>(UserS))

          if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
            unsigned OtherIdx = !U.getOperandNo();
            Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));

          std::pair<size_t, Immediate> P =
              getUse(S, LSRUse::Basic, MemAccessTy());
          size_t LUIdx = P.first;

          LSRUse &LU = Uses[LUIdx];
          LSRFixup &LF = LU.getNewFixup();
          LF.UserInst = const_cast<Instruction *>(UserInst);
          LF.OperandValToReplace = U;

          LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
          if (!LU.WidestFixupType ||
            LU.WidestFixupType = LF.OperandValToReplace->getType();
          InsertSupplementalFormula(US, LU, LUIdx);
          CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                            unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Remainder = nullptr;

    const SCEV *Remainder =

                                   LSRUse &LU, const SCEV *S, const Loop *L,
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())

  if (!isa<SCEVConstant>(LoopStep))

  if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (AddOps.size() == 1)

                                     LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J),

    if (InnerAddOps.size() == 1 &&
                                     LU.AccessTy, InnerAddOps[0],
                                     Base.getNumRegs() > 1))

    if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())

    const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
        F.ScaledReg = nullptr;
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
      F.BaseRegs[Idx] = InnerSum;

                                SC->getValue()->getZExtValue()))
            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
                                SC->getValue()->getZExtValue());
      F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,

void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
      (Base.UnfoldedOffset.isNonZero()) <=

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
      if (!CombinedIntegerType)
      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
                                 NewBase.UnfoldedOffset.getFixedValue(), true));
    NewBase.UnfoldedOffset = Immediate::getFixed(0);
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  if (G->isZero() || !GV)
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,

  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);

    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
        F.ScaledReg = nullptr;
        F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

  if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
            dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
      const APInt &StepInt = StepRec->getAPInt();
      for (Immediate Offset : Worklist) {
          Offset = Immediate::getFixed(Offset.getFixedValue() - Step);

  for (Immediate Offset : Worklist)

  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))
  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;

  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  if (LU.MinOffset != LU.MaxOffset) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset.isMin() && Factor == -1)
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())

    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())

    F.BaseOffset = NewBaseOffset;

    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())
                                     IntTy, F.UnfoldedOffset.getFixedValue()))

    (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not did its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        if (!Quotient->isZero()) {
          F.ScaledReg = Quotient;
          F.deleteBaseReg(F.BaseRegs[i]);
          if (F.Scale == 1 && (F.BaseRegs.empty() ||
                               (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
          if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
          (void)InsertFormula(LU, LUIdx, F);
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    if (!New || (Result && New != Result))
  assert(Result && "failed to create expression");

void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
                 [](const SCEV *S) { return S->getType()->isPointerTy(); }))

  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
      const SCEV *NewScaledReg =
      if (!NewScaledReg || NewScaledReg->isZero())
      F.ScaledReg = NewScaledReg;

      bool HasZeroBaseReg = false;
      for (const SCEV *&BaseReg : F.BaseRegs) {
        const SCEV *NewBaseReg =
        if (!NewBaseReg || NewBaseReg->isZero()) {
          HasZeroBaseReg = true;
        BaseReg = NewBaseReg;

      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

      (void)InsertFormula(LU, LUIdx, F);
  const SCEV *OrigReg;

      : LUIdx(LI), Imm(I), OrigReg(R) {}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
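// GenerateCrossUseConstantOffsets looks for registers that appear in several
// uses offset from one another by a constant, and emits formulae that share a
// single register while folding the difference into the immediate offset.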
void LSRInstance::GenerateCrossUseConstantOffsets() {
  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;

  for (const SCEV *Use : RegUses) {
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    if (Imms.size() == 1)

    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry : Imms) dbgs()
                   << ' ' << Entry.first;

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;

      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {

      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {
      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE) continue;
        if (!JImm.isCompatibleImmediate(M->first))

        Immediate Imm = JImm.subUnsigned(M->first);
        for (unsigned LUIdx : UsedByIndices.set_bits())
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)

  UsedByIndicesMap.clear();
  UniqueItems.clear();

  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    Immediate Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];

      if (F.ScaledReg == OrigReg) {
        if (!F.BaseOffset.isCompatibleImmediate(Imm))
        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
          if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
          if (C->getValue()->isNegative() !=
              (NewF.BaseOffset.isLessThanZero()) &&
                  .ule(std::abs(NewF.BaseOffset.getFixedValue())))

        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);

      for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
        const SCEV *BaseReg = F.BaseRegs[N];
        if (BaseReg != OrigReg)
        if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
            !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
            !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
        NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
                          LU.Kind, LU.AccessTy, NewF)) {
          Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
          NewF.UnfoldedOffset = NewUnfoldedOffset;
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

        for (const SCEV *NewReg : NewF.BaseRegs)
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
            if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
            if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                    .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                (unsigned)llvm::countr_zero<uint64_t>(
                    NewF.BaseOffset.getFixedValue()))

        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);
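// GenerateAllReuseFormulae runs every formula-generation pass over each use:
// reassociations and combinations first, then symbolic/constant offsets,
// ICmpZero scales, scales and truncates, and finally cross-use constant
// offsets.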
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  GenerateCrossUseConstantOffsets();

             "After generating reuse formulae:\n";
             print_uses(dbgs()));
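// FilterOutUndesirableDedicatedRegisters rates every formula of every use and,
// among formulas keyed by the same set of shared registers, keeps only the
// cheapest one, deleting the rest to shrink the search space.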
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  bool ChangedFormulae = false;

  using BestFormulaeTy =
  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
      if (CostF.isLoser()) {

        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);

        std::pair<BestFormulaeTy::const_iterator, bool> P =
            BestFormulae.insert(std::make_pair(Key, FIdx));

          Formula &Best = LU.Formulae[P.first->second];

          Cost CostBest(L, SE, TTI, AMK);
          CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
          if (CostF.isLess(CostBest))
                            " in favor of formula ";
                     Best.print(dbgs()); dbgs() << '\n');

        ChangedFormulae = true;

        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

    BestFormulae.clear();

             "After filtering out undesirable candidates:\n";

size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                       "which use a superset of registers used by other "

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())

           I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
              Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                  (uint64_t)C->getValue()->getSExtValue());
          NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                              (I - F.BaseRegs.begin()));
          if (LU.HasFormulaWithSameRegs(NewF)) {
            LU.DeleteFormula(F);
        } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
          if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LU.DeleteFormula(F);

    LU.RecomputeRegs(LUIdx, RegUses);