#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "loop-reduce"
    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to a LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Don't prefer any addressing mode"),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode")));

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to drop solution if it is less profitable"));

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

    cl::desc("Stress test LSR IV chains"));
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(MemAccessTy Other) const {
    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
  }

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
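// Immediate wraps a FixedOrScalableQuantity<Immediate, int64_t> so LSR can
// model fixed offsets and vscale-relative (scalable) offsets uniformly.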
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.Quantity;
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.Quantity;
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};
  }
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
      return false;
    if (!LHS.isScalable() && RHS.isScalable())
      return true;
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();
  }
};

struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
    if (LSize != RSize)
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);
  }
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "[NumUses=" << UsedByIndices.count() << ']';
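// RegUseTracker maps each register (SCEV) to the set of LSRUse indices that
// reference it, so formulae can be filtered by cross-use register sharing.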
  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

void
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
      RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

void
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
}

void
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}

bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;
  return UsedByIndices.find_next(i) != -1;
}

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
}

void RegUseTracker::clear() {
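// Formula models a candidate addressing expression of the form
//   BaseGV + BaseOffset + sum(BaseRegs) + Scale * ScaledReg,
// plus an UnfoldedOffset that could not be folded into the addressing mode.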
  Immediate BaseOffset = Immediate::getZero();

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  Immediate UnfoldedOffset = Immediate::getZero();

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

    for (const SCEV *S : Add->operands())

    if (!AR->getStart()->isZero() && AR->isAffine()) {
                                      AR->getStepRecurrence(SE),

    const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
    for (const SCEV *S : MyGood)
    for (const SCEV *S : MyBad)

      BaseRegs.push_back(Sum);

      BaseRegs.push_back(Sum);

  return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);

bool Formula::isCanonical(const Loop &L) const {
    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())

void Formula::canonicalize(const Loop &L) {
  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);

    ScaledReg = BaseRegs.pop_back_val();

    if (I != BaseRegs.end())

bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
    return false;
  if (BaseRegs.size() != 1 || ScaledReg)
    return false;

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :

void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
      return true;
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
      return true;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    BaseGV->printAsOperand(OS, false);
  if (BaseOffset.isNonZero()) {
  for (const SCEV *BaseReg : BaseRegs) {
    OS << "reg(" << *BaseReg << ')';
  if (HasBaseReg && BaseRegs.empty()) {
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    OS << "**error: !HasBaseReg**";
    OS << Scale << "*reg(";
  if (UnfoldedOffset.isNonZero()) {
    OS << "imm(" << UnfoldedOffset << ')';
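// The helpers below divide SCEV expressions exactly when possible
// (getExactSDiv) and split constant or symbolic offsets out of an
// expression (ExtractImmediate / ExtractSymbol).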
                              bool IgnoreSignificantBits = false) {
    if (RA.isAllOnes()) {
    const APInt &LA = C->getAPInt();
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                     IgnoreSignificantBits);
      if (!Step) return nullptr;
                                      IgnoreSignificantBits);
      if (!Start) return nullptr;
    for (const SCEV *S : Add->operands()) {
      if (!Op) return nullptr;
          dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
                                          IgnoreSignificantBits)) {
    if (C->getAPInt().getSignificantBits() <= 64) {
      return Immediate::getFixed(C->getValue()->getSExtValue());
    if (Result.isNonZero())
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (Result.isNonZero())
  } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
      if (isa<SCEVVScale>(M->getOperand(1))) {
        return Immediate::getScalable(C->getValue()->getSExtValue());
  return Immediate::getZero();

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
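// isAddressUse returns true when the instruction uses OperandVal as a memory
// address; getAccessType recovers the accessed type and address space.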
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getPointerOperand() == OperandVal)
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
        if (IntrInfo.PtrVal == OperandVal)
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
    AccessTy.MemTy = Ty;

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::masked_load:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
    case Intrinsic::masked_store:
      AccessTy.AddrSpace =
          II->getArgOperand(1)->getType()->getPointerAddressSpace();
  if (!Processed.insert(S).second)

  for (const SCEV *S : Add->operands()) {

    Value *UVal = U->getValue();
    if (UI && UI->getOpcode() == Instruction::Mul &&

                                 const LSRUse &LU, const Formula &F);
                                 const LSRUse &LU, const Formula &F,

  const Loop *L = nullptr;

      L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;

  void RateFormula(const Formula &F,
  void RateRegister(const Formula &F, const SCEV *Reg,
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,

  Value *OperandValToReplace = nullptr;

  Immediate Offset = Immediate::getZero();

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;
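// An LSRUse groups fixups that can share one expression; UniquifierDenseMapInfo
// lets a use deduplicate candidate formulae by their sets of registers.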
struct UniquifierDenseMapInfo {
    V.push_back(reinterpret_cast<const SCEV *>(-1));
    V.push_back(reinterpret_cast<const SCEV *>(-2));

  MemAccessTy AccessTy;

  Immediate MinOffset = Immediate::getFixedMax();
  Immediate MaxOffset = Immediate::getFixedMin();

  bool AllFixupsOutsideLoop = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (Immediate::isKnownGT(f.Offset, MaxOffset))
      MaxOffset = f.Offset;
    if (Immediate::isKnownLT(f.Offset, MinOffset))
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1385 LSRUse::KindType Kind, MemAccessTy AccessTy,
1387 bool HasBaseReg, int64_t Scale,
1391 if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1395 if (
const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1397 if (
auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1399 if (
auto S = dyn_cast<SCEVNAryExpr>(Reg))
1401 [&](
unsigned i,
const SCEV *Reg) {
1402 return i + getSetupCost(Reg, Depth - 1);
1404 if (
auto S = dyn_cast<SCEVUDivExpr>(Reg))
1411void Cost::RateRegister(
const Formula &
F,
const SCEV *Reg,
1417 if (AR->getLoop() != L) {
1424 if (!AR->getLoop()->contains(L)) {
1434 unsigned LoopCost = 1;
1441 if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1442 if (Step->getAPInt() ==
F.BaseOffset.getFixedValue())
1445 const SCEV *LoopStep = AR->getStepRecurrence(*SE);
1446 if (isa<SCEVConstant>(LoopStep)) {
1447 const SCEV *LoopStart = AR->getStart();
1448 if (!isa<SCEVConstant>(LoopStart) &&
1454 C.AddRecCost += LoopCost;
1458 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1459 if (!Regs.
count(AR->getOperand(1))) {
1460 RateRegister(
F, AR->getOperand(1), Regs);
1472 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1474 C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1481void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *Reg,
1484 if (LoserRegs && LoserRegs->
count(Reg)) {
1488 if (Regs.
insert(Reg).second) {
1489 RateRegister(
F, Reg, Regs);
1490 if (LoserRegs && isLoser())
1495void Cost::RateFormula(
const Formula &
F,
1502 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1504 unsigned PrevAddRecCost =
C.AddRecCost;
1505 unsigned PrevNumRegs =
C.NumRegs;
1506 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1507 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1508 if (VisitedRegs.
count(ScaledReg)) {
1512 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1516 for (
const SCEV *BaseReg :
F.BaseRegs) {
1517 if (VisitedRegs.
count(BaseReg)) {
1521 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1527 size_t NumBaseParts =
F.getNumRegs();
1528 if (NumBaseParts > 1)
1533 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1539 for (
const LSRFixup &
Fixup : LU.Fixups) {
1540 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1541 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1545 else if (
Offset.isNonZero())
1551 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1572 if (
C.NumRegs > TTIRegNum) {
1575 if (PrevNumRegs > TTIRegNum)
1576 C.Insns += (
C.NumRegs - PrevNumRegs);
1578 C.Insns += (
C.NumRegs - TTIRegNum);
1591 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1595 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1598 if (LU.Kind != LSRUse::ICmpZero)
1599 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();

bool Cost::isLess(const Cost &Other) const {
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))

  return !L->contains(UserInst);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
    UserInst->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  return Uniquifier.count(Key);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();

bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  if (!Uniquifier.insert(Key).second)

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
1809 LSRUse::KindType Kind, MemAccessTy AccessTy,
1811 bool HasBaseReg, int64_t Scale,
1814 case LSRUse::Address: {
1815 int64_t FixedOffset =
1816 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1817 int64_t ScalableOffset =
1818 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1820 HasBaseReg, Scale, AccessTy.AddrSpace,
1821 Fixup, ScalableOffset);
1823 case LSRUse::ICmpZero:
1830 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1835 if (Scale != 0 && Scale != -1)
1840 if (BaseOffset.isNonZero()) {
1843 if (BaseOffset.isScalable())
1853 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1862 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1864 case LSRUse::Special:
1866 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1873 Immediate MinOffset, Immediate MaxOffset,
1874 LSRUse::KindType Kind, MemAccessTy AccessTy,
1876 bool HasBaseReg, int64_t Scale) {
1877 if (BaseOffset.isNonZero() &&
1878 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1879 BaseOffset.isScalable() != MaxOffset.isScalable()))
1882 int64_t
Base = BaseOffset.getKnownMinValue();
1883 int64_t Min = MinOffset.getKnownMinValue();
1884 int64_t Max = MaxOffset.getKnownMinValue();
1887 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1890 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1893 HasBaseReg, Scale) &&
1899 Immediate MinOffset, Immediate MaxOffset,
1900 LSRUse::KindType Kind, MemAccessTy AccessTy,
1901 const Formula &
F,
const Loop &L) {
1909 assert((
F.isCanonical(L) ||
F.Scale != 0));
1911 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1916 Immediate MaxOffset, LSRUse::KindType Kind,
1918 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1921 BaseOffset, HasBaseReg, Scale) ||
1926 BaseGV, BaseOffset,
true, 0));
1930 Immediate MaxOffset, LSRUse::KindType Kind,
1931 MemAccessTy AccessTy,
const Formula &
F) {
1932 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1933 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1945 const LSRUse &LU,
const Formula &
F) {
1948 for (
const LSRFixup &
Fixup : LU.Fixups)
1950 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1951 F.Scale,
Fixup.UserInst))
1957 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1962 const LSRUse &LU,
const Formula &
F,
1971 return F.Scale != 1;
1974 case LSRUse::Address: {
1976 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1977 if (
F.BaseOffset.isScalable()) {
1978 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1979 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1981 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1982 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1986 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1989 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1992 "Legal addressing mode has an illegal cost!");
1993 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1995 case LSRUse::ICmpZero:
1997 case LSRUse::Special:
2007 LSRUse::KindType Kind, MemAccessTy AccessTy,
2011 if (BaseOffset.isZero() && !BaseGV)
2016 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2020 if (!HasBaseReg && Scale == 1) {
2030 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2040 Immediate MaxOffset, LSRUse::KindType Kind,
2041 MemAccessTy AccessTy,
const SCEV *S,
2044 if (S->
isZero())
return true;
2052 if (!S->
isZero())
return false;
2055 if (BaseOffset.isZero() && !BaseGV)
2058 if (BaseOffset.isScalable())
2063 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2066 BaseOffset, HasBaseReg, Scale);
2083 const SCEV *IncExpr;
2086 : UserInst(
U), IVOperand(
O), IncExpr(E) {}
2093 const SCEV *ExprBase =
nullptr;
2095 IVChain() =
default;
2096 IVChain(
const IVInc &Head,
const SCEV *
Base)
2097 : Incs(1, Head), ExprBase(
Base) {}
2104 return std::next(Incs.
begin());
2111 bool hasIncs()
const {
return Incs.
size() >= 2; }
2120 bool isProfitableIncrement(
const SCEV *OperExpr,
2121 const SCEV *IncExpr,
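// LSRInstance is the pass driver: it collects IV uses and initial formulae,
// generates reuse formulae, narrows the search space, solves for a
// minimal-cost solution, and rewrites the loop's induction expressions.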
2146 bool Changed =
false;
2173 RegUseTracker RegUses;
2178 static const unsigned MaxChains = 8;
2189 void OptimizeShadowIV();
2192 void OptimizeLoopTermCond();
2196 void FinalizeChain(IVChain &Chain);
2197 void CollectChains();
2198 void GenerateIVChain(
const IVChain &Chain,
2201 void CollectInterestingTypesAndFactors();
2202 void CollectFixupsAndInitialFormulae();
2208 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2209 LSRUse::KindType Kind, MemAccessTy AccessTy);
2211 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2212 MemAccessTy AccessTy);
2214 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2216 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2218 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2219 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2220 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2221 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2223 void CollectLoopInvariantFixupsAndFormulae();
2225 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2226 unsigned Depth = 0);
2228 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2230 size_t Idx,
bool IsScaledReg =
false);
2231 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2232 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2233 const Formula &
Base,
size_t Idx,
2234 bool IsScaledReg =
false);
2235 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2236 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2237 const Formula &
Base,
2239 size_t Idx,
bool IsScaledReg =
false);
2240 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2241 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2242 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2243 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2244 void GenerateCrossUseConstantOffsets();
2245 void GenerateAllReuseFormulae();
2247 void FilterOutUndesirableDedicatedRegisters();
2249 size_t EstimateSearchSpaceComplexity()
const;
2250 void NarrowSearchSpaceByDetectingSupersets();
2251 void NarrowSearchSpaceByCollapsingUnrolledCode();
2252 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2253 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2254 void NarrowSearchSpaceByFilterPostInc();
2255 void NarrowSearchSpaceByDeletingCostlyFormulas();
2256 void NarrowSearchSpaceByPickingWinnerRegs();
2257 void NarrowSearchSpaceUsingHeuristics();
2262 const Cost &CurCost,
2272 const LSRUse &LU)
const;
2274 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2277 void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2280 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2289 bool getChanged()
const {
return Changed; }
2291 return ScalarEvolutionIVs;
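// OptimizeShadowIV: if an integer IV is only used through int-to-float casts,
// create a parallel floating-point IV so the casts can be removed.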
2305void LSRInstance::OptimizeShadowIV() {
2307 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2315 Type *DestTy =
nullptr;
2316 bool IsSigned =
false;
2330 if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2332 DestTy = UCast->getDestTy();
2334 else if (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2336 DestTy = SCast->getDestTy();
2338 if (!DestTy)
continue;
2358 if (Mantissa == -1)
continue;
2362 unsigned Entry, Latch;
2372 if (!
Init)
continue;
2373 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2374 (
double)
Init->getSExtValue() :
2375 (
double)
Init->getZExtValue());
2379 if (!Incr)
continue;
2380 if (Incr->
getOpcode() != Instruction::Add
2381 && Incr->
getOpcode() != Instruction::Sub)
2397 if (!
C->getValue().isStrictlyPositive())
2405 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2407 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2408 : Instruction::FSub,
2427 if (
U.getUser() ==
Cond) {
2495 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2500 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2501 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2508 if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2509 Pred = ICmpInst::ICMP_SLE;
2511 }
else if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2512 Pred = ICmpInst::ICMP_SLT;
2514 }
else if (
const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2515 Pred = ICmpInst::ICMP_ULT;
2524 if (
Max->getNumOperands() != 2)
2527 const SCEV *MaxLHS =
Max->getOperand(0);
2528 const SCEV *MaxRHS =
Max->getOperand(1);
2533 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2546 "Loop condition operand is an addrec in a different loop!");
2550 Value *NewRHS =
nullptr;
2551 if (ICmpInst::isTrueWhenEqual(Pred)) {
2554 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2555 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2556 NewRHS = BO->getOperand(0);
2558 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2559 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2560 NewRHS = BO->getOperand(0);
2567 else if (
const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2568 NewRHS = SU->getValue();
2581 Cond->getOperand(0), NewRHS,
"scmp");
2585 Cond->replaceAllUsesWith(NewCond);
2588 Cond->eraseFromParent();
2590 if (
Cmp->use_empty())
2591 Cmp->eraseFromParent();
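// OptimizeLoopTermCond rewrites the loop's terminating condition to use the
// post-incremented induction variable when that form is cheaper for the target.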
2597LSRInstance::OptimizeLoopTermCond() {
2614 L->getExitingBlocks(ExitingBlocks);
2622 for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2628 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2638 if (!FindIVUserForCond(
Cond, CondUse))
2652 if (!DT.dominates(ExitingBlock, LatchBlock))
2657 if (LatchBlock != ExitingBlock)
2661 if (&*UI != CondUse &&
2662 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2665 const SCEV *
A = IU.getStride(*CondUse, L);
2666 const SCEV *
B = IU.getStride(*UI, L);
2667 if (!
A || !
B)
continue;
2680 if (
C->isOne() ||
C->isMinusOne())
2681 goto decline_post_inc;
2683 if (
C->getValue().getSignificantBits() >= 64 ||
2684 C->getValue().isMinSignedValue())
2685 goto decline_post_inc;
2689 TTI, UI->getUser(), UI->getOperandValToReplace());
2690 int64_t Scale =
C->getSExtValue();
2694 AccessTy.AddrSpace))
2695 goto decline_post_inc;
2700 AccessTy.AddrSpace))
2701 goto decline_post_inc;
2706 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2712 if (
Cond->getNextNonDebugInstruction() != TermBr) {
2713 if (
Cond->hasOneUse()) {
2714 Cond->moveBefore(TermBr);
2718 Cond = cast<ICmpInst>(
Cond->clone());
2719 Cond->setName(
L->getHeader()->getName() +
".termcond");
2741 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2743 IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
2748bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2749 bool HasBaseReg, LSRUse::KindType Kind,
2750 MemAccessTy AccessTy) {
2751 Immediate NewMinOffset = LU.MinOffset;
2752 Immediate NewMaxOffset = LU.MaxOffset;
2753 MemAccessTy NewAccessTy = AccessTy;
2758 if (LU.Kind != Kind)
2764 if (Kind == LSRUse::Address) {
2765 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2766 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2767 AccessTy.AddrSpace);
2772 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2774 LU.MaxOffset - NewOffset, HasBaseReg))
2776 NewMinOffset = NewOffset;
2777 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2779 NewOffset - LU.MinOffset, HasBaseReg))
2781 NewMaxOffset = NewOffset;
2787 if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
2788 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2792 LU.MinOffset = NewMinOffset;
2793 LU.MaxOffset = NewMaxOffset;
2794 LU.AccessTy = NewAccessTy;
2801std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2802 LSRUse::KindType Kind,
2803 MemAccessTy AccessTy) {
2811 Offset = Immediate::getFixed(0);
2814 std::pair<UseMapTy::iterator, bool>
P =
2818 size_t LUIdx =
P.first->second;
2819 LSRUse &LU =
Uses[LUIdx];
2820 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2822 return std::make_pair(LUIdx,
Offset);
2826 size_t LUIdx =
Uses.size();
2827 P.first->second = LUIdx;
2828 Uses.push_back(LSRUse(Kind, AccessTy));
2829 LSRUse &LU =
Uses[LUIdx];
2833 return std::make_pair(LUIdx,
Offset);
2837void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2838 if (&LU != &
Uses.back())
2843 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2849LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2850 const LSRUse &OrigLU) {
2852 for (LSRUse &LU :
Uses) {
2858 if (&LU != &OrigLU &&
2859 LU.Kind != LSRUse::ICmpZero &&
2860 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2861 LU.WidestFixupType == OrigLU.WidestFixupType &&
2862 LU.HasFormulaWithSameRegs(OrigF)) {
2864 for (
const Formula &
F : LU.Formulae) {
2867 if (
F.BaseRegs == OrigF.BaseRegs &&
2868 F.ScaledReg == OrigF.ScaledReg &&
2869 F.BaseGV == OrigF.BaseGV &&
2870 F.Scale == OrigF.Scale &&
2871 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2872 if (
F.BaseOffset.isZero())
2887void LSRInstance::CollectInterestingTypesAndFactors() {
2893 const SCEV *Expr = IU.getExpr(U);
2911 }
while (!Worklist.
empty());
2916 I = Strides.
begin(), E = Strides.
end();
I != E; ++
I)
2918 std::next(
I); NewStrideIter != E; ++NewStrideIter) {
2919 const SCEV *OldStride = *
I;
2920 const SCEV *NewStride = *NewStrideIter;
2931 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2933 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2934 Factors.insert(Factor->getAPInt().getSExtValue());
2939 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2940 Factors.insert(Factor->getAPInt().getSExtValue());
2946 if (
Types.size() == 1)
2958 for(; OI != OE; ++OI) {
2959 if (
Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2964 dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(Oper))) {
2976 if (
TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2977 return Trunc->getOperand(0);
2999 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
3001 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
3003 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
3010 if (SubExpr->getSCEVType() ==
scAddExpr)
3013 if (SubExpr->getSCEVType() !=
scMulExpr)
3019 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
3029bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3030 const SCEV *IncExpr,
3038 if (!isa<SCEVConstant>(IncExpr)) {
3040 if (isa<SCEVConstant>(SE.
getMinusSCEV(OperExpr, HeadExpr)))
3065 if (!Chain.hasIncs())
3068 if (!
Users.empty()) {
3069 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3071 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3074 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3082 if (isa<PHINode>(Chain.tailUserInst())
3083 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3086 const SCEV *LastIncExpr =
nullptr;
3087 unsigned NumConstIncrements = 0;
3088 unsigned NumVarIncrements = 0;
3089 unsigned NumReusedIncrements = 0;
3094 for (
const IVInc &Inc : Chain) {
3097 if (Inc.IncExpr->isZero())
3102 if (isa<SCEVConstant>(Inc.IncExpr)) {
3103 ++NumConstIncrements;
3107 if (Inc.IncExpr == LastIncExpr)
3108 ++NumReusedIncrements;
3112 LastIncExpr = Inc.IncExpr;
3117 if (NumConstIncrements > 1)
3124 cost += NumVarIncrements;
3128 cost -= NumReusedIncrements;
3130 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3147 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3148 const SCEV *LastIncExpr =
nullptr;
3149 for (; ChainIdx < NChains; ++ChainIdx) {
3150 IVChain &Chain = IVChainVec[ChainIdx];
3164 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
3170 if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.
isLoopInvariant(IncExpr, L))
3173 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3174 LastIncExpr = IncExpr;
3180 if (ChainIdx == NChains) {
3181 if (isa<PHINode>(UserInst))
3187 LastIncExpr = OperExpr;
3191 if (!isa<SCEVAddRecExpr>(LastIncExpr))
3194 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3196 ChainUsersVec.
resize(NChains);
3197 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3198 <<
") IV=" << *LastIncExpr <<
"\n");
3200 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3201 <<
") IV+" << *LastIncExpr <<
"\n");
3203 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3205 IVChain &Chain = IVChainVec[ChainIdx];
3209 if (!LastIncExpr->
isZero()) {
3210 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
3226 IVChain::const_iterator IncIter = Chain.Incs.begin();
3227 IVChain::const_iterator IncEnd = Chain.Incs.end();
3228 for( ; IncIter != IncEnd; ++IncIter) {
3229 if (IncIter->UserInst == OtherUse)
3232 if (IncIter != IncEnd)
3236 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3237 && IU.isIVUserOrOperand(OtherUse)) {
3240 NearUsers.
insert(OtherUse);
3245 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
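// CollectChains walks the loop body and header PHIs looking for chains of IV
// increments whose users can be rewritten to reuse a single chained IV.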
3270void LSRInstance::CollectChains() {
3276 for (
DomTreeNode *Rung = DT.getNode(
L->getLoopLatch());
3277 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3286 if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
3296 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3297 ChainIdx < NChains; ++ChainIdx) {
3298 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3304 while (IVOpIter != IVOpEnd) {
3305 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3306 if (UniqueOperands.
insert(IVOpInst).second)
3307 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3308 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3313 for (
PHINode &PN :
L->getHeader()->phis()) {
3318 dyn_cast<Instruction>(PN.getIncomingValueForBlock(
L->getLoopLatch()));
3320 ChainInstruction(&PN, IncV, ChainUsersVec);
3323 unsigned ChainIdx = 0;
3324 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3325 UsersIdx < NChains; ++UsersIdx) {
3327 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3330 if (ChainIdx != UsersIdx)
3331 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3332 FinalizeChain(IVChainVec[ChainIdx]);
3335 IVChainVec.resize(ChainIdx);
3338void LSRInstance::FinalizeChain(IVChain &Chain) {
3339 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3340 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3342 for (
const IVInc &Inc : Chain) {
3344 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3345 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3346 IVIncSet.insert(UseI);
3353 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3354 Immediate IncOffset = Immediate::getZero();
3361 auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
3362 if (!IncVScale || IncVScale->getNumOperands() != 2 ||
3363 !isa<SCEVVScale>(IncVScale->getOperand(1)))
3365 auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
3366 if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
3368 IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
3384void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3388 const IVInc &Head = Chain.Incs[0];
3393 Value *IVSrc =
nullptr;
3394 while (IVOpIter != IVOpEnd) {
3405 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3406 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3409 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3411 if (IVOpIter == IVOpEnd) {
3413 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3416 assert(IVSrc &&
"Failed to find IV chain source");
3421 const SCEV *LeftOverExpr =
nullptr;
3426 for (
const IVInc &Inc : Chain) {
3428 if (isa<PHINode>(InsertPt))
3429 InsertPt =
L->getLoopLatch()->getTerminator();
3433 Value *IVOper = IVSrc;
3434 if (!Inc.IncExpr->isZero()) {
3439 LeftOverExpr = LeftOverExpr ?
3440 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3444 bool FoundBase =
false;
3445 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3448 if (!Remainder->
isZero()) {
3450 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3451 const SCEV *IVOperExpr =
3453 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3462 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3465 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3468 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3472 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3475 LeftOverExpr =
nullptr;
3478 Type *OperTy = Inc.IVOperand->getType();
3479 if (IVTy != OperTy) {
3481 "cannot extend a chained IV");
3483 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3485 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3486 if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3491 if (isa<PHINode>(Chain.tailUserInst())) {
3492 for (
PHINode &Phi :
L->getHeader()->phis()) {
3496 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3499 Value *IVOper = IVSrc;
3501 if (IVTy != PostIncTy) {
3503 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3504 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3505 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3507 Phi.replaceUsesOfWith(PostIncV, IVOper);
3513void LSRInstance::CollectFixupsAndInitialFormulae() {
3515 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3527 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3528 if (IVIncSet.count(UseI)) {
3529 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3533 LSRUse::KindType
Kind = LSRUse::Basic;
3534 MemAccessTy AccessTy;
3536 Kind = LSRUse::Address;
3540 const SCEV *S = IU.getExpr(U);
3551 if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3554 if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3556 if (CI->isEquality()) {
3559 Value *
NV = CI->getOperand(1);
3560 if (NV ==
U.getOperandValToReplace()) {
3561 CI->setOperand(1, CI->getOperand(0));
3562 CI->setOperand(0, NV);
3563 NV = CI->getOperand(1);
3570 (!
NV->getType()->isPointerTy() ||
3577 Kind = LSRUse::ICmpZero;
3579 }
else if (
L->isLoopInvariant(NV) &&
3580 (!isa<Instruction>(NV) ||
3581 DT.dominates(cast<Instruction>(NV),
L->getHeader())) &&
3582 !
NV->getType()->isPointerTy()) {
3593 Kind = LSRUse::ICmpZero;
3595 assert(!isa<SCEVCouldNotCompute>(S));
3600 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3601 if (Factors[i] != -1)
3602 Factors.insert(-(
uint64_t)Factors[i]);
3608 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3609 size_t LUIdx =
P.first;
3611 LSRUse &LU =
Uses[LUIdx];
3614 LSRFixup &LF = LU.getNewFixup();
3615 LF.UserInst = UserInst;
3616 LF.OperandValToReplace =
U.getOperandValToReplace();
3617 LF.PostIncLoops = TmpPostIncLoops;
3619 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3622 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3624 F.initialMatch(S, L, SE);
3625 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU);
3626 VisitedLSRUse.
insert(LUIdx);
3629 if (!LU.WidestFixupType ||
3632 LU.WidestFixupType = LF.OperandValToReplace->getType();
3635 if (LU.Formulae.empty()) {
3636 InsertInitialFormula(S, LU, LUIdx);
3637 CountRegisters(LU.Formulae.back(), LUIdx);
3646void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3650 LU.RigidFormula =
true;
3653 F.initialMatch(S, L, SE);
3654 bool Inserted = InsertFormula(LU, LUIdx,
F);
3655 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3661LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3662 LSRUse &LU,
size_t LUIdx) {
3664 F.BaseRegs.push_back(S);
3665 F.HasBaseReg =
true;
3666 bool Inserted = InsertFormula(LU, LUIdx,
F);
3667 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3671void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3673 RegUses.countRegister(
F.ScaledReg, LUIdx);
3674 for (
const SCEV *BaseReg :
F.BaseRegs)
3675 RegUses.countRegister(BaseReg, LUIdx);
3680bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3683 "Formula is illegal");
3685 if (!LU.InsertFormula(
F, *L))
3688 CountRegisters(
F, LUIdx);
3698LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3707 while (!Worklist.
empty()) {
3711 if (!Visited.
insert(S).second)
3718 else if (
const SCEVUDivExpr *
D = dyn_cast<SCEVUDivExpr>(S)) {
3721 }
else if (
const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3722 const Value *
V = US->getValue();
3723 if (
const Instruction *Inst = dyn_cast<Instruction>(V)) {
3725 if (
L->contains(Inst))
continue;
3726 }
else if (isa<Constant>(V))
3729 for (
const Use &U :
V->uses()) {
3730 const Instruction *UserInst = dyn_cast<Instruction>(
U.getUser());
3739 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3742 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3744 cast<PHINode>(UserInst)->getIncomingBlock(
3746 if (!DT.dominates(
L->getHeader(), UseBB))
3759 if (isa<PHINode>(UserInst)) {
3760 const auto *PhiNode = cast<PHINode>(UserInst);
3761 bool HasIncompatibleEHPTerminatedBlock =
false;
3763 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3764 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3765 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3766 HasIncompatibleEHPTerminatedBlock =
true;
3771 if (HasIncompatibleEHPTerminatedBlock) {
3777 if (isa<CatchSwitchInst>(UserInst->
getParent()->getTerminator()))
3784 if (!isa<SCEVUnknown>(UserS))
3793 if (
const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3794 unsigned OtherIdx = !
U.getOperandNo();
3795 Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
3800 std::pair<size_t, Immediate>
P =
3801 getUse(S, LSRUse::Basic, MemAccessTy());
3802 size_t LUIdx =
P.first;
3804 LSRUse &LU =
Uses[LUIdx];
3805 LSRFixup &LF = LU.getNewFixup();
3806 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3807 LF.OperandValToReplace =
U;
3809 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3810 if (!LU.WidestFixupType ||
3813 LU.WidestFixupType = LF.OperandValToReplace->getType();
3814 InsertSupplementalFormula(US, LU, LUIdx);
3815 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3831 unsigned Depth = 0) {
3838 for (
const SCEV *S :
Add->operands()) {
3844 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3853 if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3855 Remainder =
nullptr;
3873 const SCEV *Remainder =
3886 LSRUse &LU,
const SCEV *S,
const Loop *L,
3888 if (LU.Kind != LSRUse::Address ||
3889 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3895 if (!isa<SCEVConstant>(LoopStep))
3901 if (!isa<SCEVConstant>(LoopStart) && SE.
isLoopInvariant(LoopStart, L))
3908void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3909 const Formula &
Base,
3912 const SCEV *BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[
Idx];
3924 if (AddOps.
size() == 1)
3938 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3944 InnerAddOps.append(std::next(J),
3949 if (InnerAddOps.size() == 1 &&
3951 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3959 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3963 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3968 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3971 F.ScaledReg =
nullptr;
3973 F.BaseRegs.erase(
F.BaseRegs.begin() +
Idx);
3974 }
else if (IsScaledReg)
3975 F.ScaledReg = InnerSum;
3977 F.BaseRegs[
Idx] = InnerSum;
3983 SC->getValue()->getZExtValue()))
3985 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3986 SC->getValue()->getZExtValue());
3988 F.BaseRegs.push_back(*J);
3993 if (InsertFormula(LU, LUIdx,
F))
4000 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4006void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4008 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4013 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4014 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4016 if (
Base.Scale == 1)
4017 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4023void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4026 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4027 (
Base.UnfoldedOffset.isNonZero()) <=
4035 Formula NewBase =
Base;
4036 NewBase.BaseRegs.clear();
4037 Type *CombinedIntegerType =
nullptr;
4038 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4041 if (!CombinedIntegerType)
4046 NewBase.BaseRegs.push_back(BaseReg);
4050 if (Ops.
size() == 0)
4055 auto GenerateFormula = [&](
const SCEV *Sum) {
4056 Formula
F = NewBase;
4064 F.BaseRegs.push_back(Sum);
4066 (void)InsertFormula(LU, LUIdx,
F);
4070 if (Ops.
size() > 1) {
4077 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4078 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4080 NewBase.UnfoldedOffset.getFixedValue(),
true));
4081 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4087void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4088 const Formula &
Base,
size_t Idx,
4092 if (
G->isZero() || !GV)
4096 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4101 F.BaseRegs[
Idx] =
G;
4102 (void)InsertFormula(LU, LUIdx,
F);
4106void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4109 if (
Base.BaseGV)
return;
4111 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4112 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4113 if (
Base.Scale == 1)
4114 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4119void LSRInstance::GenerateConstantOffsetsImpl(
4120 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4123 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4125 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4127 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4129 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4131 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4137 F.ScaledReg =
nullptr;
4139 F.deleteBaseReg(
F.BaseRegs[
Idx]);
4141 }
else if (IsScaledReg)
4144 F.BaseRegs[
Idx] = NewG;
4146 (void)InsertFormula(LU, LUIdx,
F);
4161 if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
4163 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
4164 const APInt &StepInt = StepRec->getAPInt();
4168 for (Immediate
Offset : Worklist) {
4170 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4177 for (Immediate
Offset : Worklist)
4181 if (
G->isZero() ||
Imm.isZero() ||
4182 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4185 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4186 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4191 F.BaseRegs[
Idx] =
G;
4196 (void)InsertFormula(LU, LUIdx,
F);
4200void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4206 if (LU.MaxOffset != LU.MinOffset)
4209 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4210 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4211 if (
Base.Scale == 1)
4212 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4218void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4220 if (LU.Kind != LSRUse::ICmpZero)
return;
4228 if (LU.MinOffset != LU.MaxOffset)
return;
4231 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4233 for (
const SCEV *BaseReg :
Base.BaseRegs)
4236 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4239 for (int64_t Factor : Factors) {
4244 if (
Base.BaseOffset.isMin() && Factor == -1)
4247 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4249 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4250 assert(Factor != 0 &&
"Zero factor not expected!");
4251 if (NewBaseOffset.getFixedValue() / Factor !=
4252 Base.BaseOffset.getFixedValue())
4260 Immediate
Offset = LU.MinOffset;
4261 if (
Offset.isMin() && Factor == -1)
4264 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4272 F.BaseOffset = NewBaseOffset;
4279 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4284 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4298 if (
F.UnfoldedOffset.isNonZero()) {
4299 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4301 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4302 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4303 Base.UnfoldedOffset.getFixedValue())
4307 IntTy,
F.UnfoldedOffset.getFixedValue()))
4312 (void)InsertFormula(LU, LUIdx,
F);
4319void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4326 if (
Base.Scale != 0 && !
Base.unscale())
  assert(Base.Scale == 0 && "unscale did not do its job!");
4332 for (int64_t Factor : Factors) {
4333 Base.Scale = Factor;
4334 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4336 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4340 if (LU.Kind == LSRUse::Basic &&
4341 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4342 LU.AccessTy,
Base) &&
4343 LU.AllFixupsOutsideLoop)
4344 LU.Kind = LSRUse::Special;
4350 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4351 Base.BaseOffset.isZero() && !
Base.BaseGV)
4354 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4356 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4363 if (!Quotient->isZero()) {
4366 F.ScaledReg = Quotient;
4367 F.deleteBaseReg(
F.BaseRegs[i]);
4371 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4372 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4376 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4378 (void)InsertFormula(LU, LUIdx,
F);
4394 const SCEV *Result =
nullptr;
4395 for (
auto &L :
Loops) {
4399 if (!New || (Result && New != Result))
4404 assert(Result &&
"failed to create expression");
4409void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4411 if (
Base.BaseGV)
return;
4421 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4424 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4428 for (
auto &LF : LU.Fixups)
4429 Loops.push_back(LF.PostIncLoops);
4431 for (
Type *SrcTy : Types) {
4440 const SCEV *NewScaledReg =
4442 if (!NewScaledReg || NewScaledReg->
isZero())
4444 F.ScaledReg = NewScaledReg;
4446 bool HasZeroBaseReg =
false;
4447 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4448 const SCEV *NewBaseReg =
4450 if (!NewBaseReg || NewBaseReg->
isZero()) {
4451 HasZeroBaseReg =
true;
4454 BaseReg = NewBaseReg;
4461 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4465 (void)InsertFormula(LU, LUIdx,
F);
4478 const SCEV *OrigReg;
4481 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4489#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4491 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4492 <<
" , add offset " <<
Imm;
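// GenerateCrossUseConstantOffsets looks for registers that differ only by a
// constant between uses, so one register plus per-use immediates can be
// shared across several LSRUses.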
void LSRInstance::GenerateCrossUseConstantOffsets() {
  // Group the registers by their value without any added constant offset.
  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
  // ... (Map, Sequence, UsedByIndicesMap, WorkItems, UniqueItems declarations)
  for (const SCEV *Use : RegUses) {
    const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
    Immediate Imm = ExtractImmediate(Reg, SE);
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    if (Pair.second)
      Sequence.push_back(Reg);
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
  }

  // Now examine each set of registers with the same base value, building up
  // a list of work to do so that formulae are not modified while iterating.
  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    // It's not worthwhile looking for reuse if there's only one offset.
    if (Imms.size() == 1)
      continue;

    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry : Imms) dbgs() << ' ' << Entry.first;
               dbgs() << '\n');

    // Examine each offset.
    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
         J != JE; ++J) {
      const SCEV *OrigReg = J->second;

      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {
        // ...
        continue;
      }

      // Conservatively examine offsets between this orig reg and a few
      // selected other orig regs.
      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {
        // ...
        continue;
      }
      // Only scalable if both terms are scalable, or if one is scalable and
      // the other is zero.
      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      // Compute a midpoint without overflowing int64_t.
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      // If the result is negative and FI and LI have different parity, the
      // midpoint was rounded towards -inf; nudge it back towards zero.
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE)
          continue;
        if (!JImm.isCompatibleImmediate(M->first))
          continue;

        // Compute the difference between the two.
        Immediate Imm = JImm.subUnsigned(M->first);
        for (unsigned LUIdx : UsedByIndices.set_bits())
          // Make a memo of this use, offset, and register tuple.
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
      }
    }
  }

  UsedByIndicesMap.clear();
  UniqueItems.clear();
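  // Worked example (illustration only, not from the source) of the
  // overflow-safe midpoint computed above: for FI = 7 and LI = 2,
  //   (FI & LI) + ((FI ^ LI) >> 1) = 2 + (5 >> 1) = 4,
  // i.e. (7 + 2) / 2 rounded towards -inf, computed without ever forming
  // FI + LI (which could overflow int64_t for large offsets). The follow-up
  // adjustment only fires when the average is negative and the two values
  // have different parity, nudging the result back towards zero.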
4597 for (
const WorkItem &WI : WorkItems) {
4598 size_t LUIdx = WI.LUIdx;
4599 LSRUse &LU =
Uses[LUIdx];
4600 Immediate
Imm = WI.Imm;
4601 const SCEV *OrigReg = WI.OrigReg;
4604 const SCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4608 for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4609 Formula
F = LU.Formulae[
L];
4616 if (
F.ScaledReg == OrigReg) {
4617 if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4619 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4621 const SCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4622 if (
F.referencesReg(S))
4625 NewF.BaseOffset =
Offset;
4626 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4629 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4634 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
4638 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4640 if (
C->getValue()->isNegative() !=
4641 (NewF.BaseOffset.isLessThanZero()) &&
4643 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4648 NewF.canonicalize(*this->L);
4649 (void)InsertFormula(LU, LUIdx, NewF);
4652 for (
size_t N = 0, NE =
F.BaseRegs.size();
N !=
NE; ++
N) {
4653 const SCEV *BaseReg =
F.BaseRegs[
N];
4654 if (BaseReg != OrigReg)
4657 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4658 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4659 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4661 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4663 LU.Kind, LU.AccessTy, NewF)) {
4667 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4671 NewF.UnfoldedOffset = NewUnfoldedOffset;
4673 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4678 for (
const SCEV *NewReg : NewF.BaseRegs)
4679 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewReg)) {
4680 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4682 if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4684 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4685 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4687 (
unsigned)llvm::countr_zero<uint64_t>(
4688 NewF.BaseOffset.getFixedValue()))
4693 NewF.canonicalize(*this->L);
4694 (void)InsertFormula(LU, LUIdx, NewF);
void LSRInstance::GenerateAllReuseFormulae() {
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
  }

  GenerateCrossUseConstantOffsets();

  LLVM_DEBUG(dbgs() << "After generating reuse formulae:\n";
             print_uses(dbgs()));
}
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  // ...
  bool ChangedFormulae = false;
  // ...
  // Collect the best formula for each unique set of shared registers. This
  // is reset for each use.
  using BestFormulaeTy = /* map from shared-register key to formula index */;
  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // ...
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      // ...
      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
      if (CostF.isLoser()) {
        // ...
      } else {
        // ...
        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            Key.push_back(Reg);
        }
        if (F.ScaledReg &&
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);
        // ...
        std::pair<BestFormulaeTy::const_iterator, bool> P =
            BestFormulae.insert(std::make_pair(Key, FIdx));
        if (!P.second) {
          Formula &Best = LU.Formulae[P.first->second];

          Cost CostBest(L, SE, TTI, AMK);
          // ...
          CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
          if (CostF.isLess(CostBest))
            std::swap(F, Best);
          LLVM_DEBUG(/* ... */ dbgs() << "    in favor of formula ";
                     Best.print(dbgs()); dbgs() << '\n');
          // ...
          ChangedFormulae = true;
          // ...
          LU.DeleteFormula(F);
          // ...
        }
      }
    }
    // ...
    LU.RecomputeRegs(LUIdx, RegUses);
    // ...
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "After filtering out undesirable candidates:\n";
    // ...
  });
}
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  size_t Power = 1;
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
    // ... (saturate at ComplexityLimit, otherwise Power *= FSize)
  }
  return Power;
}
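// Illustration (not part of the pass) of the saturating product this estimate
// relies on: the number of candidate solutions is the product of per-use
// formula counts, clamped so it never grows past the ComplexityLimit used by
// the narrowing heuristics.
//   uses with {3, 4, 2} formulae   ->  3 * 4 * 2 = 24 candidate solutions
//   any factor >= ComplexityLimit  ->  estimate saturates at ComplexityLimit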
4861void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4865 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4866 "which use a superset of registers used by other "
4869 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4870 LSRUse &LU =
Uses[LUIdx];
4872 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4873 Formula &
F = LU.Formulae[i];
4874 if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4880 I =
F.BaseRegs.begin(), E =
F.BaseRegs.end();
I != E; ++
I) {
4886 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4887 (
uint64_t)
C->getValue()->getSExtValue());
4888 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4889 (
I -
F.BaseRegs.begin()));
4890 if (LU.HasFormulaWithSameRegs(NewF)) {
4893 LU.DeleteFormula(
F);
4899 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*
I)) {
4900 if (
GlobalValue *GV = dyn_cast<GlobalValue>(
U->getValue()))
4904 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4905 (
I -
F.BaseRegs.begin()));
4906 if (LU.HasFormulaWithSameRegs(NewF)) {
4909 LU.DeleteFormula(
F);
4920 LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(
        dbgs() << "The search space is too complex.\n"
                  "Narrowing the search space by assuming that uses separated "
                  "by a constant offset will use the same registers.\n");

    // This is especially useful for unrolled loops.

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      for (const Formula &F : LU.Formulae) {
        if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
          continue;

        LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
        if (!LUThatHas)
          continue;

        if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
                                LU.Kind, LU.AccessTy))
          continue;

        LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

        // Transfer the fixups of LU to LUThatHas.
        for (LSRFixup &Fixup : LU.Fixups) {
          Fixup.Offset += F.BaseOffset;
          LUThatHas->pushFixup(Fixup);
          // ...
        }

        // Delete formulae from the new use which are no longer legal.
        for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
          Formula &F = LUThatHas->Formulae[i];
          if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                          LUThatHas->Kind, LUThatHas->AccessTy, F)) {
            // ...
            LUThatHas->DeleteFormula(F);
            // ...
          }
        }
        // ...
        LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

        // Delete the old use.
        DeleteUse(LU, LUIdx);
        break;
      }
    }
  }
}
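// Illustrative note for the collapsing heuristic above (example mine, not from
// the source): after unrolling, two uses such as
//   load p[i]      ->  reg(p + i*4), offset 0
//   load p[i + 1]  ->  reg(p + i*4), offset 4
// differ only by a constant offset, so the second use can be folded into the
// first one's register with a +4 immediate instead of keeping a separate
// induction expression alive.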
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                         "undesirable dedicated registers.\n");

    FilterOutUndesirableDedicatedRegisters();
  }
}
5018void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5023 dbgs() <<
"The search space is too complex.\n"
5024 "Narrowing the search space by choosing the best Formula "
5025 "from the Formulae with the same Scale and ScaledReg.\n");
5030 BestFormulaeTy BestFormulae;
5032 bool ChangedFormulae =
false;
5037 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5038 LSRUse &LU =
Uses[LUIdx];
5043 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5048 size_t FARegNum = 0;
5049 for (
const SCEV *Reg : FA.BaseRegs) {
5050 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5051 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5053 size_t FBRegNum = 0;
5054 for (
const SCEV *Reg : FB.BaseRegs) {
5055 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5056 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5058 if (FARegNum != FBRegNum)
5059 return FARegNum < FBRegNum;
5066 CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
5068 CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
5069 return CostFA.isLess(CostFB);
5073 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5075 Formula &
F = LU.Formulae[FIdx];
5078 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5082 Formula &Best = LU.Formulae[
P.first->second];
5083 if (IsBetterThan(
F, Best))
5087 " in favor of formula ";
5088 Best.print(
dbgs());
dbgs() <<
'\n');
5090 ChangedFormulae =
true;
5092 LU.DeleteFormula(
F);
5098 LU.RecomputeRegs(LUIdx, RegUses);
5101 BestFormulae.clear();
5106 "After filtering out undesirable candidates:\n";
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
  // ... (only worthwhile when post-increment addressing is preferred and the
  //      search space is too large)
  LLVM_DEBUG(dbgs() << "Narrowing the search space by choosing the lowest "
                       "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (LU.Kind != LSRUse::Address)
      continue;
    // ...

    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    // ...
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        // ...
        LU.DeleteFormula(F);
        // ...
      }
    }
    // ...
    LU.RecomputeRegs(LUIdx, RegUses);
    // ...
  }
}
5201void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5214 DenseMap <const SCEV *, float> RegNumMap;
5215 for (
const SCEV *Reg : RegUses) {
5216 if (UniqRegs.
count(Reg))
5219 for (
const LSRUse &LU :
Uses) {
5220 if (!LU.Regs.count(Reg))
5222 float P = LU.getNotSelectedProbability(Reg);
5228 RegNumMap.
insert(std::make_pair(Reg, PNotSel));
5232 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5235 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5236 LSRUse &LU =
Uses[LUIdx];
5238 if (LU.Formulae.size() < 2)
5243 float FMinRegNum = LU.Formulae[0].getNumRegs();
5244 float FMinARegNum = LU.Formulae[0].getNumRegs();
5246 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5247 Formula &
F = LU.Formulae[i];
5250 for (
const SCEV *BaseReg :
F.BaseRegs) {
5251 if (UniqRegs.
count(BaseReg))
5253 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5254 if (isa<SCEVAddRecExpr>(BaseReg))
5256 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5258 if (
const SCEV *ScaledReg =
F.ScaledReg) {
5259 if (!UniqRegs.
count(ScaledReg)) {
5261 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5262 if (isa<SCEVAddRecExpr>(ScaledReg))
5264 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5267 if (FMinRegNum > FRegNum ||
5268 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5269 FMinRegNum = FRegNum;
5270 FMinARegNum = FARegNum;
5275 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5277 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5278 while (LU.Formulae.size() != 1) {
5281 LU.Formulae.pop_back();
5283 LU.RecomputeRegs(LUIdx, RegUses);
5284 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5285 Formula &
F = LU.Formulae[0];
5288 UniqRegs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
                                       ScalarEvolution &SE, const SCEV *Best,
                                       const SCEV *Reg,
                                       MemAccessTy AccessType) {
  if (Best->getType() != Reg->getType() ||
      (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
       cast<SCEVAddRecExpr>(Best)->getLoop() !=
           cast<SCEVAddRecExpr>(Reg)->getLoop()))
    return false;
  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Best, Reg));
  if (!Diff)
    return false;

  return TTI.isLegalAddressingMode(
             AccessType.MemTy, /*BaseGV=*/nullptr,
             /*BaseOffset=*/Diff->getAPInt().getSExtValue(),
             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&
         TTI.isLegalAddressingMode(
             AccessType.MemTy, /*BaseGV=*/nullptr,
             /*BaseOffset=*/-Diff->getAPInt().getSExtValue(),
             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);
}
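// Illustrative note (example mine, not from the source): with Best = {%base,+,4}
// and Reg = {%base + 8,+,4}, Diff is the constant +/-8; Reg is only dropped in
// favor of Best when both the +8 and -8 immediates are legal offsets for the
// memory access, so every formula that used Reg can still be expressed as Best
// plus or minus a folded offset.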
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  // With all other options exhausted, loop until the system is simple enough
  // to handle.
  SmallPtrSet<const SCEV *, 4> Taken;
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    // Pick the register which is used by the most LSRUses, which is likely
    // to be a good reuse register candidate.
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        continue;
      if (!Best) {
        Best = Reg;
        BestNum = RegUses.getUsedByIndices(Reg).count();
      } else {
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
          Best = Reg;
          BestNum = Count;
        }
        // If the scores are the same, prefer the register that is simpler
        // for the target.
        if (Count == BestNum) {
          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
              IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
                                         Uses[LUIdx].AccessTy)) {
            Best = Reg;
            BestNum = Count;
          }
        }
      }
    }
    assert(Best && "Failed to find best LSRUse candidate");
    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    Taken.insert(Best);

    // In any use with formulae which reference this register, delete formulae
    // which don't reference it.
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best))
        continue;
      // ...
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          // ...
          LU.DeleteFormula(F);
          // ...
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
          // ...
        }
      }
      // ...
      LU.RecomputeRegs(LUIdx, RegUses);
    }
  }
}
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  if (FilterSameScaledReg)
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
  if (LSRExpNarrow)
    NarrowSearchSpaceByDeletingCostlyFormulas();
  else
    NarrowSearchSpaceByPickingWinnerRegs();
}
5419 const Cost &CurCost,
5432 const LSRUse &LU =
Uses[Workspace.
size()];
5439 for (
const SCEV *S : CurRegs)
5440 if (LU.Regs.count(S))
5444 Cost NewCost(L, SE,
TTI, AMK);
5445 for (
const Formula &
F : LU.Formulae) {
5453 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5454 for (
const SCEV *Reg : ReqRegs) {
5455 if ((
F.ScaledReg &&
F.ScaledReg == Reg) ||
5458 if (NumReqRegsToFind == 0)
5462 if (NumReqRegsToFind != 0) {
5473 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU);
5474 if (NewCost.isLess(SolutionCost)) {
5476 if (Workspace.
size() !=
Uses.size()) {
5477 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5478 NewRegs, VisitedRegs);
5479 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5480 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5483 dbgs() <<
".\nRegs:\n";
5484 for (
const SCEV *S : NewRegs)
dbgs()
5485 <<
"- " << *S <<
"\n";
5488 SolutionCost = NewCost;
5489 Solution = Workspace;
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  // ...
  // SolveRecurse does all the work.
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    // ...
    return;
  }

  // Ok, we've now made all our decisions.
  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               // ...
               Solution[i]->print(dbgs());
               // ...
             });

  const bool EnableDropUnprofitableSolution = [&] {
    // ... (controlled by the lsr-drop-solution option / target preference)
  }();

  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
      LLVM_DEBUG(
          dbgs() << "Baseline is more profitable than chosen solution, "
                    "add option 'lsr-drop-solution' to drop LSR solution.\n");
    else {
      LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
                           "solution, dropping LSR solution.\n";);
      Solution.clear();
    }
  }
}
5564 bool AllDominate =
true;
5568 if (isa<CatchSwitchInst>(Tentative))
5572 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
5573 AllDominate =
false;
5578 if (Tentative->
getParent() == Inst->getParent() &&
5579 (!BetterPos || !DT.dominates(Inst, BetterPos)))
5589 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5590 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5593 for (
DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
5594 if (!Rung)
return IP;
5595 Rung = Rung->getIDom();
5596 if (!Rung)
return IP;
5597 IDom = Rung->getBlock();
5600 const Loop *IDomLoop = LI.getLoopFor(IDom);
5601 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5602 if (IDomDepth <= IPLoopDepth &&
5603 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5621 if (
Instruction *
I = dyn_cast<Instruction>(LF.OperandValToReplace))
5623 if (LU.Kind == LSRUse::ICmpZero)
5625 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
5627 if (LF.PostIncLoops.count(L)) {
5628 if (LF.isUseFullyOutsideLoop(L))
5629 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5635 for (
const Loop *PIL : LF.PostIncLoops) {
5636 if (PIL == L)
continue;
5641 if (!ExitingBlocks.
empty()) {
5643 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5644 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
5649 assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
5650 && !isa<DbgInfoIntrinsic>(LowestIP) &&
5651 "Insertion point must be a normal instruction");
5658 while (isa<PHINode>(IP)) ++IP;
5661 while (IP->isEHPad()) ++IP;
5664 while (isa<DbgInfoIntrinsic>(IP)) ++IP;
5669 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5677Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5680 if (LU.RigidFormula)
5681 return LF.OperandValToReplace;
5685 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5690 Rewriter.setPostInc(LF.PostIncLoops);
5693 Type *OpTy = LF.OperandValToReplace->getType();
5695 Type *Ty =
F.getType();
5709 for (
const SCEV *Reg :
F.BaseRegs) {
5710 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5718 Value *ICmpScaledV =
nullptr;
5720 const SCEV *ScaledS =
F.ScaledReg;
5726 if (LU.Kind == LSRUse::ICmpZero) {
5736 "The only scale supported by ICmpZero uses is -1!");
5737 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5745 if (!Ops.
empty() && LU.Kind == LSRUse::Address &&
5781 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5782 "Expanding mismatched offsets\n");
5784 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5785 if (
Offset.isNonZero()) {
5786 if (LU.Kind == LSRUse::ICmpZero) {
5794 ICmpScaledV = ConstantInt::get(IntTy,
Offset.getFixedValue());
5804 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5805 if (UnfoldedOffset.isNonZero()) {
5807 Ops.
push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5822 if (LU.Kind == LSRUse::ICmpZero) {
5823 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
5824 if (
auto *OperandIsInstr = dyn_cast<Instruction>(CI->
getOperand(1)))
5826 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5827 "a scale at the same time!");
5828 if (
F.Scale == -1) {
5829 if (ICmpScaledV->
getType() != OpTy) {
5839 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5840 "ICmp does not support folding a global value and "
5841 "a scale at the same time!");
5844 if (
C->getType() != OpTy) {
5848 assert(
C &&
"Cast of ConstantInt should have folded");
5861void LSRInstance::RewriteForPHI(
5862 PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
5874 bool needUpdateFixups =
false;
5885 Loop *PNLoop = LI.getLoopFor(Parent);
5886 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5893 .setMergeIdenticalEdges()
5894 .setKeepOneInputPHIs());
5908 if (
L->contains(BB) && !
L->contains(PN))
5916 needUpdateFixups =
true;
5921 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5922 Inserted.insert(std::make_pair(BB,
static_cast<Value *
>(
nullptr)));
5930 Type *OpTy = LF.OperandValToReplace->getType();
5934 LF.OperandValToReplace->getType(),
"tmp",
5940 if (
auto *
I = dyn_cast<Instruction>(FullV))
5941 if (
L->contains(
I) && !
L->contains(BB))
5945 Pair.first->second = FullV;
5952 if (needUpdateFixups) {
5953 for (LSRUse &LU :
Uses)
5954 for (LSRFixup &
Fixup : LU.Fixups)
5958 if (
Fixup.UserInst == PN) {
5961 bool foundInOriginalPHI =
false;
5963 if (val ==
Fixup.OperandValToReplace) {
5964 foundInOriginalPHI =
true;
5969 if (foundInOriginalPHI)
5980 if (val ==
Fixup.OperandValToReplace)
5981 Fixup.UserInst = NewPN;
5993void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
5998 if (
PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
5999 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6001 Value *FullV = Expand(LU, LF,
F, LF.UserInst->getIterator(), DeadInsts);
6004 Type *OpTy = LF.OperandValToReplace->getType();
6005 if (FullV->
getType() != OpTy) {
6008 FullV, OpTy,
"tmp", LF.UserInst->getIterator());
6017 if (LU.Kind == LSRUse::ICmpZero)
6020 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
6023 if (
auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
6033 if (LU.Kind != LSRUse::Address)
6040 if (IVIncInsertPos->
getParent() == LHeader)
6043 if (!
Fixup.OperandValToReplace ||
6045 Instruction *UI = cast<Instruction>(U);
6046 return UI->getParent() != LHeader;
6051 Type *Ty =
I->getType();
6059void LSRInstance::ImplementSolution(
6066 for (
const IVChain &Chain : IVChainVec) {
6067 if (
PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
6072 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6073 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6076 ?
L->getHeader()->getTerminator()
6078 Rewriter.setIVIncInsertPos(L, InsertPos);
6079 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6083 for (
const IVChain &Chain : IVChainVec) {
6084 GenerateIVChain(Chain, DeadInsts);
6090 ScalarEvolutionIVs.push_back(
IV);
6106 for (
PHINode &PN :
L->getHeader()->phis()) {
6108 Value *Start =
nullptr, *Step =
nullptr;
6113 case Instruction::Sub:
6118 case Instruction::Add:
6124 if (!isa<Constant>(Step))
6129 if (BO->
getParent() == IVIncInsertPos->getParent())
6135 [&](
Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6148 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6151 :
TTI.getPreferredAddressingMode(
L, &SE)),
6153 BaselineCost(
L, SE,
TTI, AMK) {
6155 if (!
L->isLoopSimplifyForm())
6159 if (IU.
empty())
return;
6163 unsigned NumUsers = 0;
6167 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6174 if (
auto *PN = dyn_cast<PHINode>(
U.getUser())) {
6175 auto *FirstNonPHI = PN->
getParent()->getFirstNonPHI();
6176 if (isa<FuncletPadInst>(FirstNonPHI) ||
6177 isa<CatchSwitchInst>(FirstNonPHI))
6179 if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
6185 L->getHeader()->printAsOperand(
dbgs(),
false);
6198 OptimizeLoopTermCond();
6201 if (IU.empty())
return;
6204 if (!
L->isInnermost()) {
6217 CollectInterestingTypesAndFactors();
6218 CollectFixupsAndInitialFormulae();
6219 CollectLoopInvariantFixupsAndFormulae();
6225 print_uses(
dbgs()));
6227 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6231 GenerateAllReuseFormulae();
6233 FilterOutUndesirableDedicatedRegisters();
6234 NarrowSearchSpaceUsingHeuristics();
6244 if (Solution.
empty())
6249 for (
const LSRUse &LU :
Uses) {
6250 for (
const Formula &
F : LU.Formulae)
6252 F) &&
"Illegal formula generated!");
6257 ImplementSolution(Solution);
6260#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6261void LSRInstance::print_factors_and_types(
raw_ostream &
OS)
const {
6262 if (Factors.empty() &&
Types.empty())
return;
6264 OS <<
"LSR has identified the following interesting factors and types: ";
6267 for (int64_t Factor : Factors) {
6270 OS <<
'*' << Factor;
6273 for (
Type *Ty : Types) {
6276 OS <<
'(' << *Ty <<
')';
6282 OS <<
"LSR is examining the following fixup sites:\n";
6283 for (
const LSRUse &LU :
Uses)
6284 for (
const LSRFixup &LF : LU.Fixups) {
6292 OS <<
"LSR is examining the following uses:\n";
6293 for (
const LSRUse &LU :
Uses) {
6297 for (
const Formula &
F : LU.Formulae) {
6306 print_factors_and_types(
OS);
6318class LoopStrengthReduce :
public LoopPass {
6322 LoopStrengthReduce();
6331LoopStrengthReduce::LoopStrengthReduce() :
LoopPass(
ID) {
6335void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6367 return {Begin,
End};
6370struct SCEVDbgValueBuilder {
6371 SCEVDbgValueBuilder() =
default;
6372 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6374 void clone(
const SCEVDbgValueBuilder &
Base) {
6375 LocationOps =
Base.LocationOps;
6380 LocationOps.clear();
6397 unsigned ArgIndex = 0;
6398 if (It != LocationOps.
end()) {
6399 ArgIndex = std::distance(LocationOps.
begin(), It);
6401 ArgIndex = LocationOps.
size();
6413 if (
C->getAPInt().getSignificantBits() > 64)
6415 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6416 Expr.
push_back(
C->getAPInt().getSExtValue());
6423 return ToDwarfOpIter(Expr);
6430 assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6431 "Expected arithmetic SCEV type");
6433 unsigned EmitOperator = 0;
6434 for (
const auto &
Op : CommExpr->
operands()) {
6437 if (EmitOperator >= 1)
6438 pushOperator(DwarfOp);
6449 bool Success = pushSCEV(Inner);
6451 IsSigned ? llvm::dwarf::DW_ATE_signed
6452 : llvm::dwarf::DW_ATE_unsigned};
6453 for (
const auto &
Op : CastOps)
6461 if (
const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
6462 Success &= pushConst(StartInt);
6464 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6467 pushLocation(
U->getValue());
6469 }
else if (
const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6470 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6472 }
else if (
const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6473 Success &= pushSCEV(UDiv->getLHS());
6474 Success &= pushSCEV(UDiv->getRHS());
6475 pushOperator(llvm::dwarf::DW_OP_div);
6477 }
else if (
const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6479 assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
6480 isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
6481 "Unexpected cast type in SCEV.");
6482 Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
6484 }
else if (
const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
6485 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6487 }
else if (isa<SCEVAddRecExpr>(S)) {
6502 if (
C->getAPInt().getSignificantBits() > 64)
6504 int64_t
I =
C->getAPInt().getSExtValue();
6506 case llvm::dwarf::DW_OP_plus:
6507 case llvm::dwarf::DW_OP_minus:
6509 case llvm::dwarf::DW_OP_mul:
6510 case llvm::dwarf::DW_OP_div:
6526 if (isa<SCEVAddRecExpr>(SAR.
getStart()))
6533 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6534 if (!pushSCEV(Stride))
6536 pushOperator(llvm::dwarf::DW_OP_mul);
6538 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6539 if (!pushSCEV(Start))
6541 pushOperator(llvm::dwarf::DW_OP_plus);
6547 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6548 pushLocation(OffsetValue);
6551 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6552 << std::to_string(
Offset) <<
"\n");
6558 bool createIterCountExpr(
const SCEV *S,
6559 const SCEVDbgValueBuilder &IterationCount,
6566 if (!isa<SCEVAddRecExpr>(S))
6569 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6572 const auto *Rec = cast<SCEVAddRecExpr>(S);
6573 if (!Rec->isAffine())
6581 clone(IterationCount);
6582 if (!SCEVToValueExpr(*Rec, SE))
6596 if (isa<SCEVAddRecExpr>(SAR.
getStart())) {
6597 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV. Unsupported nested AddRec: "
6605 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6606 if (!pushSCEV(Start))
6608 pushOperator(llvm::dwarf::DW_OP_minus);
6610 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6611 if (!pushSCEV(Stride))
6613 pushOperator(llvm::dwarf::DW_OP_div);
6624 "Expected the locations vector to contain the IV");
6629 "Expected the location ops to contain the IV.");
6633 for (
const auto &
Op : LocationOps) {
6634 auto It =
find(DestLocations,
Op);
6635 if (It != DestLocations.
end()) {
6637 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6645 for (
const auto &
Op : expr_ops()) {
6647 Op.appendToVector(DestExpr);
6654 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6662struct DVIRecoveryRec {
6665 HadLocationArgList(
false) {}
6667 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6671 bool HadLocationArgList;
6677 for (
auto &RE : RecoveryExprs)
6679 RecoveryExprs.clear();
6682 ~DVIRecoveryRec() {
clear(); }
6690 auto expr_ops = ToDwarfOpIter(Expr);
6692 for (
auto Op : expr_ops)
6701template <
typename T>
6705 "contain any DW_OP_llvm_arg operands.");
6707 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6708 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6712template <
typename T>
6717 "Expected expression that references DIArglist locations using "
6718 "DW_OP_llvm_arg operands.");
6720 for (
Value *V : Locations)
6724 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6735 auto UpdateDbgValueInstImpl = [&](
auto *DbgVal) {
6737 if (NumLLVMArgs == 0) {
6744 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6756 if (!DVIRec.Expr->isComplex() && SalvageExpr->
isComplex()) {
6759 DbgVal->setExpression(SalvageExpr);
6762 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6763 UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6765 UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6779 auto RestorePreTransformStateImpl = [&](
auto *DbgVal) {
6780 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6781 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6782 assert(DVIRec.Expr &&
"Expected an expression");
6783 DbgVal->setExpression(DVIRec.Expr);
6787 if (!DVIRec.HadLocationArgList) {
6788 assert(DVIRec.LocationOps.size() == 1 &&
6789 "Unexpected number of location ops.");
6793 Value *CachedValue =
6798 for (
WeakVH VH : DVIRec.LocationOps) {
6803 DbgVal->setRawLocation(
6806 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6808 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6809 RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6811 RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6816 const SCEV *SCEVInductionVar,
6817 SCEVDbgValueBuilder IterCountExpr) {
6819 if (isa<DbgValueInst *>(DVIRec.DbgRef)
6820 ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
6821 : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
6833 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6835 NewLocationOps.
push_back(LSRInductionVar);
6837 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6838 WeakVH VH = DVIRec.LocationOps[i];
6842 if (VH && !isa<UndefValue>(VH)) {
6844 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6846 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6854 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6855 <<
" refers to a location that is now undef or erased. "
6856 "Salvage abandoned.\n");
6860 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6861 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6863 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6864 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6868 if (std::optional<APInt>
Offset =
6870 if (
Offset->getSignificantBits() <= 64)
6871 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6872 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6880 if (DVIRec.Expr->getNumElements() == 0) {
6881 assert(DVIRec.RecoveryExprs.size() == 1 &&
6882 "Expected only a single recovery expression for an empty "
6884 assert(DVIRec.RecoveryExprs[0] &&
6885 "Expected a SCEVDbgSalvageBuilder for location 0");
6886 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6887 B->appendToVectors(
NewExpr, NewLocationOps);
6889 for (
const auto &
Op : DVIRec.Expr->expr_ops()) {
6897 SCEVDbgValueBuilder *DbgBuilder =
6898 DVIRec.RecoveryExprs[LocationArgIndex].get();
6904 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6905 "Expected a positive index for the location-op position.");
6906 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6910 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6914 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6916 << *cast<DbgValueInst *>(DVIRec.DbgRef) <<
"\n");
6919 << *cast<DbgVariableRecord *>(DVIRec.DbgRef) <<
"\n");
6927 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6928 if (DVIToUpdate.empty())
6932 assert(SCEVInductionVar &&
6933 "Anticipated a SCEV for the post-LSR induction variable");
6936 dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
6937 if (!IVAddRec->isAffine())
6945 SCEVDbgValueBuilder IterCountExpr;
6946 IterCountExpr.pushLocation(LSRInductionVar);
6947 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6950 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6953 for (
auto &DVIRec : DVIToUpdate) {
6954 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6965 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
6967 for (
const auto &
B : L->getBlocks()) {
6968 for (
auto &
I : *
B) {
6969 auto ProcessDbgValue = [&](
auto *DbgVal) ->
bool {
6972 if (DbgVal->isKillLocation())
6977 const auto &HasTranslatableLocationOps =
6978 [&](
const auto *DbgValToTranslate) ->
bool {
6979 for (
const auto LocOp : DbgValToTranslate->location_ops()) {
6993 if (!HasTranslatableLocationOps(DbgVal))
6996 std::unique_ptr<DVIRecoveryRec> NewRec =
6997 std::make_unique<DVIRecoveryRec>(DbgVal);
7001 NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
7002 for (
const auto LocOp : DbgVal->location_ops()) {
7003 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
7004 NewRec->LocationOps.push_back(LocOp);
7005 NewRec->HadLocationArgList = DbgVal->hasArgList();
7007 SalvageableDVISCEVs.push_back(std::move(NewRec));
7011 if (DVR.isDbgValue() || DVR.isDbgAssign())
7012 ProcessDbgValue(&DVR);
7014 auto DVI = dyn_cast<DbgValueInst>(&
I);
7017 if (ProcessDbgValue(DVI))
7018 DVIHandles.insert(DVI);
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
                                           const LSRInstance &LSR) {
  auto IsSuitableIV = [&](PHINode *P) {
    // ... (must be a SCEV-analysable integer IV of this loop)
  };

  // Prefer an IV that was inserted by ScalarEvolutionExpander during LSR.
  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    if (!IV)
      continue;
    auto *P = cast<llvm::PHINode>(&*IV);
    if (IsSuitableIV(P))
      return P;
  }

  // Otherwise fall back to any suitable PHI in the loop header.
  for (PHINode &P : L.getHeader()->phis()) {
    if (IsSuitableIV(&P))
      return &P;
  }
  return nullptr;
}
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI,
                               AssumptionCache &AC, TargetLibraryInfo &TLI,
                               MemorySSA *MSSA) {
  // Debug preservation: record which dbg.values are salvageable before LSR
  // starts deleting instructions.
  // ...

  bool Changed = false;
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Run the main LSR transformation.
  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();

  // Remove any extra phis created by processing inner loops.
  // ...
    const DataLayout &DL = L->getHeader()->getDataLayout();
    // ...
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    // ...

  // LSR may at times remove all uses of an induction variable from a loop.
  // The only remaining use is the PHI in the exit block.
  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    // ...
    const DataLayout &DL = L->getHeader()->getDataLayout();
    // ...
  }

  if (SalvageableDVIRecords.empty())
    return Changed;

  // Obtain relevant IVs and attempt to rewrite the salvageable dbg.values
  // with expressions composed using the derived iteration count.
  for (const auto &L : LI) {
    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
    else
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
  }

  for (auto &Rec : SalvageableDVIRecords)
    Rec->clear();
  SalvageableDVIRecords.clear();
  // ...
  return Changed;
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  // ...
  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  MemorySSA *MSSA = nullptr;
  if (MSSAAnalysis)
    MSSA = &MSSAAnalysis->getMSSA();
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
}

char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
// ... (pass dependencies and INITIALIZE_PASS_END elided)
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs, SmallSet< AssertingVH< DbgValueInst >, 2 > &DVIHandles)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
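The SCEV-based debug-value salvaging referenced above (SalvageDVI, DbgRewriteSalvageableDVIs, SCEVDbgValueBuilder) rebuilds a variable location from whichever induction variable survives LSR. A minimal sketch of the idea, with made-up numbers (Start = 16, Stride = 8) and a rewritten IV %iv whose SCEV is {0,+,1}:

  // iteration count recovered from the new IV:  iter = (%iv - 0) / 1
  // salvaged location value:                    16 + iter * 8
  // expressed as DIExpression operations roughly like:
  //   DW_OP_LLVM_arg 0, DW_OP_consts 8, DW_OP_mul, DW_OP_consts 16, DW_OP_plus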
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class recording the (high level) value of a variable.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Value handle that asserts if the Value is deleted.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Conditional or Unconditional Branch instruction.
bool isUnconditional() const
Value * getCondition() const
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
This is the shared class of boolean and integer constants.
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
An iterator for expression operands.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
void print(raw_ostream &OS) const
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
PointerIntPair - This class implements a pair of a pointer and small integer.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all analyses.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
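A minimal sketch of how the SCEVAddRecExpr queries above are typically combined (the helper name and parameters are assumptions, not this pass's code): check that a value evolves as an affine {Start,+,Step} recurrence in a given loop before using its start and stride.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Returns true and fills Start/Step when V evolves as {Start,+,Step} in L.
static bool classifyAffineIV(Value *V, const Loop *L, ScalarEvolution &SE,
                             const SCEV *&Start, const SCEV *&Step) {
  if (!SE.isSCEVable(V->getType()))
    return false;
  const auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;
  Start = AR->getStart();           // loop-invariant initial value
  Step = AR->getStepRecurrence(SE); // loop-invariant per-iteration stride
  return true;
}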
This is the base class for unary cast operator classes.
This node is the base class for n'ary commutative operators.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This class represents a signed maximum selection.
This class represents a binary unsigned division operation.
This class represents an unsigned maximum selection.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM Value.
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the given type, for which isSCEVable must return true.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the given type.
bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getVScale(Type *Ty)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a single pointer operand.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and return the result as an APInt if it is a constant, and std::nullopt if it isn't.
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that make up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
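The ScalarEvolution entries above are easiest to read with a small usage sketch (PtrA, PtrB, L, and the helper itself are assumptions for illustration): form the difference of two address expressions, then ask whether it is loop-invariant or a compile-time constant.

#include "llvm/Analysis/ScalarEvolution.h"
#include <optional>
using namespace llvm;

static void compareAddresses(Value *PtrA, Value *PtrB, const Loop *L,
                             ScalarEvolution &SE) {
  const SCEV *A = SE.getSCEV(PtrA);
  const SCEV *B = SE.getSCEV(PtrB);
  const SCEV *Diff = SE.getMinusSCEV(A, B);     // SCEV form of A - B
  bool Invariant = SE.isLoopInvariant(Diff, L); // constant across iterations of L?
  if (std::optional<APInt> C = SE.computeConstantDifference(A, B)) {
    (void)*C; // the two addresses differ by this known constant
  }
  (void)Invariant;
}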
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
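A small, self-contained sketch of the SetVector behaviour described above (the values are arbitrary): duplicates are rejected and iteration follows insertion order, which keeps the traversal deterministic.

#include "llvm/ADT/SetVector.h"
#include <cassert>
using namespace llvm;

static void setVectorDemo() {
  SetVector<unsigned> Used;
  bool Inserted = Used.insert(3); // true: newly inserted
  assert(Inserted);
  Used.insert(3);                 // duplicate: rejected, order unchanged
  Used.insert(1);
  assert(Used.size() == 2);
  for (unsigned R : Used)         // visits 3, then 1
    (void)R;
}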
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is small.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
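For the SmallBitVector operations above, a toy example (sizes and indices chosen arbitrarily) shows the two equivalent ways of walking only the set bits:

#include "llvm/ADT/SmallBitVector.h"
using namespace llvm;

static unsigned bitVectorDemo() {
  SmallBitVector Chosen(16);               // 16 bits, all initially clear
  Chosen.set(3);
  Chosen.set(7);
  unsigned Visited = 0;
  for (unsigned Idx : Chosen.set_bits()) { // range over set bits: 3, 7
    (void)Idx;
    ++Visited;
  }
  for (int I = Chosen.find_first(); I != -1; I = Chosen.find_next(I))
    ++Visited;                             // same walk, spelled explicitly
  return Visited + Chosen.count();         // count() == 2, Visited == 4
}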
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static Type * getVoidTy(LLVMContext &C)
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
This class represents a cast from unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
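A hedged example of the use-list API above (OldV, NewV, and the helper are placeholders): rewrite a single user's operand when there is exactly one use, otherwise redirect every use at once.

#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static void replaceEverywhere(Value *OldV, Value *NewV) {
  if (OldV->hasOneUse()) {
    User *OnlyUser = *OldV->user_begin();
    OnlyUser->replaceUsesOfWith(OldV, NewV); // rewrite just that operand
    return;
  }
  OldV->replaceAllUsesWith(NewV); // every remaining use now refers to NewV
}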
A nullable Value handle.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actually needed.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function.
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
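A minimal sketch of how the two normalization helpers above pair up (the helper and its arguments are assumptions): normalize an expression to post-increment form for a loop, then denormalize to recover an expression equivalent to the original when the transform was invertible.

#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;

static const SCEV *roundTrip(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
  PostIncLoopSet Loops;
  Loops.insert(L);
  const SCEV *Post = normalizeForPostIncUse(S, Loops, SE);
  if (!Post)
    return nullptr;                                 // not invertible for these loops
  return denormalizeForPostIncUse(Post, Loops, SE); // equivalent to S
}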
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Pass * createLoopStrengthReducePass()
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not trivially dead to be present in the worklist; such entries are simply skipped.
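The typical cleanup pattern behind the declaration above, shown as a sketch (the wrapper and its parameters are assumptions): collect candidate instructions in WeakTrackingVH handles so intervening deletions leave null entries rather than dangling pointers, then let the permissive helper skip anything that is null or still live.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static bool cleanUpDead(ArrayRef<Instruction *> MaybeDead,
                        const TargetLibraryInfo *TLI,
                        MemorySSAUpdater *MSSAU) {
  SmallVector<WeakTrackingVH, 16> DeadInsts(MaybeDead.begin(), MaybeDead.end());
  return RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, TLI,
                                                              MSSAU);
}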
constexpr unsigned BitWidth
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of the innermost containing loop.
void initializeLoopStrengthReducePass(PassRegistry &)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed, substitute the exit values from the loop into any instructions outside of the loop that use the final values of the current expressions.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Option class for critical edge splitting.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to the loop passes.
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.