84#include "llvm/Config/llvm-config.h"
133#define DEBUG_TYPE "loop-reduce"
150 cl::desc(
"Enable LSR phi elimination"));
155 cl::desc(
"Add instruction count to a LSR cost model"));
160 cl::desc(
"Narrow LSR complex solution using"
161 " expectation of registers number"));
167 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
168 " with the same ScaledReg and Scale"));
172 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
175 "Don't prefer any addressing mode"),
178 "Prefer pre-indexed addressing mode"),
181 "Prefer post-indexed addressing mode")));
185 cl::init(std::numeric_limits<uint16_t>::max()),
186 cl::desc(
"LSR search space complexity limit"));
190 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
194 cl::desc(
"Attempt to replace primary IV with other IV."));
198 cl::desc(
"Attempt to drop solution if it is less profitable"));
201 "Number of terminating condition fold recognized and performed");
207 cl::desc(
"Stress test LSR IV chains"));
216 static const unsigned UnknownAddressSpace =
217 std::numeric_limits<unsigned>::max();
219 Type *MemTy =
nullptr;
220 unsigned AddrSpace = UnknownAddressSpace;
222 MemAccessTy() =
default;
223 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
226 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
232 unsigned AS = UnknownAddressSpace) {
252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
254 OS <<
"[NumUses=" << UsedByIndices.count() <<
']';
268 RegUsesTy RegUsesMap;
272 void countRegister(
const SCEV *Reg,
size_t LUIdx);
273 void dropRegister(
const SCEV *Reg,
size_t LUIdx);
274 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
276 bool isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const;
294RegUseTracker::countRegister(
const SCEV *Reg,
size_t LUIdx) {
295 std::pair<RegUsesTy::iterator, bool> Pair =
296 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
297 RegSortData &RSD = Pair.first->second;
300 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
301 RSD.UsedByIndices.set(LUIdx);
305RegUseTracker::dropRegister(
const SCEV *Reg,
size_t LUIdx) {
306 RegUsesTy::iterator It = RegUsesMap.find(Reg);
307 assert(It != RegUsesMap.end());
308 RegSortData &RSD = It->second;
309 assert(RSD.UsedByIndices.size() > LUIdx);
310 RSD.UsedByIndices.reset(LUIdx);
314RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
315 assert(LUIdx <= LastLUIdx);
319 for (
auto &Pair : RegUsesMap) {
321 if (LUIdx < UsedByIndices.
size())
322 UsedByIndices[LUIdx] =
323 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
324 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
329RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const {
330 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
331 if (
I == RegUsesMap.end())
335 if (i == -1)
return false;
336 if ((
size_t)i != LUIdx)
return true;
341 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
342 assert(
I != RegUsesMap.end() &&
"Unknown register!");
343 return I->second.UsedByIndices;
346void RegUseTracker::clear() {
360 int64_t BaseOffset = 0;
363 bool HasBaseReg =
false;
386 const SCEV *ScaledReg =
nullptr;
391 int64_t UnfoldedOffset = 0;
399 void canonicalize(
const Loop &L);
403 bool hasZeroEnd()
const;
405 size_t getNumRegs()
const;
408 void deleteBaseReg(
const SCEV *&S);
410 bool referencesReg(
const SCEV *S)
const;
411 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
412 const RegUseTracker &RegUses)
const;
433 for (
const SCEV *S :
Add->operands())
440 if (!AR->getStart()->isZero() && AR->isAffine()) {
443 AR->getStepRecurrence(SE),
459 const SCEV *NegOne = SE.
getSCEV(ConstantInt::getAllOnesValue(
461 for (
const SCEV *S : MyGood)
463 for (
const SCEV *S : MyBad)
482 BaseRegs.push_back(Sum);
488 BaseRegs.push_back(Sum);
496 return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
503bool Formula::isCanonical(
const Loop &L)
const {
505 return BaseRegs.size() <= 1;
510 if (Scale == 1 && BaseRegs.empty())
530void Formula::canonicalize(
const Loop &L) {
534 if (BaseRegs.empty()) {
536 assert(ScaledReg &&
"Expected 1*reg => reg");
537 assert(Scale == 1 &&
"Expected 1*reg => reg");
538 BaseRegs.push_back(ScaledReg);
546 ScaledReg = BaseRegs.pop_back_val();
557 if (
I != BaseRegs.end())
567bool Formula::unscale() {
571 BaseRegs.push_back(ScaledReg);
576bool Formula::hasZeroEnd()
const {
577 if (UnfoldedOffset || BaseOffset)
579 if (BaseRegs.size() != 1 || ScaledReg)
586size_t Formula::getNumRegs()
const {
587 return !!ScaledReg + BaseRegs.size();
592Type *Formula::getType()
const {
593 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
594 ScaledReg ? ScaledReg->getType() :
595 BaseGV ? BaseGV->getType() :
600void Formula::deleteBaseReg(
const SCEV *&S) {
601 if (&S != &BaseRegs.back())
607bool Formula::referencesReg(
const SCEV *S)
const {
613bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
614 const RegUseTracker &RegUses)
const {
616 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
618 for (
const SCEV *BaseReg : BaseRegs)
619 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
624#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
629 BaseGV->printAsOperand(
OS,
false);
631 if (BaseOffset != 0) {
635 for (
const SCEV *BaseReg : BaseRegs) {
637 OS <<
"reg(" << *BaseReg <<
')';
639 if (HasBaseReg && BaseRegs.empty()) {
641 OS <<
"**error: HasBaseReg**";
642 }
else if (!HasBaseReg && !BaseRegs.empty()) {
644 OS <<
"**error: !HasBaseReg**";
648 OS << Scale <<
"*reg(";
655 if (UnfoldedOffset != 0) {
657 OS <<
"imm(" << UnfoldedOffset <<
')';
698 bool IgnoreSignificantBits =
false) {
709 if (
RA.isAllOnes()) {
723 const APInt &LA =
C->getAPInt();
732 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
734 IgnoreSignificantBits);
735 if (!Step)
return nullptr;
737 IgnoreSignificantBits);
738 if (!Start)
return nullptr;
751 for (
const SCEV *S :
Add->operands()) {
753 if (!
Op)
return nullptr;
769 dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
784 IgnoreSignificantBits)) {
803 if (
C->getAPInt().getSignificantBits() <= 64) {
805 return C->getValue()->getSExtValue();
813 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
828 if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
829 if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
839 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
855 bool isAddress = isa<LoadInst>(Inst);
856 if (
StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
857 if (SI->getPointerOperand() == OperandVal)
859 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
862 switch (II->getIntrinsicID()) {
863 case Intrinsic::memset:
864 case Intrinsic::prefetch:
865 case Intrinsic::masked_load:
866 if (II->getArgOperand(0) == OperandVal)
869 case Intrinsic::masked_store:
870 if (II->getArgOperand(1) == OperandVal)
873 case Intrinsic::memmove:
874 case Intrinsic::memcpy:
875 if (II->getArgOperand(0) == OperandVal ||
876 II->getArgOperand(1) == OperandVal)
882 if (IntrInfo.
PtrVal == OperandVal)
887 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
888 if (RMW->getPointerOperand() == OperandVal)
891 if (CmpX->getPointerOperand() == OperandVal)
900 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
907 if (
const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
908 AccessTy.AddrSpace = SI->getPointerAddressSpace();
909 }
else if (
const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
910 AccessTy.AddrSpace = LI->getPointerAddressSpace();
911 }
else if (
const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
912 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
914 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
915 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
916 switch (II->getIntrinsicID()) {
917 case Intrinsic::prefetch:
918 case Intrinsic::memset:
919 AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
920 AccessTy.MemTy = OperandVal->
getType();
922 case Intrinsic::memmove:
923 case Intrinsic::memcpy:
925 AccessTy.MemTy = OperandVal->
getType();
927 case Intrinsic::masked_load:
931 case Intrinsic::masked_store:
933 II->getArgOperand(1)->getType()->getPointerAddressSpace();
993 if (!Processed.
insert(S).second)
997 for (
const SCEV *S :
Add->operands()) {
1013 Value *UVal = U->getValue();
1017 if (UI && UI->
getOpcode() == Instruction::Mul &&
1051 const LSRUse &LU,
const Formula &
F);
1055 const LSRUse &LU,
const Formula &
F,
1062 const Loop *
L =
nullptr;
1072 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1090 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1091 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1092 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1093 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0u);
1099 return C.NumRegs == ~0
u;
1102 void RateFormula(
const Formula &
F,
1112 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1114 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1127 Value *OperandValToReplace =
nullptr;
1139 LSRFixup() =
default;
1141 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1149struct UniquifierDenseMapInfo {
1152 V.push_back(
reinterpret_cast<const SCEV *
>(-1));
1158 V.push_back(
reinterpret_cast<const SCEV *
>(-2));
1194 MemAccessTy AccessTy;
1200 int64_t MinOffset = std::numeric_limits<int64_t>::max();
1201 int64_t MaxOffset = std::numeric_limits<int64_t>::min();
1205 bool AllFixupsOutsideLoop =
true;
1212 bool RigidFormula =
false;
1218 Type *WidestFixupType =
nullptr;
1228 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1230 LSRFixup &getNewFixup() {
1231 Fixups.push_back(LSRFixup());
1235 void pushFixup(LSRFixup &f) {
1237 if (
f.Offset > MaxOffset)
1238 MaxOffset =
f.Offset;
1239 if (
f.Offset < MinOffset)
1240 MinOffset =
f.Offset;
1243 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1244 float getNotSelectedProbability(
const SCEV *Reg)
const;
1245 bool InsertFormula(
const Formula &
F,
const Loop &L);
1246 void DeleteFormula(Formula &
F);
1247 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1256 LSRUse::KindType Kind, MemAccessTy AccessTy,
1258 bool HasBaseReg, int64_t Scale,
1262 if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1266 if (
const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1268 if (
auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1270 if (
auto S = dyn_cast<SCEVNAryExpr>(Reg))
1272 [&](
unsigned i,
const SCEV *Reg) {
1273 return i + getSetupCost(Reg, Depth - 1);
1275 if (
auto S = dyn_cast<SCEVUDivExpr>(Reg))
1282void Cost::RateRegister(
const Formula &
F,
const SCEV *Reg,
1288 if (AR->getLoop() != L) {
1295 if (!AR->getLoop()->contains(L)) {
1305 unsigned LoopCost = 1;
1312 if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1313 if (Step->getAPInt() ==
F.BaseOffset)
1316 const SCEV *LoopStep = AR->getStepRecurrence(*SE);
1317 if (isa<SCEVConstant>(LoopStep)) {
1318 const SCEV *LoopStart = AR->getStart();
1319 if (!isa<SCEVConstant>(LoopStart) &&
1325 C.AddRecCost += LoopCost;
1329 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1330 if (!Regs.
count(AR->getOperand(1))) {
1331 RateRegister(
F, AR->getOperand(1), Regs);
1343 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1345 C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1352void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *Reg,
1355 if (LoserRegs && LoserRegs->
count(Reg)) {
1359 if (Regs.
insert(Reg).second) {
1360 RateRegister(
F, Reg, Regs);
1361 if (LoserRegs && isLoser())
1366void Cost::RateFormula(
const Formula &
F,
1373 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1375 unsigned PrevAddRecCost =
C.AddRecCost;
1376 unsigned PrevNumRegs =
C.NumRegs;
1377 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1378 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1379 if (VisitedRegs.
count(ScaledReg)) {
1383 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1387 for (
const SCEV *BaseReg :
F.BaseRegs) {
1388 if (VisitedRegs.
count(BaseReg)) {
1392 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1398 size_t NumBaseParts =
F.getNumRegs();
1399 if (NumBaseParts > 1)
1404 C.NumBaseAdds += (
F.UnfoldedOffset != 0);
1410 for (
const LSRFixup &
Fixup : LU.Fixups) {
1411 int64_t
O =
Fixup.Offset;
1421 if (LU.Kind == LSRUse::Address &&
Offset != 0 &&
1438 if (
C.NumRegs > TTIRegNum) {
1441 if (PrevNumRegs > TTIRegNum)
1442 C.Insns += (
C.NumRegs - PrevNumRegs);
1444 C.Insns += (
C.NumRegs - TTIRegNum);
1457 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1461 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1464 if (LU.Kind != LSRUse::ICmpZero)
1465 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1471 C.Insns = std::numeric_limits<unsigned>::max();
1472 C.NumRegs = std::numeric_limits<unsigned>::max();
1473 C.AddRecCost = std::numeric_limits<unsigned>::max();
1474 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1475 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1476 C.ImmCost = std::numeric_limits<unsigned>::max();
1477 C.SetupCost = std::numeric_limits<unsigned>::max();
1478 C.ScaleCost = std::numeric_limits<unsigned>::max();
1482bool Cost::isLess(
const Cost &
Other)
const {
1484 C.Insns !=
Other.C.Insns)
1485 return C.Insns <
Other.C.Insns;
1489#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1492 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1493 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1494 if (
C.AddRecCost != 0)
1495 OS <<
", with addrec cost " <<
C.AddRecCost;
1496 if (
C.NumIVMuls != 0)
1497 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1498 << (
C.NumIVMuls == 1 ?
"" :
"s");
1499 if (
C.NumBaseAdds != 0)
1500 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1501 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1502 if (
C.ScaleCost != 0)
1503 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1505 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1506 if (
C.SetupCost != 0)
1507 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1516bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1518 if (
const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1519 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1520 if (PN->getIncomingValue(i) == OperandValToReplace &&
1521 L->contains(PN->getIncomingBlock(i)))
1526 return !
L->contains(UserInst);
1529#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1533 if (
StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1535 Store->getOperand(0)->printAsOperand(
OS,
false);
1536 }
else if (UserInst->getType()->isVoidTy())
1537 OS << UserInst->getOpcodeName();
1539 UserInst->printAsOperand(
OS,
false);
1541 OS <<
", OperandValToReplace=";
1542 OperandValToReplace->printAsOperand(
OS,
false);
1544 for (
const Loop *PIL : PostIncLoops) {
1545 OS <<
", PostIncLoop=";
1546 PIL->getHeader()->printAsOperand(
OS,
false);
1560bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1562 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1565 return Uniquifier.
count(Key);
1569float LSRUse::getNotSelectedProbability(
const SCEV *Reg)
const {
1571 for (
const Formula &
F : Formulae)
1572 if (
F.referencesReg(Reg))
1574 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1579bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1580 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1582 if (!Formulae.empty() && RigidFormula)
1586 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1590 if (!Uniquifier.
insert(Key).second)
1594 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1595 "Zero allocated in a scaled register!");
1597 for (
const SCEV *BaseReg :
F.BaseRegs)
1598 assert(!BaseReg->
isZero() &&
"Zero allocated in a base register!");
1602 Formulae.push_back(
F);
1605 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1613void LSRUse::DeleteFormula(Formula &
F) {
1614 if (&
F != &Formulae.back())
1616 Formulae.pop_back();
1620void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1624 for (
const Formula &
F : Formulae) {
1625 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1626 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1630 for (
const SCEV *S : OldRegs)
1632 RegUses.dropRegister(S, LUIdx);
1635#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1637 OS <<
"LSR Use: Kind=";
1639 case Basic:
OS <<
"Basic";
break;
1640 case Special:
OS <<
"Special";
break;
1641 case ICmpZero:
OS <<
"ICmpZero";
break;
1643 OS <<
"Address of ";
1644 if (AccessTy.MemTy->isPointerTy())
1647 OS << *AccessTy.MemTy;
1650 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1653 OS <<
", Offsets={";
1654 bool NeedComma =
false;
1655 for (
const LSRFixup &
Fixup : Fixups) {
1656 if (NeedComma)
OS <<
',';
1662 if (AllFixupsOutsideLoop)
1663 OS <<
", all-fixups-outside-loop";
1665 if (WidestFixupType)
1666 OS <<
", widest fixup type: " << *WidestFixupType;
1675 LSRUse::KindType Kind, MemAccessTy AccessTy,
1677 bool HasBaseReg, int64_t Scale,
1680 case LSRUse::Address:
1682 HasBaseReg, Scale, AccessTy.AddrSpace,
Fixup);
1684 case LSRUse::ICmpZero:
1691 if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1696 if (Scale != 0 && Scale != -1)
1701 if (BaseOffset != 0) {
1709 BaseOffset = -(
uint64_t)BaseOffset;
1718 return !BaseGV && Scale == 0 && BaseOffset == 0;
1720 case LSRUse::Special:
1722 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1729 int64_t MinOffset, int64_t MaxOffset,
1730 LSRUse::KindType Kind, MemAccessTy AccessTy,
1732 bool HasBaseReg, int64_t Scale) {
1734 if (((int64_t)((
uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1737 MinOffset = (
uint64_t)BaseOffset + MinOffset;
1738 if (((int64_t)((
uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1741 MaxOffset = (
uint64_t)BaseOffset + MaxOffset;
1744 HasBaseReg, Scale) &&
1750 int64_t MinOffset, int64_t MaxOffset,
1751 LSRUse::KindType Kind, MemAccessTy AccessTy,
1752 const Formula &
F,
const Loop &L) {
1760 assert((
F.isCanonical(L) ||
F.Scale != 0));
1762 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1767 int64_t MaxOffset, LSRUse::KindType Kind,
1769 int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
1772 BaseOffset, HasBaseReg, Scale) ||
1777 BaseGV, BaseOffset,
true, 0));
1781 int64_t MaxOffset, LSRUse::KindType Kind,
1782 MemAccessTy AccessTy,
const Formula &
F) {
1783 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1784 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1788 const LSRUse &LU,
const Formula &
F) {
1791 for (
const LSRFixup &
Fixup : LU.Fixups)
1793 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1794 F.Scale,
Fixup.UserInst))
1800 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1805 const LSRUse &LU,
const Formula &
F,
1814 return F.Scale != 1;
1817 case LSRUse::Address: {
1820 LU.AccessTy.MemTy,
F.BaseGV,
F.BaseOffset + LU.MinOffset,
F.HasBaseReg,
1821 F.Scale, LU.AccessTy.AddrSpace);
1823 LU.AccessTy.MemTy,
F.BaseGV,
F.BaseOffset + LU.MaxOffset,
F.HasBaseReg,
1824 F.Scale, LU.AccessTy.AddrSpace);
1827 "Legal addressing mode has an illegal cost!");
1828 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1830 case LSRUse::ICmpZero:
1832 case LSRUse::Special:
1842 LSRUse::KindType Kind, MemAccessTy AccessTy,
1846 if (BaseOffset == 0 && !BaseGV)
return true;
1850 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1854 if (!HasBaseReg && Scale == 1) {
1865 int64_t MaxOffset, LSRUse::KindType Kind,
1866 MemAccessTy AccessTy,
const SCEV *S,
1869 if (S->
isZero())
return true;
1877 if (!S->
isZero())
return false;
1880 if (BaseOffset == 0 && !BaseGV)
return true;
1884 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1887 BaseOffset, HasBaseReg, Scale);
1904 const SCEV *IncExpr;
1907 : UserInst(
U), IVOperand(
O), IncExpr(E) {}
1914 const SCEV *ExprBase =
nullptr;
1916 IVChain() =
default;
1917 IVChain(
const IVInc &Head,
const SCEV *
Base)
1918 : Incs(1, Head), ExprBase(
Base) {}
1925 return std::next(Incs.
begin());
1932 bool hasIncs()
const {
return Incs.
size() >= 2; }
1941 bool isProfitableIncrement(
const SCEV *OperExpr,
1942 const SCEV *IncExpr,
1967 bool Changed =
false;
1994 RegUseTracker RegUses;
1999 static const unsigned MaxChains = 8;
2010 void OptimizeShadowIV();
2013 void OptimizeLoopTermCond();
2017 void FinalizeChain(IVChain &Chain);
2018 void CollectChains();
2019 void GenerateIVChain(
const IVChain &Chain,
2022 void CollectInterestingTypesAndFactors();
2023 void CollectFixupsAndInitialFormulae();
2029 bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
bool HasBaseReg,
2030 LSRUse::KindType Kind, MemAccessTy AccessTy);
2032 std::pair<size_t, int64_t> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2033 MemAccessTy AccessTy);
2035 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2037 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2039 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2040 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2041 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2042 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2044 void CollectLoopInvariantFixupsAndFormulae();
2046 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2047 unsigned Depth = 0);
2049 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2051 size_t Idx,
bool IsScaledReg =
false);
2052 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2053 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2054 const Formula &
Base,
size_t Idx,
2055 bool IsScaledReg =
false);
2056 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2057 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2058 const Formula &
Base,
2060 size_t Idx,
bool IsScaledReg =
false);
2061 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2062 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2063 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2064 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2065 void GenerateCrossUseConstantOffsets();
2066 void GenerateAllReuseFormulae();
2068 void FilterOutUndesirableDedicatedRegisters();
2070 size_t EstimateSearchSpaceComplexity()
const;
2071 void NarrowSearchSpaceByDetectingSupersets();
2072 void NarrowSearchSpaceByCollapsingUnrolledCode();
2073 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2074 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2075 void NarrowSearchSpaceByFilterPostInc();
2076 void NarrowSearchSpaceByDeletingCostlyFormulas();
2077 void NarrowSearchSpaceByPickingWinnerRegs();
2078 void NarrowSearchSpaceUsingHeuristics();
2083 const Cost &CurCost,
2093 const LSRUse &LU)
const;
2095 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2098 void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2101 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2110 bool getChanged()
const {
return Changed; }
2112 return ScalarEvolutionIVs;
2126void LSRInstance::OptimizeShadowIV() {
2128 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2136 Type *DestTy =
nullptr;
2137 bool IsSigned =
false;
2151 if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2153 DestTy = UCast->getDestTy();
2155 else if (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2157 DestTy = SCast->getDestTy();
2159 if (!DestTy)
continue;
2179 if (Mantissa == -1)
continue;
2183 unsigned Entry, Latch;
2193 if (!
Init)
continue;
2194 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2195 (
double)
Init->getSExtValue() :
2196 (
double)
Init->getZExtValue());
2200 if (!Incr)
continue;
2201 if (Incr->
getOpcode() != Instruction::Add
2202 && Incr->
getOpcode() != Instruction::Sub)
2218 if (!
C->getValue().isStrictlyPositive())
2225 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2227 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2228 : Instruction::FSub,
2246 if (
U.getUser() ==
Cond) {
2314 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2319 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2320 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2327 if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2328 Pred = ICmpInst::ICMP_SLE;
2330 }
else if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2331 Pred = ICmpInst::ICMP_SLT;
2333 }
else if (
const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2334 Pred = ICmpInst::ICMP_ULT;
2343 if (
Max->getNumOperands() != 2)
2346 const SCEV *MaxLHS =
Max->getOperand(0);
2347 const SCEV *MaxRHS =
Max->getOperand(1);
2352 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2365 "Loop condition operand is an addrec in a different loop!");
2369 Value *NewRHS =
nullptr;
2370 if (ICmpInst::isTrueWhenEqual(Pred)) {
2373 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2374 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2375 NewRHS = BO->getOperand(0);
2377 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2378 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2379 NewRHS = BO->getOperand(0);
2386 else if (
const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2387 NewRHS = SU->getValue();
2400 Cond->getOperand(0), NewRHS,
"scmp");
2404 Cond->replaceAllUsesWith(NewCond);
2407 Cond->eraseFromParent();
2409 if (
Cmp->use_empty())
2410 Cmp->eraseFromParent();
2416LSRInstance::OptimizeLoopTermCond() {
2433 L->getExitingBlocks(ExitingBlocks);
2441 for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2447 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2457 if (!FindIVUserForCond(
Cond, CondUse))
2471 if (!DT.dominates(ExitingBlock, LatchBlock))
2476 if (LatchBlock != ExitingBlock)
2480 if (&*UI != CondUse &&
2481 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2484 const SCEV *
A = IU.getStride(*CondUse, L);
2485 const SCEV *
B = IU.getStride(*UI, L);
2486 if (!
A || !
B)
continue;
2499 if (
C->isOne() ||
C->isMinusOne())
2500 goto decline_post_inc;
2502 if (
C->getValue().getSignificantBits() >= 64 ||
2503 C->getValue().isMinSignedValue())
2504 goto decline_post_inc;
2508 TTI, UI->getUser(), UI->getOperandValToReplace());
2509 int64_t Scale =
C->getSExtValue();
2513 AccessTy.AddrSpace))
2514 goto decline_post_inc;
2519 AccessTy.AddrSpace))
2520 goto decline_post_inc;
2525 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2531 if (
Cond->getNextNonDebugInstruction() != TermBr) {
2532 if (
Cond->hasOneUse()) {
2533 Cond->moveBefore(TermBr);
2537 Cond = cast<ICmpInst>(
Cond->clone());
2538 Cond->setName(
L->getHeader()->getName() +
".termcond");
2560 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2562 IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
2567bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
2568 bool HasBaseReg, LSRUse::KindType Kind,
2569 MemAccessTy AccessTy) {
2570 int64_t NewMinOffset = LU.MinOffset;
2571 int64_t NewMaxOffset = LU.MaxOffset;
2572 MemAccessTy NewAccessTy = AccessTy;
2577 if (LU.Kind != Kind)
2583 if (Kind == LSRUse::Address) {
2584 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2585 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2586 AccessTy.AddrSpace);
2591 if (NewOffset < LU.MinOffset) {
2593 LU.MaxOffset - NewOffset, HasBaseReg))
2595 NewMinOffset = NewOffset;
2596 }
else if (NewOffset > LU.MaxOffset) {
2598 NewOffset - LU.MinOffset, HasBaseReg))
2600 NewMaxOffset = NewOffset;
2604 LU.MinOffset = NewMinOffset;
2605 LU.MaxOffset = NewMaxOffset;
2606 LU.AccessTy = NewAccessTy;
2613std::pair<size_t, int64_t> LSRInstance::getUse(
const SCEV *&Expr,
2614 LSRUse::KindType Kind,
2615 MemAccessTy AccessTy) {
2626 std::pair<UseMapTy::iterator, bool>
P =
2630 size_t LUIdx =
P.first->second;
2631 LSRUse &LU =
Uses[LUIdx];
2632 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2634 return std::make_pair(LUIdx,
Offset);
2638 size_t LUIdx =
Uses.size();
2639 P.first->second = LUIdx;
2640 Uses.push_back(LSRUse(Kind, AccessTy));
2641 LSRUse &LU =
Uses[LUIdx];
2645 return std::make_pair(LUIdx,
Offset);
2649void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2650 if (&LU != &
Uses.back())
2655 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2661LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2662 const LSRUse &OrigLU) {
2664 for (LSRUse &LU :
Uses) {
2670 if (&LU != &OrigLU &&
2671 LU.Kind != LSRUse::ICmpZero &&
2672 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2673 LU.WidestFixupType == OrigLU.WidestFixupType &&
2674 LU.HasFormulaWithSameRegs(OrigF)) {
2676 for (
const Formula &
F : LU.Formulae) {
2679 if (
F.BaseRegs == OrigF.BaseRegs &&
2680 F.ScaledReg == OrigF.ScaledReg &&
2681 F.BaseGV == OrigF.BaseGV &&
2682 F.Scale == OrigF.Scale &&
2683 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2684 if (
F.BaseOffset == 0)
2699void LSRInstance::CollectInterestingTypesAndFactors() {
2705 const SCEV *Expr = IU.getExpr(U);
2723 }
while (!Worklist.
empty());
2728 I = Strides.
begin(), E = Strides.
end();
I != E; ++
I)
2730 std::next(
I); NewStrideIter != E; ++NewStrideIter) {
2731 const SCEV *OldStride = *
I;
2732 const SCEV *NewStride = *NewStrideIter;
2743 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2745 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2746 Factors.insert(Factor->getAPInt().getSExtValue());
2751 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2752 Factors.insert(Factor->getAPInt().getSExtValue());
2758 if (
Types.size() == 1)
2770 for(; OI != OE; ++OI) {
2771 if (
Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2776 dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(Oper))) {
2788 if (
TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2789 return Trunc->getOperand(0);
2811 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2813 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2815 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2822 if (SubExpr->getSCEVType() ==
scAddExpr)
2825 if (SubExpr->getSCEVType() !=
scMulExpr)
2831 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2841bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
2842 const SCEV *IncExpr,
2850 if (!isa<SCEVConstant>(IncExpr)) {
2852 if (isa<SCEVConstant>(SE.
getMinusSCEV(OperExpr, HeadExpr)))
2877 if (!Chain.hasIncs())
2880 if (!
Users.empty()) {
2881 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
2883 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
2886 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
2894 if (isa<PHINode>(Chain.tailUserInst())
2895 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2898 const SCEV *LastIncExpr =
nullptr;
2899 unsigned NumConstIncrements = 0;
2900 unsigned NumVarIncrements = 0;
2901 unsigned NumReusedIncrements = 0;
2906 for (
const IVInc &Inc : Chain) {
2909 if (Inc.IncExpr->isZero())
2914 if (isa<SCEVConstant>(Inc.IncExpr)) {
2915 ++NumConstIncrements;
2919 if (Inc.IncExpr == LastIncExpr)
2920 ++NumReusedIncrements;
2924 LastIncExpr = Inc.IncExpr;
2929 if (NumConstIncrements > 1)
2936 cost += NumVarIncrements;
2940 cost -= NumReusedIncrements;
2942 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
2959 unsigned ChainIdx = 0, NChains = IVChainVec.size();
2960 const SCEV *LastIncExpr =
nullptr;
2961 for (; ChainIdx < NChains; ++ChainIdx) {
2962 IVChain &Chain = IVChainVec[ChainIdx];
2976 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2982 if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.
isLoopInvariant(IncExpr, L))
2985 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2986 LastIncExpr = IncExpr;
2992 if (ChainIdx == NChains) {
2993 if (isa<PHINode>(UserInst))
2999 LastIncExpr = OperExpr;
3003 if (!isa<SCEVAddRecExpr>(LastIncExpr))
3006 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3008 ChainUsersVec.
resize(NChains);
3009 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3010 <<
") IV=" << *LastIncExpr <<
"\n");
3012 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3013 <<
") IV+" << *LastIncExpr <<
"\n");
3015 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3017 IVChain &Chain = IVChainVec[ChainIdx];
3021 if (!LastIncExpr->
isZero()) {
3022 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
3038 IVChain::const_iterator IncIter = Chain.Incs.begin();
3039 IVChain::const_iterator IncEnd = Chain.Incs.end();
3040 for( ; IncIter != IncEnd; ++IncIter) {
3041 if (IncIter->UserInst == OtherUse)
3044 if (IncIter != IncEnd)
3048 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3049 && IU.isIVUserOrOperand(OtherUse)) {
3052 NearUsers.
insert(OtherUse);
3057 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3082void LSRInstance::CollectChains() {
3088 for (
DomTreeNode *Rung = DT.getNode(
L->getLoopLatch());
3089 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3098 if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
3108 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3109 ChainIdx < NChains; ++ChainIdx) {
3110 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3116 while (IVOpIter != IVOpEnd) {
3117 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3118 if (UniqueOperands.
insert(IVOpInst).second)
3119 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3120 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3125 for (
PHINode &PN :
L->getHeader()->phis()) {
3130 dyn_cast<Instruction>(PN.getIncomingValueForBlock(
L->getLoopLatch()));
3132 ChainInstruction(&PN, IncV, ChainUsersVec);
3135 unsigned ChainIdx = 0;
3136 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3137 UsersIdx < NChains; ++UsersIdx) {
3139 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3142 if (ChainIdx != UsersIdx)
3143 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3144 FinalizeChain(IVChainVec[ChainIdx]);
3147 IVChainVec.resize(ChainIdx);
3150void LSRInstance::FinalizeChain(IVChain &Chain) {
3151 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3152 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3154 for (
const IVInc &Inc : Chain) {
3156 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3157 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3158 IVIncSet.insert(UseI);
3165 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3183void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3187 const IVInc &Head = Chain.Incs[0];
3192 Value *IVSrc =
nullptr;
3193 while (IVOpIter != IVOpEnd) {
3204 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3205 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3208 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3210 if (IVOpIter == IVOpEnd) {
3212 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3215 assert(IVSrc &&
"Failed to find IV chain source");
3220 const SCEV *LeftOverExpr =
nullptr;
3221 for (
const IVInc &Inc : Chain) {
3223 if (isa<PHINode>(InsertPt))
3224 InsertPt =
L->getLoopLatch()->getTerminator();
3228 Value *IVOper = IVSrc;
3229 if (!Inc.IncExpr->isZero()) {
3233 LeftOverExpr = LeftOverExpr ?
3234 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3236 if (LeftOverExpr && !LeftOverExpr->
isZero()) {
3239 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3242 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3246 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3248 LeftOverExpr =
nullptr;
3251 Type *OperTy = Inc.IVOperand->getType();
3252 if (IVTy != OperTy) {
3254 "cannot extend a chained IV");
3256 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3258 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3259 if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3264 if (isa<PHINode>(Chain.tailUserInst())) {
3265 for (
PHINode &Phi :
L->getHeader()->phis()) {
3269 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3272 Value *IVOper = IVSrc;
3274 if (IVTy != PostIncTy) {
3276 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3277 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3278 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3280 Phi.replaceUsesOfWith(PostIncV, IVOper);
3286void LSRInstance::CollectFixupsAndInitialFormulae() {
3288 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3300 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3301 if (IVIncSet.count(UseI)) {
3302 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3306 LSRUse::KindType
Kind = LSRUse::Basic;
3307 MemAccessTy AccessTy;
3309 Kind = LSRUse::Address;
3313 const SCEV *S = IU.getExpr(U);
3324 if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3327 if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3329 if (CI->isEquality()) {
3332 Value *
NV = CI->getOperand(1);
3333 if (NV ==
U.getOperandValToReplace()) {
3334 CI->setOperand(1, CI->getOperand(0));
3335 CI->setOperand(0, NV);
3336 NV = CI->getOperand(1);
3343 (!
NV->getType()->isPointerTy() ||
3350 Kind = LSRUse::ICmpZero;
3352 }
else if (
L->isLoopInvariant(NV) &&
3353 (!isa<Instruction>(NV) ||
3354 DT.dominates(cast<Instruction>(NV),
L->getHeader())) &&
3355 !
NV->getType()->isPointerTy()) {
3366 Kind = LSRUse::ICmpZero;
3368 assert(!isa<SCEVCouldNotCompute>(S));
3373 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3374 if (Factors[i] != -1)
3375 Factors.insert(-(
uint64_t)Factors[i]);
3381 std::pair<size_t, int64_t>
P = getUse(S, Kind, AccessTy);
3382 size_t LUIdx =
P.first;
3384 LSRUse &LU =
Uses[LUIdx];
3387 LSRFixup &LF = LU.getNewFixup();
3388 LF.UserInst = UserInst;
3389 LF.OperandValToReplace =
U.getOperandValToReplace();
3390 LF.PostIncLoops = TmpPostIncLoops;
3392 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3395 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3397 F.initialMatch(S, L, SE);
3398 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU);
3399 VisitedLSRUse.
insert(LUIdx);
3402 if (!LU.WidestFixupType ||
3405 LU.WidestFixupType = LF.OperandValToReplace->getType();
3408 if (LU.Formulae.empty()) {
3409 InsertInitialFormula(S, LU, LUIdx);
3410 CountRegisters(LU.Formulae.back(), LUIdx);
3419void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3423 LU.RigidFormula =
true;
3426 F.initialMatch(S, L, SE);
3427 bool Inserted = InsertFormula(LU, LUIdx,
F);
3428 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3434LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3435 LSRUse &LU,
size_t LUIdx) {
3437 F.BaseRegs.push_back(S);
3438 F.HasBaseReg =
true;
3439 bool Inserted = InsertFormula(LU, LUIdx,
F);
3440 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3444void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3446 RegUses.countRegister(
F.ScaledReg, LUIdx);
3447 for (
const SCEV *BaseReg :
F.BaseRegs)
3448 RegUses.countRegister(BaseReg, LUIdx);
3453bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3456 "Formula is illegal");
3458 if (!LU.InsertFormula(
F, *L))
3461 CountRegisters(
F, LUIdx);
3471LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3480 while (!Worklist.
empty()) {
3484 if (!Visited.
insert(S).second)
3491 else if (
const SCEVUDivExpr *
D = dyn_cast<SCEVUDivExpr>(S)) {
3494 }
else if (
const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3495 const Value *
V = US->getValue();
3496 if (
const Instruction *Inst = dyn_cast<Instruction>(V)) {
3498 if (
L->contains(Inst))
continue;
3499 }
else if (isa<Constant>(V))
3502 for (
const Use &U :
V->uses()) {
3503 const Instruction *UserInst = dyn_cast<Instruction>(
U.getUser());
3515 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3517 cast<PHINode>(UserInst)->getIncomingBlock(
3519 if (!DT.dominates(
L->getHeader(), UseBB))
3532 if (isa<PHINode>(UserInst)) {
3533 const auto *PhiNode = cast<PHINode>(UserInst);
3534 bool HasIncompatibleEHPTerminatedBlock =
false;
3536 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3537 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3538 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3539 HasIncompatibleEHPTerminatedBlock =
true;
3544 if (HasIncompatibleEHPTerminatedBlock) {
3557 if (!isa<SCEVUnknown>(UserS))
3566 if (
const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3567 unsigned OtherIdx = !
U.getOperandNo();
3568 Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
3573 std::pair<size_t, int64_t>
P = getUse(
3574 S, LSRUse::Basic, MemAccessTy());
3575 size_t LUIdx =
P.first;
3577 LSRUse &LU =
Uses[LUIdx];
3578 LSRFixup &LF = LU.getNewFixup();
3579 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3580 LF.OperandValToReplace =
U;
3582 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3583 if (!LU.WidestFixupType ||
3586 LU.WidestFixupType = LF.OperandValToReplace->getType();
3587 InsertSupplementalFormula(US, LU, LUIdx);
3588 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3604 unsigned Depth = 0) {
3611 for (
const SCEV *S :
Add->operands()) {
3617 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3626 if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3628 Remainder =
nullptr;
3646 const SCEV *Remainder =
3659 LSRUse &LU,
const SCEV *S,
const Loop *L,
3661 if (LU.Kind != LSRUse::Address ||
3662 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3668 if (!isa<SCEVConstant>(LoopStep))
3674 if (!isa<SCEVConstant>(LoopStart) && SE.
isLoopInvariant(LoopStart, L))
3681void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3682 const Formula &
Base,
3685 const SCEV *BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[
Idx];
3697 if (AddOps.
size() == 1)
3711 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3717 InnerAddOps.append(std::next(J),
3722 if (InnerAddOps.size() == 1 &&
3724 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3733 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3740 F.ScaledReg =
nullptr;
3742 F.BaseRegs.erase(
F.BaseRegs.begin() +
Idx);
3743 }
else if (IsScaledReg)
3744 F.ScaledReg = InnerSum;
3746 F.BaseRegs[
Idx] = InnerSum;
3752 SC->getValue()->getZExtValue()))
3754 (
uint64_t)
F.UnfoldedOffset +
SC->getValue()->getZExtValue();
3756 F.BaseRegs.push_back(*J);
3761 if (InsertFormula(LU, LUIdx,
F))
3768 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
3774void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
3776 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
3781 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
3782 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
3784 if (
Base.Scale == 1)
3785 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
3791void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
3794 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
3795 (
Base.UnfoldedOffset != 0) <= 1)
3802 Formula NewBase =
Base;
3803 NewBase.BaseRegs.clear();
3804 Type *CombinedIntegerType =
nullptr;
3805 for (
const SCEV *BaseReg :
Base.BaseRegs) {
3808 if (!CombinedIntegerType)
3813 NewBase.BaseRegs.push_back(BaseReg);
3817 if (Ops.
size() == 0)
3822 auto GenerateFormula = [&](
const SCEV *Sum) {
3823 Formula
F = NewBase;
3831 F.BaseRegs.push_back(Sum);
3833 (void)InsertFormula(LU, LUIdx,
F);
3837 if (Ops.
size() > 1) {
3844 if (NewBase.UnfoldedOffset) {
3845 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
3848 NewBase.UnfoldedOffset = 0;
3854void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
3855 const Formula &
Base,
size_t Idx,
3859 if (
G->isZero() || !GV)
3863 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
3868 F.BaseRegs[
Idx] =
G;
3869 (void)InsertFormula(LU, LUIdx,
F);
3873void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
3876 if (
Base.BaseGV)
return;
3878 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
3879 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
3880 if (
Base.Scale == 1)
3881 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
3886void LSRInstance::GenerateConstantOffsetsImpl(
3887 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
3890 auto GenerateOffset = [&](
const SCEV *
G, int64_t
Offset) {
3894 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
3901 F.ScaledReg =
nullptr;
3903 F.deleteBaseReg(
F.BaseRegs[
Idx]);
3905 }
else if (IsScaledReg)
3908 F.BaseRegs[
Idx] = NewG;
3910 (void)InsertFormula(LU, LUIdx,
F);
3925 if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
3927 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
3928 const APInt &StepInt = StepRec->getAPInt();
3932 for (int64_t
Offset : Worklist) {
3939 for (int64_t
Offset : Worklist)
3943 if (
G->isZero() || Imm == 0)
3952 F.BaseRegs[
Idx] =
G;
3957 (void)InsertFormula(LU, LUIdx,
F);
3961void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
3967 if (LU.MaxOffset != LU.MinOffset)
3970 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
3971 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
3972 if (
Base.Scale == 1)
3973 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
3979void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
3981 if (LU.Kind != LSRUse::ICmpZero)
return;
3989 if (LU.MinOffset != LU.MaxOffset)
return;
3992 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
3994 for (
const SCEV *BaseReg :
Base.BaseRegs)
3997 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4000 for (int64_t Factor : Factors) {
4005 if (
Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
4007 int64_t NewBaseOffset = (
uint64_t)
Base.BaseOffset * Factor;
4008 assert(Factor != 0 &&
"Zero factor not expected!");
4009 if (NewBaseOffset / Factor !=
Base.BaseOffset)
4017 int64_t
Offset = LU.MinOffset;
4018 if (
Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
4021 if (
Offset / Factor != LU.MinOffset)
4029 F.BaseOffset = NewBaseOffset;
4041 for (
size_t i = 0, e =
F.BaseRegs.size(); i !=
e; ++i) {
4055 if (
F.UnfoldedOffset != 0) {
4056 if (
F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
4059 F.UnfoldedOffset = (
uint64_t)
F.UnfoldedOffset * Factor;
4060 if (
F.UnfoldedOffset / Factor !=
Base.UnfoldedOffset)
4069 (void)InsertFormula(LU, LUIdx,
F);
4076void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4083 if (
Base.Scale != 0 && !
Base.unscale())
4086 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4089 for (int64_t Factor : Factors) {
4090 Base.Scale = Factor;
4091 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4093 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4097 if (LU.Kind == LSRUse::Basic &&
4098 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4099 LU.AccessTy,
Base) &&
4100 LU.AllFixupsOutsideLoop)
4101 LU.Kind = LSRUse::Special;
4107 if (LU.Kind == LSRUse::ICmpZero &&
4108 !
Base.HasBaseReg &&
Base.BaseOffset == 0 && !
Base.BaseGV)
4111 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4113 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4120 if (!Quotient->isZero()) {
4123 F.ScaledReg = Quotient;
4124 F.deleteBaseReg(
F.BaseRegs[i]);
4128 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4129 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4133 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4135 (void)InsertFormula(LU, LUIdx,
F);
4151 const SCEV *Result =
nullptr;
4152 for (
auto &L :
Loops) {
4156 if (!New || (Result && New != Result))
4161 assert(Result &&
"failed to create expression");
4166void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4168 if (
Base.BaseGV)
return;
4178 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4181 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4185 for (
auto &LF : LU.Fixups)
4186 Loops.push_back(LF.PostIncLoops);
4188 for (
Type *SrcTy : Types) {
4197 const SCEV *NewScaledReg =
4199 if (!NewScaledReg || NewScaledReg->
isZero())
4201 F.ScaledReg = NewScaledReg;
4203 bool HasZeroBaseReg =
false;
4204 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4205 const SCEV *NewBaseReg =
4207 if (!NewBaseReg || NewBaseReg->
isZero()) {
4208 HasZeroBaseReg =
true;
4211 BaseReg = NewBaseReg;
4218 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4222 (void)InsertFormula(LU, LUIdx,
F);
4235 const SCEV *OrigReg;
4238 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4246#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4248 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4249 <<
" , add offset " <<
Imm;
4259void LSRInstance::GenerateCrossUseConstantOffsets() {
4261 using ImmMapTy = std::map<int64_t, const SCEV *>;
4266 for (
const SCEV *
Use : RegUses) {
4269 auto Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
4272 Pair.first->second.insert(std::make_pair(Imm,
Use));
4273 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(
Use);
4281 for (
const SCEV *Reg : Sequence) {
4282 const ImmMapTy &Imms =
Map.find(Reg)->second;
4285 if (Imms.size() == 1)
4288 LLVM_DEBUG(
dbgs() <<
"Generating cross-use offsets for " << *Reg <<
':';
4289 for (
const auto &Entry
4291 <<
' ' << Entry.first;
4295 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4297 const SCEV *OrigReg = J->second;
4299 int64_t JImm = J->first;
4300 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4302 if (!isa<SCEVConstant>(OrigReg) &&
4303 UsedByIndicesMap[Reg].
count() == 1) {
4311 int64_t
First = Imms.begin()->first;
4312 int64_t
Last = std::prev(Imms.end())->first;
4319 ImmMapTy::const_iterator OtherImms[] = {
4320 Imms.begin(), std::prev(Imms.end()),
4321 Imms.lower_bound(Avg)};
4322 for (
const auto &M : OtherImms) {
4323 if (M == J || M == JE)
continue;
4329 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4337 UsedByIndicesMap.
clear();
4338 UniqueItems.
clear();
4341 for (
const WorkItem &WI : WorkItems) {
4342 size_t LUIdx = WI.LUIdx;
4343 LSRUse &LU =
Uses[LUIdx];
4344 int64_t
Imm = WI.Imm;
4345 const SCEV *OrigReg = WI.OrigReg;
4352 for (
size_t L = 0, LE = LU.Formulae.size();
L !=
LE; ++
L) {
4353 Formula
F = LU.Formulae[
L];
4360 if (
F.ScaledReg == OrigReg) {
4367 NewF.BaseOffset =
Offset;
4368 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4371 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4376 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
4377 if (
C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
4379 .ule(std::abs(NewF.BaseOffset)))
4383 NewF.canonicalize(*this->L);
4384 (void)InsertFormula(LU, LUIdx, NewF);
4387 for (
size_t N = 0, NE =
F.BaseRegs.size();
N !=
NE; ++
N) {
4388 const SCEV *BaseReg =
F.BaseRegs[
N];
4389 if (BaseReg != OrigReg)
4392 NewF.BaseOffset = (
uint64_t)NewF.BaseOffset + Imm;
4394 LU.Kind, LU.AccessTy, NewF)) {
4401 NewF.UnfoldedOffset = (
uint64_t)NewF.UnfoldedOffset + Imm;
4403 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4408 for (
const SCEV *NewReg : NewF.BaseRegs)
4410 if ((
C->getAPInt() + NewF.BaseOffset)
4412 .slt(std::abs(NewF.BaseOffset)) &&
4414 (
unsigned)llvm::countr_zero<uint64_t>(NewF.BaseOffset))
4418 NewF.canonicalize(*this->L);
4419 (void)InsertFormula(LU, LUIdx, NewF);
4430LSRInstance::GenerateAllReuseFormulae() {
4433 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4434 LSRUse &LU =
Uses[LUIdx];
4435 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4436 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4437 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4438 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4440 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4441 LSRUse &LU =
Uses[LUIdx];
4442 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4443 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4444 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4445 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4446 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4447 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4448 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4449 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4451 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4452 LSRUse &LU =
Uses[LUIdx];
4453 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4454 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4457 GenerateCrossUseConstantOffsets();
4460 "After generating reuse formulae:\n";
4461 print_uses(
dbgs()));
4466void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4471 bool ChangedFormulae =
false;
4476 using BestFormulaeTy =
4479 BestFormulaeTy BestFormulae;
4481 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4482 LSRUse &LU =
Uses[LUIdx];
4487 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4488 FIdx != NumForms; ++FIdx) {
4489 Formula &
F = LU.Formulae[FIdx];
4500 CostF.RateFormula(
F, Regs, VisitedRegs, LU, &LoserRegs);
4501 if (CostF.isLoser()) {
4513 for (
const SCEV *Reg :
F.BaseRegs) {
4514 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4518 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4519 Key.push_back(
F.ScaledReg);
4524 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4525 BestFormulae.insert(std::make_pair(Key, FIdx));
4529 Formula &Best = LU.Formulae[
P.first->second];
4531 Cost CostBest(L, SE,
TTI, AMK);
4533 CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
4534 if (CostF.isLess(CostBest))
4538 " in favor of formula ";
4539 Best.print(
dbgs());
dbgs() <<
'\n');
4542 ChangedFormulae =
true;
4544 LU.DeleteFormula(
F);
4552 LU.RecomputeRegs(LUIdx, RegUses);
4555 BestFormulae.clear();
4560 "After filtering out undesirable candidates:\n";
4568size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4570 for (
const LSRUse &LU :
Uses) {
4571 size_t FSize = LU.Formulae.size();
4586void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4590 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4591 "which use a superset of registers used by other "
4594 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4595 LSRUse &LU =
Uses[LUIdx];
4597 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4598 Formula &
F = LU.Formulae[i];
4603 I =
F.BaseRegs.begin(), E =
F.BaseRegs.end();
I != E; ++
I) {
4608 NewF.BaseOffset += (
uint64_t)
C->getValue()->getSExtValue();
4609 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4610 (
I -
F.BaseRegs.begin()));
4611 if (LU.HasFormulaWithSameRegs(NewF)) {
4614 LU.DeleteFormula(
F);
4620 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*
I)) {
4621 if (
GlobalValue *GV = dyn_cast<GlobalValue>(
U->getValue()))
4625 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4626 (
I -
F.BaseRegs.begin()));
4627 if (LU.HasFormulaWithSameRegs(NewF)) {
4630 LU.DeleteFormula(
F);
4641 LU.RecomputeRegs(LUIdx, RegUses);
4650void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4655 dbgs() <<
"The search space is too complex.\n"
4656 "Narrowing the search space by assuming that uses separated "
4657 "by a constant offset will use the same registers.\n");
4661 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4662 LSRUse &LU =
Uses[LUIdx];
4663 for (
const Formula &
F : LU.Formulae) {
4664 if (
F.BaseOffset == 0 || (
F.Scale != 0 &&
F.Scale != 1))
4667 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4671 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4672 LU.Kind, LU.AccessTy))
4677 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4680 for (LSRFixup &
Fixup : LU.Fixups) {
4681 Fixup.Offset +=
F.BaseOffset;
4682 LUThatHas->pushFixup(
Fixup);
4688 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4689 Formula &
F = LUThatHas->Formulae[i];
4690 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4691 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4693 LUThatHas->DeleteFormula(
F);
4701 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
4704 DeleteUse(LU, LUIdx);
4717void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4721 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
4722 "undesirable dedicated registers.\n");
4724 FilterOutUndesirableDedicatedRegisters();
4739void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
4744 dbgs() <<
"The search space is too complex.\n"
4745 "Narrowing the search space by choosing the best Formula "
4746 "from the Formulae with the same Scale and ScaledReg.\n");
4751 BestFormulaeTy BestFormulae;
4753 bool ChangedFormulae =
false;
4758 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4759 LSRUse &LU =
Uses[LUIdx];
4764 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
4769 size_t FARegNum = 0;
4770 for (
const SCEV *Reg : FA.BaseRegs) {
4771 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4772 FARegNum += (NumUses - UsedByIndices.
count() + 1);
4774 size_t FBRegNum = 0;
4775 for (
const SCEV *Reg : FB.BaseRegs) {
4776 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
4777 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
4779 if (FARegNum != FBRegNum)
4780 return FARegNum < FBRegNum;
4787 CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
4789 CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
4790 return CostFA.isLess(CostFB);
4794 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
4796 Formula &
F = LU.Formulae[FIdx];
4799 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
4803 Formula &Best = LU.Formulae[
P.first->second];
4804 if (IsBetterThan(
F, Best))
4808 " in favor of formula ";
4809 Best.print(
dbgs());
dbgs() <<
'\n');
4811 ChangedFormulae =
true;
4813 LU.DeleteFormula(
F);
4819 LU.RecomputeRegs(LUIdx, RegUses);
4822 BestFormulae.clear();
4827 "After filtering out undesirable candidates:\n";
4834void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
4841 "Narrowing the search space by choosing the lowest "
4842 "register Formula for PostInc Uses.\n");
4844 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4845 LSRUse &LU =
Uses[LUIdx];
4847 if (LU.Kind != LSRUse::Address)
4853 size_t MinRegs = std::numeric_limits<size_t>::max();
4854 for (
const Formula &
F : LU.Formulae)
4855 MinRegs = std::min(
F.getNumRegs(), MinRegs);
4858 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
4860 Formula &
F = LU.Formulae[FIdx];
4861 if (
F.getNumRegs() > MinRegs) {
4864 LU.DeleteFormula(
F);
4871 LU.RecomputeRegs(LUIdx, RegUses);
4922void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
4935 DenseMap <const SCEV *, float> RegNumMap;
4936 for (
const SCEV *Reg : RegUses) {
4937 if (UniqRegs.
count(Reg))
4940 for (
const LSRUse &LU :
Uses) {
4941 if (!LU.Regs.count(Reg))
4943 float P = LU.getNotSelectedProbability(Reg);
4949 RegNumMap.
insert(std::make_pair(Reg, PNotSel));
4953 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
4956 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4957 LSRUse &LU =
Uses[LUIdx];
4959 if (LU.Formulae.size() < 2)
4964 float FMinRegNum = LU.Formulae[0].getNumRegs();
4965 float FMinARegNum = LU.Formulae[0].getNumRegs();
4967 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4968 Formula &
F = LU.Formulae[i];
4971 for (
const SCEV *BaseReg :
F.BaseRegs) {
4972 if (UniqRegs.
count(BaseReg))
4974 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4975 if (isa<SCEVAddRecExpr>(BaseReg))
4977 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4979 if (
const SCEV *ScaledReg =
F.ScaledReg) {
4980 if (!UniqRegs.
count(ScaledReg)) {
4982 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
4983 if (isa<SCEVAddRecExpr>(ScaledReg))
4985 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
4988 if (FMinRegNum > FRegNum ||
4989 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
4990 FMinRegNum = FRegNum;
4991 FMinARegNum = FARegNum;
4996 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
4998 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
4999 while (LU.Formulae.size() != 1) {
5002 LU.Formulae.pop_back();
5004 LU.RecomputeRegs(LUIdx, RegUses);
5005 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5006 Formula &
F = LU.Formulae[0];
5009 UniqRegs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
5022 MemAccessTy AccessType) {
5023 if (Best->
getType() != Reg->getType() ||
5024 (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
5025 cast<SCEVAddRecExpr>(Best)->getLoop() !=
5026 cast<SCEVAddRecExpr>(Reg)->getLoop()))
5028 const auto *Diff = dyn_cast<SCEVConstant>(SE.
getMinusSCEV(Best, Reg));
5033 AccessType.MemTy,
nullptr,
5034 Diff->getAPInt().getSExtValue(),
5035 true, 0, AccessType.AddrSpace) &&
5037 AccessType.MemTy,
nullptr,
5038 -Diff->getAPInt().getSExtValue(),
5039 true, 0, AccessType.AddrSpace);
5045void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5056 const SCEV *Best =
nullptr;
5057 unsigned BestNum = 0;
5058 for (
const SCEV *Reg : RegUses) {
5059 if (Taken.
count(Reg))
5063 BestNum = RegUses.getUsedByIndices(Reg).count();
5065 unsigned Count = RegUses.getUsedByIndices(Reg).count();
5066 if (Count > BestNum) {
5074 if (Count == BestNum) {
5075 int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5076 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5078 Uses[LUIdx].AccessTy)) {
5085 assert(Best &&
"Failed to find best LSRUse candidate");
5087 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5088 <<
" will yield profitable reuse.\n");
5093 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5094 LSRUse &LU =
Uses[LUIdx];
5095 if (!LU.Regs.count(Best))
continue;
5098 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5099 Formula &
F = LU.Formulae[i];
5100 if (!
F.referencesReg(Best)) {
5102 LU.DeleteFormula(
F);
5106 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5112 LU.RecomputeRegs(LUIdx, RegUses);
5123void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5124 NarrowSearchSpaceByDetectingSupersets();
5125 NarrowSearchSpaceByCollapsingUnrolledCode();
5126 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5128 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5129 NarrowSearchSpaceByFilterPostInc();
5131 NarrowSearchSpaceByDeletingCostlyFormulas();
5133 NarrowSearchSpaceByPickingWinnerRegs();
5140 const Cost &CurCost,
5153 const LSRUse &LU =
Uses[Workspace.
size()];
5160 for (
const SCEV *S : CurRegs)
5161 if (LU.Regs.count(S))
5165 Cost NewCost(L, SE,
TTI, AMK);
5166 for (
const Formula &
F : LU.Formulae) {
5174 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5175 for (
const SCEV *Reg : ReqRegs) {
5176 if ((
F.ScaledReg &&
F.ScaledReg == Reg) ||
5179 if (NumReqRegsToFind == 0)
5183 if (NumReqRegsToFind != 0) {
5194 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU);
5195 if (NewCost.isLess(SolutionCost)) {
5197 if (Workspace.
size() !=
Uses.size()) {
5198 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5199 NewRegs, VisitedRegs);
5200 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5201 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5204 dbgs() <<
".\nRegs:\n";
5205 for (
const SCEV *S : NewRegs)
dbgs()
5206 <<
"- " << *S <<
"\n";
5209 SolutionCost = NewCost;
5210 Solution = Workspace;
5221 Cost SolutionCost(L, SE,
TTI, AMK);
5222 SolutionCost.Lose();
5223 Cost CurCost(L, SE,
TTI, AMK);
5229 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5230 CurRegs, VisitedRegs);
5231 if (Solution.
empty()) {
5238 "The chosen solution requires ";
5239 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5240 for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5245 Solution[i]->print(
dbgs());
5251 if (BaselineCost.isLess(SolutionCost)) {
5253 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
5256 dbgs() <<
"Baseline is more profitable than chosen solution, "
5257 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5260 "solution, dropping LSR solution.\n";);
5275 bool AllDominate =
true;
5279 if (isa<CatchSwitchInst>(Tentative))
5283 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
5284 AllDominate =
false;
5290 (!BetterPos || !DT.dominates(Inst, BetterPos)))
5300 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5301 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5304 for (
DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
5305 if (!Rung)
return IP;
5306 Rung = Rung->getIDom();
5307 if (!Rung)
return IP;
5308 IDom = Rung->getBlock();
5311 const Loop *IDomLoop = LI.getLoopFor(IDom);
5312 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5313 if (IDomDepth <= IPLoopDepth &&
5314 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5332 if (
Instruction *
I = dyn_cast<Instruction>(LF.OperandValToReplace))
5334 if (LU.Kind == LSRUse::ICmpZero)
5336 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
5338 if (LF.PostIncLoops.count(L)) {
5339 if (LF.isUseFullyOutsideLoop(L))
5340 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5346 for (
const Loop *PIL : LF.PostIncLoops) {
5347 if (PIL == L)
continue;
5352 if (!ExitingBlocks.
empty()) {
5354 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5355 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
5360 assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
5361 && !isa<DbgInfoIntrinsic>(LowestIP) &&
5362 "Insertion point must be a normal instruction");
5369 while (isa<PHINode>(IP)) ++IP;
5372 while (IP->isEHPad()) ++IP;
5375 while (isa<DbgInfoIntrinsic>(IP)) ++IP;
5380 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5388Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5391 if (LU.RigidFormula)
5392 return LF.OperandValToReplace;
5396 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5401 Rewriter.setPostInc(LF.PostIncLoops);
5404 Type *OpTy = LF.OperandValToReplace->getType();
5406 Type *Ty =
F.getType();
5420 for (
const SCEV *Reg :
F.BaseRegs) {
5421 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5429 Value *ICmpScaledV =
nullptr;
5431 const SCEV *ScaledS =
F.ScaledReg;
5437 if (LU.Kind == LSRUse::ICmpZero) {
5447 "The only scale supported by ICmpZero uses is -1!");
5448 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5456 if (!Ops.
empty() && LU.Kind == LSRUse::Address &&
5492 if (LU.Kind == LSRUse::ICmpZero) {
5499 ICmpScaledV = ConstantInt::get(IntTy,
Offset);
5509 int64_t UnfoldedOffset =
F.UnfoldedOffset;
5510 if (UnfoldedOffset != 0) {
5528 if (LU.Kind == LSRUse::ICmpZero) {
5529 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
5530 if (
auto *OperandIsInstr = dyn_cast<Instruction>(CI->
getOperand(1)))
5532 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5533 "a scale at the same time!");
5534 if (
F.Scale == -1) {
5535 if (ICmpScaledV->
getType() != OpTy) {
5545 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5546 "ICmp does not support folding a global value and "
5547 "a scale at the same time!");
5550 if (
C->getType() != OpTy) {
5554 assert(
C &&
"Cast of ConstantInt should have folded");
5567void LSRInstance::RewriteForPHI(
5568 PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
5580 bool needUpdateFixups =
false;
5591 Loop *PNLoop = LI.getLoopFor(Parent);
5592 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5599 .setMergeIdenticalEdges()
5600 .setKeepOneInputPHIs());
5614 if (
L->contains(BB) && !
L->contains(PN))
5622 needUpdateFixups =
true;
5627 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5628 Inserted.insert(std::make_pair(BB,
static_cast<Value *
>(
nullptr)));
5636 Type *OpTy = LF.OperandValToReplace->getType();
5640 LF.OperandValToReplace->getType(),
"tmp",
5646 if (
auto *
I = dyn_cast<Instruction>(FullV))
5647 if (
L->contains(
I) && !
L->contains(BB))
5651 Pair.first->second = FullV;
5658 if (needUpdateFixups) {
5659 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
5660 for (LSRFixup &
Fixup :
Uses[LUIdx].Fixups)
5664 if (
Fixup.UserInst == PN) {
5667 bool foundInOriginalPHI =
false;
5669 if (val ==
Fixup.OperandValToReplace) {
5670 foundInOriginalPHI =
true;
5675 if (foundInOriginalPHI)
5686 if (val ==
Fixup.OperandValToReplace)
5687 Fixup.UserInst = NewPN;
5699void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
5704 if (
PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
5705 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
5707 Value *FullV = Expand(LU, LF,
F, LF.UserInst->getIterator(), DeadInsts);
5710 Type *OpTy = LF.OperandValToReplace->getType();
5711 if (FullV->
getType() != OpTy) {
5714 FullV, OpTy,
"tmp", LF.UserInst->getIterator());
5723 if (LU.Kind == LSRUse::ICmpZero)
5726 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
5729 if (
auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
5739 if (LU.Kind != LSRUse::Address)
5746 if (IVIncInsertPos->
getParent() == LHeader)
5749 if (!
Fixup.OperandValToReplace ||
5751 Instruction *UI = cast<Instruction>(U);
5752 return UI->getParent() != LHeader;
5757 Type *Ty =
I->getType();
5765void LSRInstance::ImplementSolution(
5772 for (
const IVChain &Chain : IVChainVec) {
5773 if (
PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
5778 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
5779 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
5782 ?
L->getHeader()->getTerminator()
5784 Rewriter.setIVIncInsertPos(L, InsertPos);
5785 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
5789 for (
const IVChain &Chain : IVChainVec) {
5790 GenerateIVChain(Chain, DeadInsts);
5796 ScalarEvolutionIVs.push_back(
IV);
5812 for (
PHINode &PN :
L->getHeader()->phis()) {
5814 Value *Start =
nullptr, *Step =
nullptr;
5819 case Instruction::Sub:
5824 case Instruction::Add:
5830 if (!isa<Constant>(Step))
5841 [&](
Use &U) {return DT.dominates(IVIncInsertPos, U);}))
5854 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
5857 :
TTI.getPreferredAddressingMode(
L, &SE)),
5858 Rewriter(SE,
L->getHeader()->getModule()->getDataLayout(),
"lsr",
false),
5859 BaselineCost(
L, SE,
TTI, AMK) {
5861 if (!
L->isLoopSimplifyForm())
5865 if (IU.
empty())
return;
5869 unsigned NumUsers = 0;
5873 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
5880 if (
auto *PN = dyn_cast<PHINode>(
U.getUser())) {
5882 if (isa<FuncletPadInst>(FirstNonPHI) ||
5883 isa<CatchSwitchInst>(FirstNonPHI))
5885 if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
5891 L->getHeader()->printAsOperand(
dbgs(),
false);
5904 OptimizeLoopTermCond();
5907 if (IU.empty())
return;
5910 if (!
L->isInnermost()) {
5923 CollectInterestingTypesAndFactors();
5924 CollectFixupsAndInitialFormulae();
5925 CollectLoopInvariantFixupsAndFormulae();
5931 print_uses(
dbgs()));
5935 GenerateAllReuseFormulae();
5937 FilterOutUndesirableDedicatedRegisters();
5938 NarrowSearchSpaceUsingHeuristics();
5948 if (Solution.
empty())
5953 for (
const LSRUse &LU :
Uses) {
5954 for (
const Formula &
F : LU.Formulae)
5956 F) &&
"Illegal formula generated!");
5961 ImplementSolution(Solution);
5964#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5965void LSRInstance::print_factors_and_types(
raw_ostream &
OS)
const {
5966 if (Factors.empty() &&
Types.empty())
return;
5968 OS <<
"LSR has identified the following interesting factors and types: ";
5971 for (int64_t Factor : Factors) {
5974 OS <<
'*' << Factor;
5977 for (
Type *Ty : Types) {
5980 OS <<
'(' << *Ty <<
')';
5986 OS <<
"LSR is examining the following fixup sites:\n";
5987 for (
const LSRUse &LU :
Uses)
5988 for (
const LSRFixup &LF : LU.Fixups) {
5996 OS <<
"LSR is examining the following uses:\n";
5997 for (
const LSRUse &LU :
Uses) {
6001 for (
const Formula &
F : LU.Formulae) {
6010 print_factors_and_types(
OS);
6022class LoopStrengthReduce :
public LoopPass {
6026 LoopStrengthReduce();
6035LoopStrengthReduce::LoopStrengthReduce() :
LoopPass(
ID) {
6039void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6071 return {Begin,
End};
6074struct SCEVDbgValueBuilder {
6075 SCEVDbgValueBuilder() =
default;
6076 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6078 void clone(
const SCEVDbgValueBuilder &
Base) {
6079 LocationOps =
Base.LocationOps;
6084 LocationOps.clear();
6101 unsigned ArgIndex = 0;
6102 if (It != LocationOps.
end()) {
6103 ArgIndex = std::distance(LocationOps.
begin(), It);
6105 ArgIndex = LocationOps.
size();
6117 if (
C->getAPInt().getSignificantBits() > 64)
6119 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6120 Expr.
push_back(
C->getAPInt().getSExtValue());
6127 return ToDwarfOpIter(Expr);
6134 assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6135 "Expected arithmetic SCEV type");
6137 unsigned EmitOperator = 0;
6138 for (
const auto &
Op : CommExpr->
operands()) {
6141 if (EmitOperator >= 1)
6142 pushOperator(DwarfOp);
6153 bool Success = pushSCEV(Inner);
6155 IsSigned ? llvm::dwarf::DW_ATE_signed
6156 : llvm::dwarf::DW_ATE_unsigned};
6157 for (
const auto &
Op : CastOps)
6165 if (
const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
6166 Success &= pushConst(StartInt);
6168 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6171 pushLocation(
U->getValue());
6173 }
else if (
const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6174 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6176 }
else if (
const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6177 Success &= pushSCEV(UDiv->getLHS());
6178 Success &= pushSCEV(UDiv->getRHS());
6179 pushOperator(llvm::dwarf::DW_OP_div);
6181 }
else if (
const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6183 assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
6184 isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
6185 "Unexpected cast type in SCEV.");
6186 Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
6188 }
else if (
const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
6189 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6191 }
else if (isa<SCEVAddRecExpr>(S)) {
6206 if (
C->getAPInt().getSignificantBits() > 64)
6208 int64_t
I =
C->getAPInt().getSExtValue();
6210 case llvm::dwarf::DW_OP_plus:
6211 case llvm::dwarf::DW_OP_minus:
6213 case llvm::dwarf::DW_OP_mul:
6214 case llvm::dwarf::DW_OP_div:
6230 if (isa<SCEVAddRecExpr>(SAR.
getStart()))
6237 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6238 if (!pushSCEV(Stride))
6240 pushOperator(llvm::dwarf::DW_OP_mul);
6242 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6243 if (!pushSCEV(Start))
6245 pushOperator(llvm::dwarf::DW_OP_plus);
6251 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6252 pushLocation(OffsetValue);
6255 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6256 << std::to_string(
Offset) <<
"\n");
6262 bool createIterCountExpr(
const SCEV *S,
6263 const SCEVDbgValueBuilder &IterationCount,
6270 if (!isa<SCEVAddRecExpr>(S))
6273 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6276 const auto *Rec = cast<SCEVAddRecExpr>(S);
6277 if (!Rec->isAffine())
6285 clone(IterationCount);
6286 if (!SCEVToValueExpr(*Rec, SE))
6300 if (isa<SCEVAddRecExpr>(SAR.
getStart())) {
6301 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV. Unsupported nested AddRec: "
6309 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6310 if (!pushSCEV(Start))
6312 pushOperator(llvm::dwarf::DW_OP_minus);
6314 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6315 if (!pushSCEV(Stride))
6317 pushOperator(llvm::dwarf::DW_OP_div);
6328 "Expected the locations vector to contain the IV");
6333 "Expected the location ops to contain the IV.");
6337 for (
const auto &
Op : LocationOps) {
6338 auto It =
find(DestLocations,
Op);
6339 if (It != DestLocations.
end()) {
6341 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6349 for (
const auto &
Op : expr_ops()) {
6351 Op.appendToVector(DestExpr);
6358 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6366struct DVIRecoveryRec {
6369 HadLocationArgList(
false) {}
6371 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6375 bool HadLocationArgList;
6381 for (
auto &RE : RecoveryExprs)
6383 RecoveryExprs.clear();
6386 ~DVIRecoveryRec() {
clear(); }
6394 auto expr_ops = ToDwarfOpIter(Expr);
6396 for (
auto Op : expr_ops)
6405template <
typename T>
6409 "contain any DW_OP_llvm_arg operands.");
6411 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6412 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6416template <
typename T>
6421 "Expected expression that references DIArglist locations using "
6422 "DW_OP_llvm_arg operands.");
6424 for (
Value *V : Locations)
6428 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6439 auto UpdateDbgValueInstImpl = [&](
auto *DbgVal) {
6441 if (NumLLVMArgs == 0) {
6448 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6460 if (!DVIRec.Expr->isComplex() && SalvageExpr->
isComplex()) {
6463 DbgVal->setExpression(SalvageExpr);
6466 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6467 UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6469 UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6483 auto RestorePreTransformStateImpl = [&](
auto *DbgVal) {
6484 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6485 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6486 assert(DVIRec.Expr &&
"Expected an expression");
6487 DbgVal->setExpression(DVIRec.Expr);
6491 if (!DVIRec.HadLocationArgList) {
6492 assert(DVIRec.LocationOps.size() == 1 &&
6493 "Unexpected number of location ops.");
6497 Value *CachedValue =
6502 for (
WeakVH VH : DVIRec.LocationOps) {
6507 DbgVal->setRawLocation(
6510 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6512 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6513 RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6515 RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6520 const SCEV *SCEVInductionVar,
6521 SCEVDbgValueBuilder IterCountExpr) {
6523 if (isa<DbgValueInst *>(DVIRec.DbgRef)
6524 ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
6525 : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
6537 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6539 NewLocationOps.
push_back(LSRInductionVar);
6541 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6542 WeakVH VH = DVIRec.LocationOps[i];
6546 if (VH && !isa<UndefValue>(VH)) {
6548 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6550 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6558 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6559 <<
" refers to a location that is now undef or erased. "
6560 "Salvage abandoned.\n");
6564 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6565 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6567 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6568 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6572 if (std::optional<APInt>
Offset =
6574 if (
Offset->getSignificantBits() <= 64)
6575 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6576 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6584 if (DVIRec.Expr->getNumElements() == 0) {
6585 assert(DVIRec.RecoveryExprs.size() == 1 &&
6586 "Expected only a single recovery expression for an empty "
6588 assert(DVIRec.RecoveryExprs[0] &&
6589 "Expected a SCEVDbgSalvageBuilder for location 0");
6590 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6591 B->appendToVectors(
NewExpr, NewLocationOps);
6593 for (
const auto &
Op : DVIRec.Expr->expr_ops()) {
6601 SCEVDbgValueBuilder *DbgBuilder =
6602 DVIRec.RecoveryExprs[LocationArgIndex].get();
6608 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6609 "Expected a positive index for the location-op position.");
6610 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6614 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6618 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6620 << *cast<DbgValueInst *>(DVIRec.DbgRef) <<
"\n");
6623 << *cast<DbgVariableRecord *>(DVIRec.DbgRef) <<
"\n");
6631 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6632 if (DVIToUpdate.empty())
6636 assert(SCEVInductionVar &&
6637 "Anticipated a SCEV for the post-LSR induction variable");
6640 dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
6641 if (!IVAddRec->isAffine())
6649 SCEVDbgValueBuilder IterCountExpr;
6650 IterCountExpr.pushLocation(LSRInductionVar);
6651 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6654 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6657 for (
auto &DVIRec : DVIToUpdate) {
6658 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6669 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
6671 for (
const auto &
B : L->getBlocks()) {
6672 for (
auto &
I : *
B) {
6673 auto ProcessDbgValue = [&](
auto *DbgVal) ->
bool {
6676 if (DbgVal->isKillLocation())
6681 const auto &HasTranslatableLocationOps =
6682 [&](
const auto *DbgValToTranslate) ->
bool {
6683 for (
const auto LocOp : DbgValToTranslate->location_ops()) {
6697 if (!HasTranslatableLocationOps(DbgVal))
6700 std::unique_ptr<DVIRecoveryRec> NewRec =
6701 std::make_unique<DVIRecoveryRec>(DbgVal);
6705 NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
6706 for (
const auto LocOp : DbgVal->location_ops()) {
6707 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
6708 NewRec->LocationOps.push_back(LocOp);
6709 NewRec->HadLocationArgList = DbgVal->hasArgList();
6711 SalvageableDVISCEVs.push_back(std::move(NewRec));
6715 if (DVR.isDbgValue() || DVR.isDbgAssign())
6716 ProcessDbgValue(&DVR);
6718 auto DVI = dyn_cast<DbgValueInst>(&
I);
6721 if (ProcessDbgValue(DVI))
6722 DVIHandles.insert(DVI);
6731 const LSRInstance &LSR) {
6733 auto IsSuitableIV = [&](
PHINode *
P) {
6744 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
6751 if (IsSuitableIV(
P))
6755 for (
PHINode &
P : L.getHeader()->phis()) {
6756 if (IsSuitableIV(&
P))
6762static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
6765 if (!L->isInnermost()) {
6767 return std::nullopt;
6770 if (!L->isLoopSimplifyForm()) {
6772 return std::nullopt;
6776 LLVM_DEBUG(
dbgs() <<
"Cannot fold on backedge that is loop variant\n");
6777 return std::nullopt;
6783 return std::nullopt;
6784 auto *TermCond = dyn_cast<ICmpInst>(BI->
getCondition());
6787 dbgs() <<
"Cannot fold on branching condition that is not an ICmpInst");
6788 return std::nullopt;
6790 if (!TermCond->hasOneUse()) {
6793 <<
"Cannot replace terminating condition with more than one use\n");
6794 return std::nullopt;
6798 Value *
RHS = TermCond->getOperand(1);
6799 if (!
LHS || !L->isLoopInvariant(
RHS))
6802 return std::nullopt;
6806 Value *ToFoldStart, *ToFoldStep;
6808 return std::nullopt;
6811 if (ToFold->
getParent() != L->getHeader())
6812 return std::nullopt;
6817 return std::nullopt;
6821 const unsigned ExpansionBudget = [&]() {
6824 return std::min(Budget, SmallTC);
6826 return std::min(Budget, *SmallTC);
6832 const DataLayout &
DL = L->getHeader()->getModule()->getDataLayout();
6835 PHINode *ToHelpFold =
nullptr;
6836 const SCEV *TermValueS =
nullptr;
6837 bool MustDropPoison =
false;
6838 auto InsertPt = L->getLoopPreheader()->getTerminator();
6839 for (
PHINode &PN : L->getHeader()->phis()) {
6845 <<
"' is not SCEV-able, not qualified for the "
6846 "terminating condition folding.\n");
6851 if (!AddRec || !AddRec->
isAffine()) {
6853 <<
"' is not an affine add recursion, not qualified "
6854 "for the terminating condition folding.\n");
6872 const SCEV *TermValueSLocal =
PostInc->evaluateAtIteration(BECount, SE);
6875 dbgs() <<
"Is not safe to expand terminating value for phi node" << PN
6883 dbgs() <<
"Is too expensive to expand terminating value for phi node"
6900 bool MustDropPoisonLocal =
false;
6922 TermValueS = TermValueSLocal;
6923 MustDropPoison = MustDropPoisonLocal;
6927 <<
"Cannot find other AddRec IV to help folding\n";);
6930 <<
"\nFound loop that can fold terminating condition\n"
6932 <<
" TermCond: " << *TermCond <<
"\n"
6933 <<
" BrandInst: " << *BI <<
"\n"
6934 <<
" ToFold: " << *ToFold <<
"\n"
6935 <<
" ToHelpFold: " << *ToHelpFold <<
"\n");
6937 if (!ToFold || !ToHelpFold)
6938 return std::nullopt;
6939 return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
6954 bool Changed =
false;
6955 std::unique_ptr<MemorySSAUpdater> MSSAU;
6957 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
6960 const LSRInstance &Reducer =
6961 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
6962 Changed |= Reducer.getChanged();
6968 const DataLayout &
DL = L->getHeader()->getModule()->getDataLayout();
6973 unsigned numFolded =
Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
6987 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
6989 const DataLayout &
DL = L->getHeader()->getModule()->getDataLayout();
7002 const bool EnableFormTerm = [&] {
7014 if (EnableFormTerm) {
7016 auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
7021 BasicBlock *LoopPreheader = L->getLoopPreheader();
7027 <<
"New term-cond phi-node:\n"
7028 << *ToHelpFold <<
"\n");
7030 Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
7032 Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
7036 cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
7039 const DataLayout &
DL = L->getHeader()->getModule()->getDataLayout();
7043 "Terminating value was checked safe in canFoldTerminatingCondition");
7050 << *StartValue <<
"\n"
7051 <<
"Terminating value of new term-cond phi-node:\n"
7052 << *TermValue <<
"\n");
7058 Value *NewTermCond =
7060 "lsr_fold_term_cond.replaced_term_cond");
7066 << *OldTermCond <<
"\n"
7067 <<
"New term-cond:\n" << *NewTermCond <<
"\n");
7077 if (SalvageableDVIRecords.
empty())
7083 for (
const auto &L : LI) {
7087 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7088 "could not be identified.\n");
7092 for (
auto &Rec : SalvageableDVIRecords)
7094 SalvageableDVIRecords.
clear();
7103 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7104 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7105 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7106 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7107 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7108 *
L->getHeader()->getParent());
7109 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7110 *
L->getHeader()->getParent());
7111 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7112 *
L->getHeader()->getParent());
7113 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7116 MSSA = &MSSAAnalysis->getMSSA();
7133char LoopStrengthReduce::ID = 0;
7136 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
iv Induction Variable Users
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static std::optional< std::tuple< PHINode *, PHINode *, const SCEV *, bool > > canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, const LoopInfo &LI, const TargetTransformInfo &TTI)
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static cl::opt< cl::boolOrDefault > AllowTerminatingConditionFoldingAfterLSR("lsr-term-fold", cl::Hidden, cl::desc("Attempt to replace primary IV with other IV."))
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static cl::opt< bool > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::init(false), cl::desc("Attempt to drop solution if it is less profitable"))
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg)
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs, SmallSet< AssertingVH< DbgValueInst >, 2 > &DVIHandles)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
PowerPC TLS Dynamic Call Fixup
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This defines the Use class.
Virtual Register Rewriter
static const uint32_t IV[8]
Class recording the (high level) value of a variable.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Value handle that asserts if the Value is deleted.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
This is the shared class of boolean and integer constants.
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
An iterator for expression operands.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
void print(raw_ostream &OS) const
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
bool hasPoisonGeneratingFlags() const LLVM_READONLY
Return true if this operator has flags which may cause this instruction to evaluate to poison despite...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
PointerIntPair - This class implements a pair of a pointer and small integer.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
const Loop * getLoop() const
This is the base class for unary cast operator classes.
This node is the base class for n'ary commutative operators.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
bool hasNoUnsignedWrap() const
bool hasNoSelfWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This class represents a signed maximum selection.
This class represents a binary unsigned division operation.
This class represents an unsigned maximum selection.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static Type * getVoidTy(LLVMContext &C)
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
A Value handle that may be null.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provably be executed on the path to OnPathTo if Root produced...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Pass * createLoopStrengthReducePass()
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
void initializeLoopStrengthReducePass(PassRegistry &)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Option class for critical edge splitting.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.