84#include "llvm/Config/llvm-config.h"
132#define DEBUG_TYPE "loop-reduce"
149 cl::desc(
"Enable LSR phi elimination"));
154 cl::desc(
"Add instruction count to a LSR cost model"));
159 cl::desc(
"Narrow LSR complex solution using"
160 " expectation of registers number"));
166 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
167 " with the same ScaledReg and Scale"));
171 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
174 "Don't prefer any addressing mode"),
177 "Prefer pre-indexed addressing mode"),
180 "Prefer post-indexed addressing mode")));
184 cl::init(std::numeric_limits<uint16_t>::max()),
185 cl::desc(
"LSR search space complexity limit"));
189 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
193 cl::desc(
"Attempt to drop solution if it is less profitable"));
197 cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
201 cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
207 cl::desc(
"Stress test LSR IV chains"));
216 static const unsigned UnknownAddressSpace =
217 std::numeric_limits<unsigned>::max();
219 Type *MemTy =
nullptr;
220 unsigned AddrSpace = UnknownAddressSpace;
222 MemAccessTy() =
default;
223 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
226 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
232 unsigned AS = UnknownAddressSpace) {
253 constexpr Immediate(ScalarTy MinVal,
bool Scalable)
254 : FixedOrScalableQuantity(MinVal, Scalable) {}
256 constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
257 : FixedOrScalableQuantity(
V) {}
260 constexpr Immediate() =
delete;
262 static constexpr Immediate getFixed(ScalarTy MinVal) {
263 return {MinVal,
false};
265 static constexpr Immediate getScalable(ScalarTy MinVal) {
266 return {MinVal,
true};
268 static constexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
269 return {MinVal, Scalable};
271 static constexpr Immediate getZero() {
return {0,
false}; }
272 static constexpr Immediate getFixedMin() {
273 return {std::numeric_limits<int64_t>::min(),
false};
275 static constexpr Immediate getFixedMax() {
276 return {std::numeric_limits<int64_t>::max(),
false};
278 static constexpr Immediate getScalableMin() {
279 return {std::numeric_limits<int64_t>::min(),
true};
281 static constexpr Immediate getScalableMax() {
282 return {std::numeric_limits<int64_t>::max(),
true};
285 constexpr bool isLessThanZero()
const {
return Quantity < 0; }
287 constexpr bool isGreaterThanZero()
const {
return Quantity > 0; }
289 constexpr bool isCompatibleImmediate(
const Immediate &Imm)
const {
290 return isZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
293 constexpr bool isMin()
const {
294 return Quantity == std::numeric_limits<ScalarTy>::min();
297 constexpr bool isMax()
const {
298 return Quantity == std::numeric_limits<ScalarTy>::max();
302 constexpr Immediate addUnsigned(
const Immediate &RHS)
const {
303 assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
305 return {
Value, Scalable ||
RHS.isScalable()};
308 constexpr Immediate subUnsigned(
const Immediate &RHS)
const {
309 assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
311 return {
Value, Scalable ||
RHS.isScalable()};
315 constexpr Immediate mulUnsigned(
const ScalarTy RHS)
const {
317 return {
Value, Scalable};
347struct KeyOrderTargetImmediate {
348 bool operator()(
const Immediate &LHS,
const Immediate &RHS)
const {
349 if (
LHS.isScalable() && !
RHS.isScalable())
351 if (!
LHS.isScalable() &&
RHS.isScalable())
353 return LHS.getKnownMinValue() <
RHS.getKnownMinValue();
360struct KeyOrderSizeTAndImmediate {
361 bool operator()(
const std::pair<size_t, Immediate> &LHS,
362 const std::pair<size_t, Immediate> &RHS)
const {
363 size_t LSize =
LHS.first;
364 size_t RSize =
RHS.first;
366 return LSize < RSize;
367 return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
372#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
374 OS <<
"[NumUses=" << UsedByIndices.count() <<
']';
388 RegUsesTy RegUsesMap;
392 void countRegister(
const SCEV *Reg,
size_t LUIdx);
393 void dropRegister(
const SCEV *Reg,
size_t LUIdx);
394 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
396 bool isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const;
414RegUseTracker::countRegister(
const SCEV *Reg,
size_t LUIdx) {
415 std::pair<RegUsesTy::iterator, bool> Pair =
416 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
417 RegSortData &RSD = Pair.first->second;
420 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
421 RSD.UsedByIndices.set(LUIdx);
425RegUseTracker::dropRegister(
const SCEV *Reg,
size_t LUIdx) {
426 RegUsesTy::iterator It = RegUsesMap.find(Reg);
427 assert(It != RegUsesMap.end());
428 RegSortData &RSD = It->second;
429 assert(RSD.UsedByIndices.size() > LUIdx);
430 RSD.UsedByIndices.reset(LUIdx);
434RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
435 assert(LUIdx <= LastLUIdx);
439 for (
auto &Pair : RegUsesMap) {
441 if (LUIdx < UsedByIndices.
size())
442 UsedByIndices[LUIdx] =
443 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
444 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
449RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const {
450 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
451 if (
I == RegUsesMap.end())
455 if (i == -1)
return false;
456 if ((
size_t)i != LUIdx)
return true;
461 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
462 assert(
I != RegUsesMap.end() &&
"Unknown register!");
463 return I->second.UsedByIndices;
466void RegUseTracker::clear() {
480 Immediate BaseOffset = Immediate::getZero();
483 bool HasBaseReg =
false;
506 const SCEV *ScaledReg =
nullptr;
511 Immediate UnfoldedOffset = Immediate::getZero();
519 void canonicalize(
const Loop &L);
523 bool hasZeroEnd()
const;
525 size_t getNumRegs()
const;
528 void deleteBaseReg(
const SCEV *&S);
530 bool referencesReg(
const SCEV *S)
const;
531 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
532 const RegUseTracker &RegUses)
const;
553 for (
const SCEV *S :
Add->operands())
560 if (!AR->getStart()->isZero() && AR->isAffine()) {
563 AR->getStepRecurrence(SE),
579 const SCEV *NegOne = SE.
getSCEV(ConstantInt::getAllOnesValue(
581 for (
const SCEV *S : MyGood)
583 for (
const SCEV *S : MyBad)
602 BaseRegs.push_back(Sum);
608 BaseRegs.push_back(Sum);
616 return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
623bool Formula::isCanonical(
const Loop &L)
const {
624 assert((Scale == 0 || ScaledReg) &&
625 "ScaledReg must be non-null if Scale is non-zero");
628 return BaseRegs.size() <= 1;
633 if (Scale == 1 && BaseRegs.empty())
653void Formula::canonicalize(
const Loop &L) {
657 if (BaseRegs.empty()) {
659 assert(ScaledReg &&
"Expected 1*reg => reg");
660 assert(Scale == 1 &&
"Expected 1*reg => reg");
661 BaseRegs.push_back(ScaledReg);
669 ScaledReg = BaseRegs.pop_back_val();
680 if (
I != BaseRegs.end())
690bool Formula::unscale() {
694 BaseRegs.push_back(ScaledReg);
699bool Formula::hasZeroEnd()
const {
700 if (UnfoldedOffset || BaseOffset)
702 if (BaseRegs.size() != 1 || ScaledReg)
709size_t Formula::getNumRegs()
const {
710 return !!ScaledReg + BaseRegs.size();
715Type *Formula::getType()
const {
716 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
717 ScaledReg ? ScaledReg->getType() :
718 BaseGV ? BaseGV->getType() :
723void Formula::deleteBaseReg(
const SCEV *&S) {
724 if (&S != &BaseRegs.back())
730bool Formula::referencesReg(
const SCEV *S)
const {
736bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
737 const RegUseTracker &RegUses)
const {
739 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
741 for (
const SCEV *BaseReg : BaseRegs)
742 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
747#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
752 BaseGV->printAsOperand(
OS,
false);
754 if (BaseOffset.isNonZero()) {
758 for (
const SCEV *BaseReg : BaseRegs) {
760 OS <<
"reg(" << *BaseReg <<
')';
762 if (HasBaseReg && BaseRegs.empty()) {
764 OS <<
"**error: HasBaseReg**";
765 }
else if (!HasBaseReg && !BaseRegs.empty()) {
767 OS <<
"**error: !HasBaseReg**";
771 OS << Scale <<
"*reg(";
778 if (UnfoldedOffset.isNonZero()) {
780 OS <<
"imm(" << UnfoldedOffset <<
')';
821 bool IgnoreSignificantBits =
false) {
832 if (
RA.isAllOnes()) {
846 const APInt &LA =
C->getAPInt();
855 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
857 IgnoreSignificantBits);
858 if (!Step)
return nullptr;
860 IgnoreSignificantBits);
861 if (!Start)
return nullptr;
874 for (
const SCEV *S :
Add->operands()) {
876 if (!
Op)
return nullptr;
892 dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
907 IgnoreSignificantBits)) {
926 if (
C->getAPInt().getSignificantBits() <= 64) {
928 return Immediate::getFixed(
C->getValue()->getSExtValue());
933 if (Result.isNonZero())
936 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
939 if (Result.isNonZero())
944 }
else if (
const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
946 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(M->getOperand(0)))
947 if (isa<SCEVVScale>(M->getOperand(1))) {
949 return Immediate::getScalable(
C->getValue()->getSExtValue());
953 return Immediate::getZero();
959 if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
960 if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
970 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
986 bool isAddress = isa<LoadInst>(Inst);
987 if (
StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
988 if (SI->getPointerOperand() == OperandVal)
993 switch (
II->getIntrinsicID()) {
994 case Intrinsic::memset:
995 case Intrinsic::prefetch:
996 case Intrinsic::masked_load:
997 if (
II->getArgOperand(0) == OperandVal)
1000 case Intrinsic::masked_store:
1001 if (
II->getArgOperand(1) == OperandVal)
1004 case Intrinsic::memmove:
1005 case Intrinsic::memcpy:
1006 if (
II->getArgOperand(0) == OperandVal ||
1007 II->getArgOperand(1) == OperandVal)
1013 if (IntrInfo.
PtrVal == OperandVal)
1018 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1019 if (RMW->getPointerOperand() == OperandVal)
1022 if (CmpX->getPointerOperand() == OperandVal)
1031 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1035 AccessTy.MemTy = Ty;
1038 if (
const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1039 AccessTy.AddrSpace = SI->getPointerAddressSpace();
1040 }
else if (
const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
1041 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1042 }
else if (
const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1043 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1045 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1047 switch (
II->getIntrinsicID()) {
1048 case Intrinsic::prefetch:
1049 case Intrinsic::memset:
1050 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1051 AccessTy.MemTy = OperandVal->
getType();
1053 case Intrinsic::memmove:
1054 case Intrinsic::memcpy:
1056 AccessTy.MemTy = OperandVal->
getType();
1058 case Intrinsic::masked_load:
1059 AccessTy.AddrSpace =
1060 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1062 case Intrinsic::masked_store:
1063 AccessTy.AddrSpace =
1064 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1124 if (!Processed.
insert(S).second)
1128 for (
const SCEV *S :
Add->operands()) {
1144 Value *UVal = U->getValue();
1148 if (UI && UI->
getOpcode() == Instruction::Mul &&
1182 const LSRUse &LU,
const Formula &
F);
1186 const LSRUse &LU,
const Formula &
F,
1193 const Loop *
L =
nullptr;
1203 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1221 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1222 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1223 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1224 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0u);
1230 return C.NumRegs == ~0
u;
1233 void RateFormula(
const Formula &
F,
1243 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1245 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1258 Value *OperandValToReplace =
nullptr;
1268 Immediate
Offset = Immediate::getZero();
1270 LSRFixup() =
default;
1272 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1280struct UniquifierDenseMapInfo {
1283 V.push_back(
reinterpret_cast<const SCEV *
>(-1));
1289 V.push_back(
reinterpret_cast<const SCEV *
>(-2));
1325 MemAccessTy AccessTy;
1331 Immediate MinOffset = Immediate::getFixedMax();
1332 Immediate MaxOffset = Immediate::getFixedMin();
1336 bool AllFixupsOutsideLoop =
true;
1343 bool RigidFormula =
false;
1349 Type *WidestFixupType =
nullptr;
1359 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1361 LSRFixup &getNewFixup() {
1362 Fixups.push_back(LSRFixup());
1366 void pushFixup(LSRFixup &f) {
1368 if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1369 MaxOffset =
f.Offset;
1370 if (Immediate::isKnownLT(
f.Offset, MinOffset))
1371 MinOffset =
f.Offset;
1374 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1375 float getNotSelectedProbability(
const SCEV *Reg)
const;
1376 bool InsertFormula(
const Formula &
F,
const Loop &L);
1377 void DeleteFormula(Formula &
F);
1378 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1387 LSRUse::KindType Kind, MemAccessTy AccessTy,
1389 bool HasBaseReg, int64_t Scale,
1393 if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1397 if (
const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1399 if (
auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1401 if (
auto S = dyn_cast<SCEVNAryExpr>(Reg))
1403 [&](
unsigned i,
const SCEV *Reg) {
1404 return i + getSetupCost(Reg, Depth - 1);
1406 if (
auto S = dyn_cast<SCEVUDivExpr>(Reg))
1413void Cost::RateRegister(
const Formula &
F,
const SCEV *Reg,
1419 if (AR->getLoop() != L) {
1426 if (!AR->getLoop()->contains(L)) {
1436 unsigned LoopCost = 1;
1443 if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1444 if (Step->getAPInt() ==
F.BaseOffset.getFixedValue())
1447 const SCEV *LoopStep = AR->getStepRecurrence(*SE);
1448 if (isa<SCEVConstant>(LoopStep)) {
1449 const SCEV *LoopStart = AR->getStart();
1450 if (!isa<SCEVConstant>(LoopStart) &&
1456 C.AddRecCost += LoopCost;
1460 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1461 if (!Regs.
count(AR->getOperand(1))) {
1462 RateRegister(
F, AR->getOperand(1), Regs);
1474 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1476 C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1483void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *Reg,
1486 if (LoserRegs && LoserRegs->
count(Reg)) {
1490 if (Regs.
insert(Reg).second) {
1491 RateRegister(
F, Reg, Regs);
1492 if (LoserRegs && isLoser())
1497void Cost::RateFormula(
const Formula &
F,
1504 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1506 unsigned PrevAddRecCost =
C.AddRecCost;
1507 unsigned PrevNumRegs =
C.NumRegs;
1508 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1509 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1510 if (VisitedRegs.
count(ScaledReg)) {
1514 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1518 for (
const SCEV *BaseReg :
F.BaseRegs) {
1519 if (VisitedRegs.
count(BaseReg)) {
1523 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1529 size_t NumBaseParts =
F.getNumRegs();
1530 if (NumBaseParts > 1)
1535 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1541 for (
const LSRFixup &
Fixup : LU.Fixups) {
1542 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1543 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1547 else if (
Offset.isNonZero())
1553 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1574 if (
C.NumRegs > TTIRegNum) {
1577 if (PrevNumRegs > TTIRegNum)
1578 C.Insns += (
C.NumRegs - PrevNumRegs);
1580 C.Insns += (
C.NumRegs - TTIRegNum);
1593 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1597 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1600 if (LU.Kind != LSRUse::ICmpZero)
1601 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1607 C.Insns = std::numeric_limits<unsigned>::max();
1608 C.NumRegs = std::numeric_limits<unsigned>::max();
1609 C.AddRecCost = std::numeric_limits<unsigned>::max();
1610 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1611 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1612 C.ImmCost = std::numeric_limits<unsigned>::max();
1613 C.SetupCost = std::numeric_limits<unsigned>::max();
1614 C.ScaleCost = std::numeric_limits<unsigned>::max();
1618bool Cost::isLess(
const Cost &
Other)
const {
1620 C.Insns !=
Other.C.Insns)
1621 return C.Insns <
Other.C.Insns;
1625#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1628 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1629 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1630 if (
C.AddRecCost != 0)
1631 OS <<
", with addrec cost " <<
C.AddRecCost;
1632 if (
C.NumIVMuls != 0)
1633 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1634 << (
C.NumIVMuls == 1 ?
"" :
"s");
1635 if (
C.NumBaseAdds != 0)
1636 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1637 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1638 if (
C.ScaleCost != 0)
1639 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1641 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1642 if (
C.SetupCost != 0)
1643 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1652bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1654 if (
const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1655 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1656 if (PN->getIncomingValue(i) == OperandValToReplace &&
1657 L->contains(PN->getIncomingBlock(i)))
1662 return !
L->contains(UserInst);
1665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1669 if (
StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1671 Store->getOperand(0)->printAsOperand(
OS,
false);
1672 }
else if (UserInst->getType()->isVoidTy())
1673 OS << UserInst->getOpcodeName();
1675 UserInst->printAsOperand(
OS,
false);
1677 OS <<
", OperandValToReplace=";
1678 OperandValToReplace->printAsOperand(
OS,
false);
1680 for (
const Loop *PIL : PostIncLoops) {
1681 OS <<
", PostIncLoop=";
1682 PIL->getHeader()->printAsOperand(
OS,
false);
1696bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1698 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1701 return Uniquifier.
count(Key);
1705float LSRUse::getNotSelectedProbability(
const SCEV *Reg)
const {
1707 for (
const Formula &
F : Formulae)
1708 if (
F.referencesReg(Reg))
1710 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1715bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1716 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1718 if (!Formulae.empty() && RigidFormula)
1722 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1726 if (!Uniquifier.
insert(Key).second)
1730 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1731 "Zero allocated in a scaled register!");
1733 for (
const SCEV *BaseReg :
F.BaseRegs)
1734 assert(!BaseReg->
isZero() &&
"Zero allocated in a base register!");
1738 Formulae.push_back(
F);
1741 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1749void LSRUse::DeleteFormula(Formula &
F) {
1750 if (&
F != &Formulae.back())
1752 Formulae.pop_back();
1756void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1760 for (
const Formula &
F : Formulae) {
1761 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1762 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1766 for (
const SCEV *S : OldRegs)
1768 RegUses.dropRegister(S, LUIdx);
1771#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1773 OS <<
"LSR Use: Kind=";
1775 case Basic:
OS <<
"Basic";
break;
1776 case Special:
OS <<
"Special";
break;
1777 case ICmpZero:
OS <<
"ICmpZero";
break;
1779 OS <<
"Address of ";
1780 if (AccessTy.MemTy->isPointerTy())
1783 OS << *AccessTy.MemTy;
1786 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1789 OS <<
", Offsets={";
1790 bool NeedComma =
false;
1791 for (
const LSRFixup &
Fixup : Fixups) {
1792 if (NeedComma)
OS <<
',';
1798 if (AllFixupsOutsideLoop)
1799 OS <<
", all-fixups-outside-loop";
1801 if (WidestFixupType)
1802 OS <<
", widest fixup type: " << *WidestFixupType;
1811 LSRUse::KindType Kind, MemAccessTy AccessTy,
1813 bool HasBaseReg, int64_t Scale,
1816 case LSRUse::Address: {
1817 int64_t FixedOffset =
1818 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1819 int64_t ScalableOffset =
1820 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1822 HasBaseReg, Scale, AccessTy.AddrSpace,
1823 Fixup, ScalableOffset);
1825 case LSRUse::ICmpZero:
1832 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1837 if (Scale != 0 && Scale != -1)
1842 if (BaseOffset.isNonZero()) {
1845 if (BaseOffset.isScalable())
1855 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1864 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1866 case LSRUse::Special:
1868 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1875 Immediate MinOffset, Immediate MaxOffset,
1876 LSRUse::KindType Kind, MemAccessTy AccessTy,
1878 bool HasBaseReg, int64_t Scale) {
1879 if (BaseOffset.isNonZero() &&
1880 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1881 BaseOffset.isScalable() != MaxOffset.isScalable()))
1884 int64_t
Base = BaseOffset.getKnownMinValue();
1885 int64_t Min = MinOffset.getKnownMinValue();
1886 int64_t Max = MaxOffset.getKnownMinValue();
1889 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1892 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1895 HasBaseReg, Scale) &&
1901 Immediate MinOffset, Immediate MaxOffset,
1902 LSRUse::KindType Kind, MemAccessTy AccessTy,
1903 const Formula &
F,
const Loop &L) {
1911 assert((
F.isCanonical(L) ||
F.Scale != 0));
1913 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1918 Immediate MaxOffset, LSRUse::KindType Kind,
1920 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1923 BaseOffset, HasBaseReg, Scale) ||
1928 BaseGV, BaseOffset,
true, 0));
1932 Immediate MaxOffset, LSRUse::KindType Kind,
1933 MemAccessTy AccessTy,
const Formula &
F) {
1934 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1935 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1947 const LSRUse &LU,
const Formula &
F) {
1950 for (
const LSRFixup &
Fixup : LU.Fixups)
1952 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1953 F.Scale,
Fixup.UserInst))
1959 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1964 const LSRUse &LU,
const Formula &
F,
1973 return F.Scale != 1;
1976 case LSRUse::Address: {
1978 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1979 if (
F.BaseOffset.isScalable()) {
1980 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1981 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1983 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1984 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1988 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1991 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1994 "Legal addressing mode has an illegal cost!");
1995 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1997 case LSRUse::ICmpZero:
1999 case LSRUse::Special:
2009 LSRUse::KindType Kind, MemAccessTy AccessTy,
2013 if (BaseOffset.isZero() && !BaseGV)
2018 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2022 if (!HasBaseReg && Scale == 1) {
2032 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2042 Immediate MaxOffset, LSRUse::KindType Kind,
2043 MemAccessTy AccessTy,
const SCEV *S,
2046 if (S->
isZero())
return true;
2054 if (!S->
isZero())
return false;
2057 if (BaseOffset.isZero() && !BaseGV)
2060 if (BaseOffset.isScalable())
2065 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2068 BaseOffset, HasBaseReg, Scale);
2085 const SCEV *IncExpr;
2088 : UserInst(
U), IVOperand(
O), IncExpr(E) {}
2095 const SCEV *ExprBase =
nullptr;
2097 IVChain() =
default;
2098 IVChain(
const IVInc &Head,
const SCEV *
Base)
2099 : Incs(1, Head), ExprBase(
Base) {}
2106 return std::next(Incs.
begin());
2113 bool hasIncs()
const {
return Incs.
size() >= 2; }
2122 bool isProfitableIncrement(
const SCEV *OperExpr,
2123 const SCEV *IncExpr,
2148 bool Changed =
false;
2175 RegUseTracker RegUses;
2180 static const unsigned MaxChains = 8;
2197 void OptimizeShadowIV();
2200 void OptimizeLoopTermCond();
2204 void FinalizeChain(IVChain &Chain);
2205 void CollectChains();
2206 void GenerateIVChain(
const IVChain &Chain,
2209 void CollectInterestingTypesAndFactors();
2210 void CollectFixupsAndInitialFormulae();
2216 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2217 LSRUse::KindType Kind, MemAccessTy AccessTy);
2219 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2220 MemAccessTy AccessTy);
2222 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2224 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2226 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2227 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2228 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2229 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2231 void CollectLoopInvariantFixupsAndFormulae();
2233 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2234 unsigned Depth = 0);
2236 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2238 size_t Idx,
bool IsScaledReg =
false);
2239 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2240 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2241 const Formula &
Base,
size_t Idx,
2242 bool IsScaledReg =
false);
2243 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2244 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2245 const Formula &
Base,
2247 size_t Idx,
bool IsScaledReg =
false);
2248 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2249 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2250 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2251 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2252 void GenerateCrossUseConstantOffsets();
2253 void GenerateAllReuseFormulae();
2255 void FilterOutUndesirableDedicatedRegisters();
2257 size_t EstimateSearchSpaceComplexity()
const;
2258 void NarrowSearchSpaceByDetectingSupersets();
2259 void NarrowSearchSpaceByCollapsingUnrolledCode();
2260 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2261 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2262 void NarrowSearchSpaceByFilterPostInc();
2263 void NarrowSearchSpaceByDeletingCostlyFormulas();
2264 void NarrowSearchSpaceByPickingWinnerRegs();
2265 void NarrowSearchSpaceUsingHeuristics();
2270 const Cost &CurCost,
2280 const LSRUse &LU)
const;
2282 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2285 void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2288 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2297 bool getChanged()
const {
return Changed; }
2299 return ScalarEvolutionIVs;
2313void LSRInstance::OptimizeShadowIV() {
2315 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2323 Type *DestTy =
nullptr;
2324 bool IsSigned =
false;
2338 if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2340 DestTy = UCast->getDestTy();
2342 else if (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2344 DestTy = SCast->getDestTy();
2346 if (!DestTy)
continue;
2366 if (Mantissa == -1)
continue;
2370 unsigned Entry, Latch;
2380 if (!
Init)
continue;
2381 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2382 (
double)
Init->getSExtValue() :
2383 (
double)
Init->getZExtValue());
2387 if (!Incr)
continue;
2388 if (Incr->
getOpcode() != Instruction::Add
2389 && Incr->
getOpcode() != Instruction::Sub)
2405 if (!
C->getValue().isStrictlyPositive())
2413 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2415 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2416 : Instruction::FSub,
2435 if (
U.getUser() ==
Cond) {
2503 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2508 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2509 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2516 if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2517 Pred = ICmpInst::ICMP_SLE;
2519 }
else if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2520 Pred = ICmpInst::ICMP_SLT;
2522 }
else if (
const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2523 Pred = ICmpInst::ICMP_ULT;
2532 if (
Max->getNumOperands() != 2)
2535 const SCEV *MaxLHS =
Max->getOperand(0);
2536 const SCEV *MaxRHS =
Max->getOperand(1);
2541 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2554 "Loop condition operand is an addrec in a different loop!");
2558 Value *NewRHS =
nullptr;
2559 if (ICmpInst::isTrueWhenEqual(Pred)) {
2562 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2563 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2564 NewRHS = BO->getOperand(0);
2566 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2567 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2568 NewRHS = BO->getOperand(0);
2575 else if (
const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2576 NewRHS = SU->getValue();
2589 Cond->getOperand(0), NewRHS,
"scmp");
2593 Cond->replaceAllUsesWith(NewCond);
2596 Cond->eraseFromParent();
2598 if (
Cmp->use_empty())
2599 Cmp->eraseFromParent();
2605LSRInstance::OptimizeLoopTermCond() {
2622 L->getExitingBlocks(ExitingBlocks);
2630 for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2636 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2646 if (!FindIVUserForCond(
Cond, CondUse))
2660 if (!DT.
dominates(ExitingBlock, LatchBlock))
2665 if (LatchBlock != ExitingBlock)
2669 if (&UI != CondUse &&
2673 const SCEV *
A = IU.getStride(*CondUse, L);
2674 const SCEV *
B = IU.getStride(UI, L);
2675 if (!
A || !
B)
continue;
2688 if (
C->isOne() ||
C->isMinusOne())
2689 goto decline_post_inc;
2691 if (
C->getValue().getSignificantBits() >= 64 ||
2692 C->getValue().isMinSignedValue())
2693 goto decline_post_inc;
2696 MemAccessTy AccessTy =
2698 int64_t Scale =
C->getSExtValue();
2702 AccessTy.AddrSpace))
2703 goto decline_post_inc;
2708 AccessTy.AddrSpace))
2709 goto decline_post_inc;
2714 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2720 if (
Cond->getNextNonDebugInstruction() != TermBr) {
2721 if (
Cond->hasOneUse()) {
2722 Cond->moveBefore(TermBr);
2726 Cond = cast<ICmpInst>(
Cond->clone());
2727 Cond->setName(
L->getHeader()->getName() +
".termcond");
2749 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2756bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2757 bool HasBaseReg, LSRUse::KindType Kind,
2758 MemAccessTy AccessTy) {
2759 Immediate NewMinOffset = LU.MinOffset;
2760 Immediate NewMaxOffset = LU.MaxOffset;
2761 MemAccessTy NewAccessTy = AccessTy;
2766 if (LU.Kind != Kind)
2772 if (Kind == LSRUse::Address) {
2773 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2774 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2775 AccessTy.AddrSpace);
2780 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2782 LU.MaxOffset - NewOffset, HasBaseReg))
2784 NewMinOffset = NewOffset;
2785 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2787 NewOffset - LU.MinOffset, HasBaseReg))
2789 NewMaxOffset = NewOffset;
2795 if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
2796 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2800 LU.MinOffset = NewMinOffset;
2801 LU.MaxOffset = NewMaxOffset;
2802 LU.AccessTy = NewAccessTy;
2809std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2810 LSRUse::KindType Kind,
2811 MemAccessTy AccessTy) {
2819 Offset = Immediate::getFixed(0);
2822 std::pair<UseMapTy::iterator, bool>
P =
2826 size_t LUIdx =
P.first->second;
2827 LSRUse &LU =
Uses[LUIdx];
2828 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2830 return std::make_pair(LUIdx,
Offset);
2834 size_t LUIdx =
Uses.size();
2835 P.first->second = LUIdx;
2836 Uses.push_back(LSRUse(Kind, AccessTy));
2837 LSRUse &LU =
Uses[LUIdx];
2841 return std::make_pair(LUIdx,
Offset);
2845void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2846 if (&LU != &
Uses.back())
2851 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2857LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2858 const LSRUse &OrigLU) {
2860 for (LSRUse &LU :
Uses) {
2866 if (&LU != &OrigLU &&
2867 LU.Kind != LSRUse::ICmpZero &&
2868 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2869 LU.WidestFixupType == OrigLU.WidestFixupType &&
2870 LU.HasFormulaWithSameRegs(OrigF)) {
2872 for (
const Formula &
F : LU.Formulae) {
2875 if (
F.BaseRegs == OrigF.BaseRegs &&
2876 F.ScaledReg == OrigF.ScaledReg &&
2877 F.BaseGV == OrigF.BaseGV &&
2878 F.Scale == OrigF.Scale &&
2879 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2880 if (
F.BaseOffset.isZero())
2895void LSRInstance::CollectInterestingTypesAndFactors() {
2901 const SCEV *Expr = IU.getExpr(U);
2919 }
while (!Worklist.
empty());
2924 I = Strides.
begin(), E = Strides.
end();
I != E; ++
I)
2926 std::next(
I); NewStrideIter != E; ++NewStrideIter) {
2927 const SCEV *OldStride = *
I;
2928 const SCEV *NewStride = *NewStrideIter;
2939 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2941 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2942 Factors.insert(Factor->getAPInt().getSExtValue());
2947 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2948 Factors.insert(Factor->getAPInt().getSExtValue());
2954 if (
Types.size() == 1)
2966 for(; OI != OE; ++OI) {
2967 if (
Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2972 dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(Oper))) {
2984 if (
TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2985 return Trunc->getOperand(0);
3007 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
3009 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
3011 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
3018 if (SubExpr->getSCEVType() ==
scAddExpr)
3021 if (SubExpr->getSCEVType() !=
scMulExpr)
3027 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
3037bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3038 const SCEV *IncExpr,
3046 if (!isa<SCEVConstant>(IncExpr)) {
3048 if (isa<SCEVConstant>(SE.
getMinusSCEV(OperExpr, HeadExpr)))
3073 if (!Chain.hasIncs())
3076 if (!
Users.empty()) {
3077 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3079 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3082 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3090 if (isa<PHINode>(Chain.tailUserInst())
3091 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3094 const SCEV *LastIncExpr =
nullptr;
3095 unsigned NumConstIncrements = 0;
3096 unsigned NumVarIncrements = 0;
3097 unsigned NumReusedIncrements = 0;
3102 for (
const IVInc &Inc : Chain) {
3105 if (Inc.IncExpr->isZero())
3110 if (isa<SCEVConstant>(Inc.IncExpr)) {
3111 ++NumConstIncrements;
3115 if (Inc.IncExpr == LastIncExpr)
3116 ++NumReusedIncrements;
3120 LastIncExpr = Inc.IncExpr;
3125 if (NumConstIncrements > 1)
3132 cost += NumVarIncrements;
3136 cost -= NumReusedIncrements;
3138 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3155 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3156 const SCEV *LastIncExpr =
nullptr;
3157 for (; ChainIdx < NChains; ++ChainIdx) {
3158 IVChain &Chain = IVChainVec[ChainIdx];
3172 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
3178 if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.
isLoopInvariant(IncExpr, L))
3181 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3182 LastIncExpr = IncExpr;
3188 if (ChainIdx == NChains) {
3189 if (isa<PHINode>(UserInst))
3195 LastIncExpr = OperExpr;
3199 if (!isa<SCEVAddRecExpr>(LastIncExpr))
3202 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3204 ChainUsersVec.
resize(NChains);
3205 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3206 <<
") IV=" << *LastIncExpr <<
"\n");
3208 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3209 <<
") IV+" << *LastIncExpr <<
"\n");
3211 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3213 IVChain &Chain = IVChainVec[ChainIdx];
3217 if (!LastIncExpr->
isZero()) {
3218 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
3234 IVChain::const_iterator IncIter = Chain.Incs.begin();
3235 IVChain::const_iterator IncEnd = Chain.Incs.end();
3236 for( ; IncIter != IncEnd; ++IncIter) {
3237 if (IncIter->UserInst == OtherUse)
3240 if (IncIter != IncEnd)
3244 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3245 && IU.isIVUserOrOperand(OtherUse)) {
3248 NearUsers.
insert(OtherUse);
3253 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3278void LSRInstance::CollectChains() {
3285 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3294 if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
3304 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3305 ChainIdx < NChains; ++ChainIdx) {
3306 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3312 while (IVOpIter != IVOpEnd) {
3313 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3314 if (UniqueOperands.
insert(IVOpInst).second)
3315 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3316 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3321 for (
PHINode &PN :
L->getHeader()->phis()) {
3326 dyn_cast<Instruction>(PN.getIncomingValueForBlock(
L->getLoopLatch()));
3328 ChainInstruction(&PN, IncV, ChainUsersVec);
3331 unsigned ChainIdx = 0;
3332 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3333 UsersIdx < NChains; ++UsersIdx) {
3335 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3338 if (ChainIdx != UsersIdx)
3339 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3340 FinalizeChain(IVChainVec[ChainIdx]);
3343 IVChainVec.resize(ChainIdx);
3346void LSRInstance::FinalizeChain(IVChain &Chain) {
3347 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3348 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3350 for (
const IVInc &Inc : Chain) {
3352 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3353 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3354 IVIncSet.insert(UseI);
3361 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3362 Immediate IncOffset = Immediate::getZero();
3369 auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
3370 if (!IncVScale || IncVScale->getNumOperands() != 2 ||
3371 !isa<SCEVVScale>(IncVScale->getOperand(1)))
3373 auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
3374 if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
3376 IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
3392void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3396 const IVInc &Head = Chain.Incs[0];
3401 Value *IVSrc =
nullptr;
3402 while (IVOpIter != IVOpEnd) {
3413 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3414 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3417 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3419 if (IVOpIter == IVOpEnd) {
3421 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3424 assert(IVSrc &&
"Failed to find IV chain source");
3429 const SCEV *LeftOverExpr =
nullptr;
3434 for (
const IVInc &Inc : Chain) {
3436 if (isa<PHINode>(InsertPt))
3437 InsertPt =
L->getLoopLatch()->getTerminator();
3441 Value *IVOper = IVSrc;
3442 if (!Inc.IncExpr->isZero()) {
3447 LeftOverExpr = LeftOverExpr ?
3448 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3452 bool FoundBase =
false;
3453 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3456 if (!Remainder->
isZero()) {
3458 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3459 const SCEV *IVOperExpr =
3461 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3470 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3473 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3476 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3480 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3483 LeftOverExpr =
nullptr;
3486 Type *OperTy = Inc.IVOperand->getType();
3487 if (IVTy != OperTy) {
3489 "cannot extend a chained IV");
3491 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3493 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3494 if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3499 if (isa<PHINode>(Chain.tailUserInst())) {
3500 for (
PHINode &Phi :
L->getHeader()->phis()) {
3504 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3507 Value *IVOper = IVSrc;
3509 if (IVTy != PostIncTy) {
3511 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3512 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3513 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3515 Phi.replaceUsesOfWith(PostIncV, IVOper);
3521void LSRInstance::CollectFixupsAndInitialFormulae() {
3523 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3535 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3536 if (IVIncSet.count(UseI)) {
3537 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3541 LSRUse::KindType
Kind = LSRUse::Basic;
3542 MemAccessTy AccessTy;
3544 Kind = LSRUse::Address;
3548 const SCEV *S = IU.getExpr(U);
3559 if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3562 if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3564 if (CI->isEquality()) {
3567 Value *
NV = CI->getOperand(1);
3568 if (NV ==
U.getOperandValToReplace()) {
3569 CI->setOperand(1, CI->getOperand(0));
3570 CI->setOperand(0, NV);
3571 NV = CI->getOperand(1);
3578 (!
NV->getType()->isPointerTy() ||
3585 Kind = LSRUse::ICmpZero;
3587 }
else if (
L->isLoopInvariant(NV) &&
3588 (!isa<Instruction>(NV) ||
3589 DT.
dominates(cast<Instruction>(NV),
L->getHeader())) &&
3590 !
NV->getType()->isPointerTy()) {
3601 Kind = LSRUse::ICmpZero;
3603 assert(!isa<SCEVCouldNotCompute>(S));
3608 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3609 if (Factors[i] != -1)
3610 Factors.insert(-(
uint64_t)Factors[i]);
3616 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3617 size_t LUIdx =
P.first;
3619 LSRUse &LU =
Uses[LUIdx];
3622 LSRFixup &LF = LU.getNewFixup();
3623 LF.UserInst = UserInst;
3624 LF.OperandValToReplace =
U.getOperandValToReplace();
3625 LF.PostIncLoops = TmpPostIncLoops;
3627 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3630 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3632 F.initialMatch(S, L, SE);
3633 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU);
3634 VisitedLSRUse.
insert(LUIdx);
3637 if (!LU.WidestFixupType ||
3640 LU.WidestFixupType = LF.OperandValToReplace->getType();
3643 if (LU.Formulae.empty()) {
3644 InsertInitialFormula(S, LU, LUIdx);
3645 CountRegisters(LU.Formulae.back(), LUIdx);
3654void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3658 LU.RigidFormula =
true;
3661 F.initialMatch(S, L, SE);
3662 bool Inserted = InsertFormula(LU, LUIdx,
F);
3663 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3669LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3670 LSRUse &LU,
size_t LUIdx) {
3672 F.BaseRegs.push_back(S);
3673 F.HasBaseReg =
true;
3674 bool Inserted = InsertFormula(LU, LUIdx,
F);
3675 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3679void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3681 RegUses.countRegister(
F.ScaledReg, LUIdx);
3682 for (
const SCEV *BaseReg :
F.BaseRegs)
3683 RegUses.countRegister(BaseReg, LUIdx);
3688bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3691 "Formula is illegal");
3693 if (!LU.InsertFormula(
F, *L))
3696 CountRegisters(
F, LUIdx);
3706LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3715 while (!Worklist.
empty()) {
3719 if (!Visited.
insert(S).second)
3726 else if (
const SCEVUDivExpr *
D = dyn_cast<SCEVUDivExpr>(S)) {
3729 }
else if (
const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3730 const Value *
V = US->getValue();
3731 if (
const Instruction *Inst = dyn_cast<Instruction>(V)) {
3733 if (
L->contains(Inst))
continue;
3734 }
else if (isa<Constant>(V))
3737 for (
const Use &U :
V->uses()) {
3738 const Instruction *UserInst = dyn_cast<Instruction>(
U.getUser());
3747 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3750 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3752 cast<PHINode>(UserInst)->getIncomingBlock(
3767 if (isa<PHINode>(UserInst)) {
3768 const auto *PhiNode = cast<PHINode>(UserInst);
3769 bool HasIncompatibleEHPTerminatedBlock =
false;
3771 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3772 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3773 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3774 HasIncompatibleEHPTerminatedBlock =
true;
3779 if (HasIncompatibleEHPTerminatedBlock) {
3785 if (isa<CatchSwitchInst>(UserInst->
getParent()->getTerminator()))
3792 if (!isa<SCEVUnknown>(UserS))
3801 if (
const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3802 unsigned OtherIdx = !
U.getOperandNo();
3803 Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
3808 std::pair<size_t, Immediate>
P =
3809 getUse(S, LSRUse::Basic, MemAccessTy());
3810 size_t LUIdx =
P.first;
3812 LSRUse &LU =
Uses[LUIdx];
3813 LSRFixup &LF = LU.getNewFixup();
3814 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3815 LF.OperandValToReplace =
U;
3817 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3818 if (!LU.WidestFixupType ||
3821 LU.WidestFixupType = LF.OperandValToReplace->getType();
3822 InsertSupplementalFormula(US, LU, LUIdx);
3823 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3839 unsigned Depth = 0) {
3846 for (
const SCEV *S :
Add->operands()) {
3852 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3861 if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3863 Remainder =
nullptr;
3881 const SCEV *Remainder =
3894 LSRUse &LU,
const SCEV *S,
const Loop *L,
3896 if (LU.Kind != LSRUse::Address ||
3897 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3903 if (!isa<SCEVConstant>(LoopStep))
3909 if (!isa<SCEVConstant>(LoopStart) && SE.
isLoopInvariant(LoopStart, L))
3916void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3917 const Formula &
Base,
3920 const SCEV *BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[
Idx];
3932 if (AddOps.
size() == 1)
3946 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3952 InnerAddOps.append(std::next(J),
3957 if (InnerAddOps.size() == 1 &&
3959 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3967 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3971 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3976 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3979 F.ScaledReg =
nullptr;
3982 F.BaseRegs.erase(
F.BaseRegs.begin() +
Idx);
3983 }
else if (IsScaledReg)
3984 F.ScaledReg = InnerSum;
3986 F.BaseRegs[
Idx] = InnerSum;
3992 SC->getValue()->getZExtValue()))
3994 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3995 SC->getValue()->getZExtValue());
3997 F.BaseRegs.push_back(*J);
4002 if (InsertFormula(LU, LUIdx,
F))
4009 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4015void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4017 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4022 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4023 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4025 if (
Base.Scale == 1)
4026 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4032void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4035 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4036 (
Base.UnfoldedOffset.isNonZero()) <=
4044 Formula NewBase =
Base;
4045 NewBase.BaseRegs.clear();
4046 Type *CombinedIntegerType =
nullptr;
4047 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4050 if (!CombinedIntegerType)
4055 NewBase.BaseRegs.push_back(BaseReg);
4059 if (Ops.
size() == 0)
4064 auto GenerateFormula = [&](
const SCEV *Sum) {
4065 Formula
F = NewBase;
4073 F.BaseRegs.push_back(Sum);
4075 (void)InsertFormula(LU, LUIdx,
F);
4079 if (Ops.
size() > 1) {
4086 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4087 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4089 NewBase.UnfoldedOffset.getFixedValue(),
true));
4090 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4096void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4097 const Formula &
Base,
size_t Idx,
4101 if (
G->isZero() || !GV)
4105 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4110 F.BaseRegs[
Idx] =
G;
4111 (void)InsertFormula(LU, LUIdx,
F);
4115void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4118 if (
Base.BaseGV)
return;
4120 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4121 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4122 if (
Base.Scale == 1)
4123 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4128void LSRInstance::GenerateConstantOffsetsImpl(
4129 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4132 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4134 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4136 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4138 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4140 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4146 F.ScaledReg =
nullptr;
4148 F.deleteBaseReg(
F.BaseRegs[
Idx]);
4150 }
else if (IsScaledReg)
4153 F.BaseRegs[
Idx] = NewG;
4155 (void)InsertFormula(LU, LUIdx,
F);
4170 if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
4172 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
4173 const APInt &StepInt = StepRec->getAPInt();
4177 for (Immediate
Offset : Worklist) {
4179 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4186 for (Immediate
Offset : Worklist)
4190 if (
G->isZero() ||
Imm.isZero() ||
4191 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4194 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4195 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4200 F.BaseRegs[
Idx] =
G;
4205 (void)InsertFormula(LU, LUIdx,
F);
4209void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4215 if (LU.MaxOffset != LU.MinOffset)
4218 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4219 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4220 if (
Base.Scale == 1)
4221 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4227void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4229 if (LU.Kind != LSRUse::ICmpZero)
return;
4237 if (LU.MinOffset != LU.MaxOffset)
return;
4240 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4242 for (
const SCEV *BaseReg :
Base.BaseRegs)
4245 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4248 for (int64_t Factor : Factors) {
4253 if (
Base.BaseOffset.isMin() && Factor == -1)
4256 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4258 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4259 assert(Factor != 0 &&
"Zero factor not expected!");
4260 if (NewBaseOffset.getFixedValue() / Factor !=
4261 Base.BaseOffset.getFixedValue())
4269 Immediate
Offset = LU.MinOffset;
4270 if (
Offset.isMin() && Factor == -1)
4273 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4281 F.BaseOffset = NewBaseOffset;
4288 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4293 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4307 if (
F.UnfoldedOffset.isNonZero()) {
4308 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4310 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4311 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4312 Base.UnfoldedOffset.getFixedValue())
4316 IntTy,
F.UnfoldedOffset.getFixedValue()))
4321 (void)InsertFormula(LU, LUIdx,
F);
4328void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4335 if (
Base.Scale != 0 && !
Base.unscale())
4338 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4341 for (int64_t Factor : Factors) {
4342 Base.Scale = Factor;
4343 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4345 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4349 if (LU.Kind == LSRUse::Basic &&
4350 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4351 LU.AccessTy,
Base) &&
4352 LU.AllFixupsOutsideLoop)
4353 LU.Kind = LSRUse::Special;
4359 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4360 Base.BaseOffset.isZero() && !
Base.BaseGV)
4363 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4365 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4372 if (!Quotient->isZero()) {
4375 F.ScaledReg = Quotient;
4376 F.deleteBaseReg(
F.BaseRegs[i]);
4380 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4381 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4385 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4387 (void)InsertFormula(LU, LUIdx,
F);
4403 const SCEV *Result =
nullptr;
4404 for (
auto &L :
Loops) {
4408 if (!New || (Result && New != Result))
4413 assert(Result &&
"failed to create expression");
4418void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4420 if (
Base.BaseGV)
return;
4430 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4433 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4437 for (
auto &LF : LU.Fixups)
4438 Loops.push_back(LF.PostIncLoops);
4440 for (
Type *SrcTy : Types) {
4449 const SCEV *NewScaledReg =
4451 if (!NewScaledReg || NewScaledReg->
isZero())
4453 F.ScaledReg = NewScaledReg;
4455 bool HasZeroBaseReg =
false;
4456 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4457 const SCEV *NewBaseReg =
4459 if (!NewBaseReg || NewBaseReg->
isZero()) {
4460 HasZeroBaseReg =
true;
4463 BaseReg = NewBaseReg;
4470 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4474 (void)InsertFormula(LU, LUIdx,
F);
4487 const SCEV *OrigReg;
4490 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4498#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4500 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4501 <<
" , add offset " <<
Imm;
void LSRInstance::GenerateCrossUseConstantOffsets() {
  // Group the registers by their value without any added constant offset.
  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;

  DenseMap<const SCEV *, ImmMapTy> Map;
  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
  SmallVector<const SCEV *, 8> Sequence;
  for (const SCEV *Use : RegUses) {
    const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
    Immediate Imm = ExtractImmediate(Reg, SE);
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    if (Pair.second)
      Sequence.push_back(Reg);
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
  }

  // Now examine each set of registers with the same base value. Build up
  // a list of work to do and do the work in a separate step so that we're
  // not adding formulae and register counts while we're searching.
  SmallVector<WorkItem, 32> WorkItems;
  SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
      UniqueItems;
  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    // It's not worthwhile looking for reuse if there's only one offset.
    if (Imms.size() == 1)
      continue;

    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry : Imms)
                 dbgs() << ' ' << Entry.first;
               dbgs() << '\n');

    // Examine each offset.
    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
         J != JE; ++J) {
      const SCEV *OrigReg = J->second;

      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {
        LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
                          << '\n');
        continue;
      }

      // Conservatively examine offsets between this orig reg a few selected
      // other orig regs.
      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {
        LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
                          << '\n');
        continue;
      }
      // Only scalable if both terms are scalable, or if one is scalable and
      // the other is 0.
      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      // Compute (First + Last) / 2 without overflow using the fact that
      // First + Last == 2 * (First & Last) + (First ^ Last).
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      // If the result is negative and FI is odd and LI even (or vice versa),
      // we rounded towards -inf. Add 1 in that case, to round towards 0.
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE)
          continue;
        if (!JImm.isCompatibleImmediate(M->first))
          continue;

        // Compute the difference between the two.
        Immediate Imm = JImm.subUnsigned(M->first);
        for (unsigned LUIdx : UsedByIndices.set_bits())
          // Make a memo of this use, offset, and register tuple.
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
      }
    }
  }

  Map.clear();
  Sequence.clear();
  UsedByIndicesMap.clear();
  UniqueItems.clear();

  // Now iterate through the worklist and add new formulae.
  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    Immediate Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
    unsigned BitWidth = SE.getTypeSizeInBits(IntTy);

    // TODO: Use a more targeted data structure.
    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];
      F.unscale();
      // Use the immediate in the scaled register.
      if (F.ScaledReg == OrigReg) {
        if (!F.BaseOffset.isCompatibleImmediate(Imm))
          continue;
        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        // Don't create 50 + reg(-50).
        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))
          continue;
        Formula NewF = F;
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        NewF))
          continue;
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        // If the new scale is a constant in a register, and adding the
        // constant value to the immediate would produce a value closer to
        // zero than the immediate itself, then the formula isn't worthwhile.
        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
          if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
            continue;
          if (C->getValue()->isNegative() !=
                  (NewF.BaseOffset.isLessThanZero()) &&
              (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
                  .ule(std::abs(NewF.BaseOffset.getFixedValue())))
            continue;
        }

        // OK, looks good.
        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);
      } else {
        // Use the immediate in a base register.
        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          const SCEV *BaseReg = F.BaseRegs[N];
          if (BaseReg != OrigReg)
            continue;
          Formula NewF = F;
          if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
              !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
              !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
            continue;
          NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
          if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
                          LU.Kind, LU.AccessTy, NewF)) {
            if (AMK == TTI::AMK_PostIndexed &&
                mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
              continue;
            Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
            if (!isLegalAddImmediate(TTI, NewUnfoldedOffset))
              continue;
            NewF = F;
            NewF.UnfoldedOffset = NewUnfoldedOffset;
          }
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          // If the new formula has a constant in a register, and adding the
          // constant value to the immediate would produce a value closer to
          // zero than the immediate itself, then the formula isn't worthwhile.
          for (const SCEV *NewReg : NewF.BaseRegs)
            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
              if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
                goto skip_formula;
              if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                      .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                  (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                          .countr_zero() >=
                      (unsigned)llvm::countr_zero<uint64_t>(
                          NewF.BaseOffset.getFixedValue()))
                goto skip_formula;
            }

          // Ok, looks good.
          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
          break;
        skip_formula:;
        }
      }
    }
  }
}
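
/// Generate formulae for each use.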
void
LSRInstance::GenerateAllReuseFormulae() {
  // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
  // queries are more precise.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
  }

  GenerateCrossUseConstantOffsets();

  LLVM_DEBUG(dbgs() << "After generating reuse formulae:\n";
             print_uses(dbgs()));
}
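
/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.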
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  SmallPtrSet<const SCEV *, 16> LoserRegs;
  bool ChangedFormulae = false;

  // Collect the best formula for each unique set of shared registers. This
  // is reset for each use.
  using BestFormulaeTy =
      DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      // Some formulas are instant losers. For example, they may depend on
      // nonexistent AddRecs from other loops. These need to be filtered
      // immediately, otherwise heuristics could choose them over others
      // leading to an unsatisfactory solution.
      Cost CostF(L, SE, TTI, AMK);
      Regs.clear();
      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
      if (CostF.isLoser()) {
        // During initial formula generation, undesirable formulae are
        // generated by uses within other loops that have some non-trivial
        // address mode. Drop those now.
        LLVM_DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
                   dbgs() << "\n");
      } else {
        SmallVector<const SCEV *, 4> Key;
        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            Key.push_back(Reg);
        }
        if (F.ScaledReg &&
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);
        // Unstable sort by host order ok, because this is only used for
        // uniquifying.
        llvm::sort(Key);

        std::pair<BestFormulaeTy::const_iterator, bool> P =
            BestFormulae.insert(std::make_pair(Key, FIdx));
        if (P.second)
          continue;

        Formula &Best = LU.Formulae[P.first->second];

        Cost CostBest(L, SE, TTI, AMK);
        Regs.clear();
        CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
        if (CostF.isLess(CostBest))
          std::swap(F, Best);
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n"
                             "    in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');
      }
      ChangedFormulae = true;
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }

    // Now that we've filtered out some formulae, recompute the formula count.
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  size_t Power = 1;
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
    if (FSize >= ComplexityLimit) {
      Power = ComplexityLimit;
      break;
    }
    Power *= FSize;
    if (Power >= ComplexityLimit)
      break;
  }
  return Power;
}
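
/// When one formula uses a superset of the registers of another formula,
/// it won't help reduce register pressure (though it may not necessarily
/// hurt register pressure); remove it to simplify the system.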
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                         "which use a superset of registers used by other "
                         "formulae.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
          continue;
        // Look for a formula with a constant or GV in a register. If the use
        // also has a formula with that same value in an immediate field,
        // delete the one that uses a register.
        for (SmallVectorImpl<const SCEV *>::const_iterator
               I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
            Formula NewF = F;
            NewF.BaseOffset =
                Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                    (uint64_t)C->getValue()->getSExtValue());
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                         dbgs() << '\n');
              LU.DeleteFormula(F);
              --i;
              --e;
              Any = true;
              break;
            }
          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
              if (!F.BaseGV) {
                Formula NewF = F;
                NewF.BaseGV = GV;
                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                    (I - F.BaseRegs.begin()));
                if (LU.HasFormulaWithSameRegs(NewF)) {
                  LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                             dbgs() << '\n');
                  LU.DeleteFormula(F);
                  --i;
                  --e;
                  Any = true;
                  break;
                }
              }
          }
        }
      }
      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}
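
/// When there are many registers for expressions like A, A+1, A+2, etc.,
/// allocate a single register for them.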
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by assuming that uses separated "
                "by a constant offset will use the same registers.\n");

  // This is especially useful for unrolled loops.

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
        continue;

      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
      if (!LUThatHas)
        continue;

      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/false,
                              LU.Kind, LU.AccessTy))
        continue;

      LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs());
                 dbgs() << '\n');

      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

      // Transfer the fixups of LU to LUThatHas.
      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
        LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
      }

      // Delete formulae from the new use which are no longer legal.
      bool Any = false;
      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                     dbgs() << '\n');
          LUThatHas->DeleteFormula(F);
          --i;
          --e;
          Any = true;
        }
      }

      if (Any)
        LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      // Delete the old use.
      DeleteUse(LU, LUIdx);
      --LUIdx;
      --NumUses;
      break;
    }
  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
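
/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.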
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                         "undesirable dedicated registers.\n");

    FilterOutUndesirableDedicatedRegisters();

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}
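
/// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
/// pick the best one and delete the others. Keeping more formulae that
/// differ in their Scale/ScaledReg pair preserves useful variety in the
/// search space; keeping near-duplicates within a pair does not.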
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by choosing the best Formula "
                "from the Formulae with the same Scale and ScaledReg.\n");

  // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;

  BestFormulaeTy BestFormulae;
  bool ChangedFormulae = false;
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    // Return true if Formula FA is better than Formula FB.
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      // First we will try to choose the Formula with fewer new registers.
      // For a register used by current Formula, the more the register is
      // shared among LSRUses, the less we increase the register number
      // counter of the formula.
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      }
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      }
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      // If the new register numbers are the same, choose the Formula with
      // less Cost.
      Cost CostFA(L, SE, TTI, AMK);
      Cost CostFB(L, SE, TTI, AMK);
      Regs.clear();
      CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
      Regs.clear();
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
      return CostFA.isLess(CostFB);
    };

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (!F.ScaledReg)
        continue;
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      if (P.second)
        continue;

      Formula &Best = LU.Formulae[P.first->second];
      if (IsBetterThan(F, Best))
        std::swap(F, Best);
      LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                 dbgs() << "\n"
                           "    in favor of formula ";
                 Best.print(dbgs()); dbgs() << '\n');
      ChangedFormulae = true;
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
  if (AMK != TTI::AMK_PostIndexed)
    return;
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by choosing the lowest "
                       "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (LU.Kind != LSRUse::Address)
      continue;
    if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&
        !TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))
      continue;

    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n");
        LU.DeleteFormula(F);
        --FIdx;
        --NumForms;
        Any = true;
      }
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    if (EstimateSearchSpaceComplexity() < ComplexityLimit)
      break;
  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
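
/// Delete formulae whose expected register count is high. For each use,
/// estimate how many new registers each formula would introduce, weighting
/// each register by the probability that it is not selected by other uses,
/// and keep only the cheapest formula.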
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  // Set of Regs which will definitely be used in the final solution.
  SmallPtrSet<const SCEV *, 4> UniqRegs;

  // Map each register to its expected "not selected" probability across uses.
  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    if (UniqRegs.count(Reg))
      continue;
    float PNotSel = 1;
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
        continue;
      float P = LU.getNotSelectedProbability(Reg);
      if (P != 0.0)
        PNotSel *= P;
      else
        UniqRegs.insert(Reg);
    }
    RegNumMap.insert(std::make_pair(Reg, PNotSel));
  }

  LLVM_DEBUG(
      dbgs() << "Narrowing the search space by deleting costly formulas\n");

  // Delete formulas where the registers-number expectation is high.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // If nothing to delete - continue.
    if (LU.Formulae.size() < 2)
      continue;
    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    size_t MinIdx = 0;
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      float FRegNum = 0;
      float FARegNum = 0;
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
          continue;
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
        if (isa<SCEVAddRecExpr>(BaseReg))
          FARegNum +=
              RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      }
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
          FRegNum +=
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
          if (isa<SCEVAddRecExpr>(ScaledReg))
            FARegNum +=
                RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
        }
      }
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;
        MinIdx = i;
      }
    }
    LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());
               dbgs() << " with min reg num " << FMinRegNum << '\n');
    if (MinIdx != 0)
      std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      LLVM_DEBUG(dbgs() << "  Deleting "; LU.Formulae.back().print(dbgs());
                 dbgs() << '\n');
      LU.Formulae.pop_back();
    }
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
    LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');
    // When we choose the formula, the regs become unique.
    UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    if (F.ScaledReg)
      UniqRegs.insert(F.ScaledReg);
  }
  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
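
/// Return true if \p Best is a simpler base expression for the target than
/// \p Reg for the given memory access: the two must have the same type (and
/// the same loop, for addrecs), and their constant difference must fold into
/// the addressing mode in one direction but not the other.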
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
                                       ScalarEvolution &SE, const SCEV *Best,
                                       const SCEV *Reg,
                                       MemAccessTy AccessType) {
  if (Best->getType() != Reg->getType() ||
      (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
       cast<SCEVAddRecExpr>(Best)->getLoop() !=
           cast<SCEVAddRecExpr>(Reg)->getLoop()))
    return false;
  std::optional<APInt> Diff = SE.computeConstantDifference(Best, Reg);
  if (!Diff)
    return false;

  return TTI.isLegalAddressingMode(
             AccessType.MemTy, /*BaseGV=*/nullptr,
             /*BaseOffset=*/Diff->getSExtValue(),
             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&
         !TTI.isLegalAddressingMode(
             AccessType.MemTy, /*BaseGV=*/nullptr,
             /*BaseOffset=*/-Diff->getSExtValue(),
             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);
}
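
/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.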
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  // With all other options exhausted, loop until the system is simple
  // enough to handle.
  SmallPtrSet<const SCEV *, 4> Taken;
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    // Ok, we have too many of formulae on our hands to conveniently handle.
    // Use a rough heuristic to thin out the list.
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    // Pick the register which is used by the most LSRUses, which is likely
    // to be a good reuse register candidate.
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        continue;
      if (!Best) {
        Best = Reg;
        BestNum = RegUses.getUsedByIndices(Reg).count();
      } else {
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
          Best = Reg;
          BestNum = Count;
        }
        // If the scores are the same, but this register is simpler for the
        // target (e.g. {x,+,1} rather than {x+24,+,1}, whose base is likely
        // cheaper to compute), opt for that.
        if (Count == BestNum) {
          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
              IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
                                         Uses[LUIdx].AccessTy)) {
            Best = Reg;
            BestNum = Count;
          }
        }
      }
    }
    assert(Best && "Failed to find best LSRUse candidate");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    Taken.insert(Best);

    // In any use with formulae which references this register, delete
    // formulae which don't reference it.
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best))
        continue;

      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                     dbgs() << '\n');
          LU.DeleteFormula(F);
          --e;
          --i;
          Any = true;
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
        }
      }

      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}
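
/// If there are an extraordinary number of formulae to choose from, use some
/// rough heuristics to prune down the number of formulae. This keeps the main
/// solver from taking an extraordinary amount of time in some worst-case
/// scenarios.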
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  if (FilterSameScaledReg)
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
  if (LSRExpNarrow)
    NarrowSearchSpaceByDeletingCostlyFormulas();
  else
    NarrowSearchSpaceByPickingWinnerRegs();
}
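
/// This is the recursive solver.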
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               Cost &SolutionCost,
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  // Some ideas:
  //  - prune more aggressively.
  //  - use a cache of "known bad" formulae to prune the search.

  const LSRUse &LU = Uses[Workspace.size()];

  // If this use references any register that's already a part of the
  // in-progress solution, consider it a requirement that a formula must
  // reference that register in order to be considered. This prunes out
  // unprofitable searching.
  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))
      ReqRegs.insert(S);

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
    // Ignore formulae which may not be ideal in terms of register reuse of
    // ReqRegs. The formula should use all required registers before
    // introducing new ones.
    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
    for (const SCEV *Reg : ReqRegs) {
      if ((F.ScaledReg && F.ScaledReg == Reg) ||
          is_contained(F.BaseRegs, Reg)) {
        --NumReqRegsToFind;
        if (NumReqRegsToFind == 0)
          break;
      }
    }
    if (NumReqRegsToFind != 0) {
      // If none of the formulae satisfied the required registers, then we
      // could clear ReqRegs and try again. Currently, we simply give up in
      // this case.
      continue;
    }

    // Evaluate the cost of the current formula. If it's already worse than
    // the current best, prune the search at that point.
    NewCost = CurCost;
    NewRegs = CurRegs;
    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
    if (NewCost.isLess(SolutionCost)) {
      Workspace.push_back(&F);
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
      } else {
        LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
                   dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs) dbgs() << "- " << *S << "\n";
                   dbgs() << '\n');

        SolutionCost = NewCost;
        Solution = Workspace;
      }
      Workspace.pop_back();
    }
  }
}
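
/// Choose one formula from each use. Returns the results in the given
/// Solution vector.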
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  Workspace.reserve(Uses.size());

  // SolveRecurse does all the work.
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
    return;
  }

  // Ok, we've now made all our decisions.
  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               dbgs() << "  ";
               Uses[i].print(dbgs());
               dbgs() << "\n"
                         "    ";
               Solution[i]->print(dbgs());
               dbgs() << '\n';
             });

  assert(Solution.size() == Uses.size() && "Malformed solution!");

  const bool EnableDropUnprofitableSolution = [&] {
    switch (AllowDropSolutionIfLessProfitable) {
    case cl::BOU_TRUE:
      return true;
    case cl::BOU_FALSE:
      return false;
    case cl::BOU_UNSET:
      return TTI.shouldDropLSRSolutionIfLessProfitable();
    }
    llvm_unreachable("Unhandled cl::boolOrDefault enum");
  }();

  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
      LLVM_DEBUG(
          dbgs() << "Baseline is more profitable than chosen solution, "
                    "add option 'lsr-drop-solution' to drop LSR solution.\n");
    else {
      LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
                           "solution, dropping LSR solution.\n";);
      Solution.clear();
    }
  }
}
BasicBlock::iterator LSRInstance::HoistInsertPosition(
    BasicBlock::iterator IP,
    const SmallVectorImpl<Instruction *> &Inputs) const {
  Instruction *Tentative = &*IP;
  while (true) {
    bool AllDominate = true;
    Instruction *BetterPos = nullptr;
    // Don't bother attempting to insert before a catchswitch, their basic
    // block cannot have other non-PHI instructions.
    if (isa<CatchSwitchInst>(Tentative))
      return IP;

    for (Instruction *Inst : Inputs) {
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      // Attempt to find an insert position in the middle of the block,
      // instead of at the end, so that it can be used for other expansions.
      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))
        BetterPos = &*std::next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    if (BetterPos)
      IP = BetterPos->getIterator();
    else
      IP = Tentative->getIterator();

    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;

    BasicBlock *IDom;
    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
      if (!Rung)
        return IP;
      Rung = Rung->getIDom();
      if (!Rung)
        return IP;
      IDom = Rung->getBlock();

      // Don't climb into a loop though.
      const Loop *IDomLoop = LI.getLoopFor(IDom);
      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
      if (IDomDepth <= IPLoopDepth &&
          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
        break;
    }

    Tentative = IDom->getTerminator();
  }

  return IP;
}
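
/// Determine an input position which will be dominated by the operands and
/// which will dominate the result.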
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
    BasicBlock::iterator LowestIP, const LSRFixup &LF,
    const LSRUse &LU) const {
  // Collect some instructions which must be dominated by the
  // expanding replacement. These must be dominated by any operands that
  // will be required in the expansion.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
    Inputs.push_back(I);
  if (LU.Kind == LSRUse::ICmpZero)
    if (Instruction *I =
            dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
    else
      Inputs.push_back(IVIncInsertPos);
  }
  // The expansion must also be dominated by the increment positions of any
  // loops it for which it is using post-inc mode.
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L)
      continue;

    // Be dominated by the loop exit.
    SmallVector<BasicBlock *, 4> ExitingBlocks;
    PIL->getExitingBlocks(ExitingBlocks);
    if (!ExitingBlocks.empty()) {
      BasicBlock *BB = ExitingBlocks[0];
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
        BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
      Inputs.push_back(BB->getTerminator());
    }
  }

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
         && !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  // Then, climb up the immediate dominator tree as far as we can go while
  // still being dominated by the input positions.
  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);

  // Don't insert instructions before PHI nodes.
  while (isa<PHINode>(IP)) ++IP;

  // Ignore landingpad instructions.
  while (IP->isEHPad()) ++IP;

  // Ignore debug intrinsics.
  while (isa<DbgInfoIntrinsic>(IP)) ++IP;

  // Set IP below instructions recently inserted by SCEVExpander. This keeps
  // the expander's instruction tracking consistent.
  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
    ++IP;

  return IP;
}
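
/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding").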
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
                           const Formula &F, BasicBlock::iterator IP,
                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  // Determine an input position which will be dominated by the operands and
  // which will dominate the result.
  IP = AdjustInsertPositionForExpand(IP, LF, LU);
  Rewriter.setInsertPoint(&*IP);

  // Inform the Rewriter if we have a post-increment use, so that it can
  // perform an advantageous expansion.
  Rewriter.setPostInc(LF.PostIncLoops);

  // This is the type that the user actually needs.
  Type *OpTy = LF.OperandValToReplace->getType();
  // This will be the type that we'll initially expand to.
  Type *Ty = F.getType();
  if (!Ty)
    // No type known; just expand directly to the ultimate type.
    Ty = OpTy;
  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    // Expand directly to the ultimate type if it's the right size.
    Ty = OpTy;
  // This is the type to do integer arithmetic in.
  Type *IntTy = SE.getEffectiveSCEVType(Ty);

  // Build up a list of operands to add together to form the full base.
  SmallVector<const SCEV *, 8> Ops;

  // Expand the BaseRegs portion.
  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");

    // If we're expanding for a post-inc user, make the post-inc adjustment.
    Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
  }

  // Expand the ScaledReg portion.
  Value *ICmpScaledV = nullptr;
  if (F.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;

    // If we're expanding for a post-inc user, make the post-inc adjustment.
    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
    ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);

    if (LU.Kind == LSRUse::ICmpZero) {
      // Expand ScaleReg as if it was part of the base regs.
      assert(F.Scale == -1 &&
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
    } else {
      // Otherwise just expand the scaled register and an explicit scale,
      // which is expected to be matched as part of the address.
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
      if (F.Scale != 1)
        ScaledS =
            SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
      Ops.push_back(ScaledS);
    }
  }

  // Expand the GV portion.
  if (F.BaseGV)
    Ops.push_back(SE.getUnknown(F.BaseGV));

  // Flush the operand list to suppress SCEVExpander hoisting of both folded
  // and unfolded offsets into a register.
  if (!Ops.empty() && LU.Kind == LSRUse::Address &&
      isAMCompletelyFolded(TTI, LU, F)) {
    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
    Ops.clear();
    Ops.push_back(SE.getUnknown(FullV));
  }

  // Expand the immediate portion.
  assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
         "Expanding mismatched offsets\n");
  Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
  if (Offset.isNonZero()) {
    if (LU.Kind == LSRUse::ICmpZero) {
      // The other interesting way of "folding" with an ICmpZero is to use a
      // negated immediate.
      if (!ICmpScaledV)
        ICmpScaledV =
            ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue());
      else {
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());
      }
    } else {
      // Just add the immediate values. These again are expected to be matched
      // as part of the address.
      Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
    }
  }

  // Expand the unfolded offset portion.
  Immediate UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset.isNonZero())
    // Just add the immediate values.
    Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));

  // Emit instructions summing all the operands.
  const SCEV *FullS =
      Ops.empty() ? SE.getConstant(IntTy, 0) : SE.getAddExpr(Ops);
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty);

  // We're done expanding now, so reset the rewriter.
  Rewriter.clearPostInc();

  // An ICmpZero Formula represents an ICmp which we're handling as a
  // comparison against zero. Now that we've expanded an expression for that
  // form, update the ICmp's other operand.
  if (LU.Kind == LSRUse::ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
      DeadInsts.emplace_back(OperandIsInstr);
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                        "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        Instruction *Cast = CastInst::Create(
            CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false),
            ICmpScaledV, OpTy, "tmp", CI->getIterator());
        ICmpScaledV = Cast;
      }
      CI->setOperand(1, ICmpScaledV);
    } else {
      // A scale of 1 means that the scale has been expanded as part of the
      // base regs.
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)Offset.getFixedValue());
      if (C->getType() != OpTy) {
        C = ConstantFoldCastOperand(
            CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy,
            CI->getDataLayout());
        assert(C && "Cast of ConstantInt should have folded");
      }

      CI->setOperand(1, C);
    }
  }

  return FullV;
}
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
                                const LSRFixup &LF, const Formula &F,
                                SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  DenseMap<BasicBlock *, Value *> Inserted;

  // Inserting instructions in the loop and using them as PHI's input could
  // break LCSSA if the PHI's parent block is not a loop exit. In this case
  // update the pending fixups accordingly.
  bool needUpdateFixups = false;

  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    if (PN->getIncomingValue(i) != LF.OperandValToReplace)
      continue;

    BasicBlock *BB = PN->getIncomingBlock(i);

    // If this is a critical edge, split the edge so that we do not insert
    // the code on all predecessor/successor paths. We do this unless this
    // is the canonical backedge for this loop.
    if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
        !isa<IndirectBrInst>(BB->getTerminator()) &&
        !isa<CatchSwitchInst>(BB->getTerminator())) {
      BasicBlock *Parent = PN->getParent();
      Loop *PNLoop = LI.getLoopFor(Parent);
      if (!PNLoop || Parent != PNLoop->getHeader()) {
        // Split the critical edge.
        BasicBlock *NewBB = nullptr;
        if (!Parent->isLandingPad()) {
          NewBB =
              SplitCriticalEdge(BB, Parent,
                                CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
                                    .setMergeIdenticalEdges()
                                    .setKeepOneInputPHIs());
        }
        // If SplitCriticalEdge succeeded, redirect this incoming edge to the
        // new block and record that fixups may need updating.
        if (NewBB) {
          if (L->contains(BB) && !L->contains(PN))
            NewBB->moveBefore(PN->getParent());

          // Splitting the edge can reduce the number of PHI entries we have.
          e = PN->getNumIncomingValues();
          BB = NewBB;
          i = PN->getBasicBlockIndex(BB);

          needUpdateFixups = true;
        }
      }
    }

    std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
        Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
    if (!Pair.second)
      PN->setIncomingValue(i, Pair.first->second);
    else {
      Value *FullV =
          Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts);

      // If this is reuse-by-noop-cast, insert the noop cast.
      Type *OpTy = LF.OperandValToReplace->getType();
      if (FullV->getType() != OpTy)
        FullV = CastInst::Create(
            CastInst::getCastOpcode(FullV, false, OpTy, false), FullV,
            LF.OperandValToReplace->getType(), "tmp",
            BB->getTerminator()->getIterator());

      // If the incoming block for this value is not in the loop, it means the
      // current PHI is not in a loop exit, so we must create a LCSSA PHI for
      // the inserted value.
      if (auto *I = dyn_cast<Instruction>(FullV))
        if (L->contains(I) && !L->contains(BB))
          InsertedNonLCSSAInsts.insert(I);

      PN->setIncomingValue(i, FullV);
      Pair.first->second = FullV;
    }

    // If LSR splits a critical edge and the phi node has other pending
    // fixup operands, we need to update those pending fixups. Otherwise
    // formulae will not be implemented completely and some instructions
    // will not be eliminated.
    if (needUpdateFixups) {
      for (LSRUse &LU : Uses)
        for (LSRFixup &Fixup : LU.Fixups)
          // If a fixup is supposed to rewrite some operand in the phi
          // that was just updated, it may have already moved to
          // another phi node. Such a fixup requires an update.
          if (Fixup.UserInst == PN) {
            // Check if the fixup was moved into a new phi.
            bool foundInOriginalPHI = false;
            for (const auto &val : PN->incoming_values())
              if (val == Fixup.OperandValToReplace) {
                foundInOriginalPHI = true;
                break;
              }

            // If the fixup has not moved, nothing to do.
            if (foundInOriginalPHI)
              continue;

            // Find the new phi which contains the fixup's operand.
            for (PHINode &Phi : PN->getParent()->phis()) {
              PHINode *NewPN = &Phi;
              if (NewPN == PN)
                continue;
              for (const auto &val : NewPN->incoming_values())
                if (val == Fixup.OperandValToReplace)
                  Fixup.UserInst = NewPN;
            }
          }
    }
  }
}
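
/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding"), and update the UserInst to reference the
/// newly expanded value.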
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                          const Formula &F,
                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LU, LF, F, DeadInsts);
  } else {
    Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);

    // If this is reuse-by-noop-cast, insert the noop cast.
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                           FullV, OpTy, "tmp", LF.UserInst->getIterator());
      FullV = Cast;
    }

    // Update the user. ICmpZero is handled specially here, because Expand may
    // have updated one of the operands of the icmp already.
    if (LU.Kind == LSRUse::ICmpZero)
      LF.UserInst->setOperand(0, FullV);
    else
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
  }
  if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
    DeadInsts.emplace_back(OperandIsInstr);
}
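
// Trying to hoist the IVInc to the loop header if all IVInc users are in
// the loop header. This helps the backend generate post-index load/store
// when the latch block differs from the loop header block.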
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
                          const LSRUse &LU, Instruction *IVIncInsertPos,
                          Loop *L) {
  if (LU.Kind != LSRUse::Address)
    return false;

  // Conservatively, only hoist to the loop header for now.
  BasicBlock *LHeader = L->getHeader();
  if (IVIncInsertPos->getParent() == LHeader)
    return false;

  if (!Fixup.OperandValToReplace ||
      any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
        Instruction *UI = cast<Instruction>(U);
        return UI->getParent() != LHeader;
      }))
    return false;

  // Check that the hoisted increment can still fold as a post-index
  // load/store on the target.
  Instruction *I = Fixup.UserInst;
  Type *Ty = I->getType();
  return Ty->isIntegerTy() &&
         ((isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
          (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)));
}
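
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.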
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakTrackingVH, 16> DeadInsts;

  // Mark phi nodes that terminate chains so the expander tries to reuse them.
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);
  }

  // Expand the new value definitions and update the users.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      Instruction *InsertPos =
          canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
              ? L->getHeader()->getTerminator()
              : IVIncInsertPos;
      Rewriter.setIVIncInsertPos(L, InsertPos);
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
      Changed = true;
    }

  auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
  formLCSSAForInstructions(InsertedInsts, DT, LI, &SE);

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, DeadInsts);
    Changed = true;
  }

  for (const WeakVH &IV : Rewriter.getInsertedIVs())
    if (IV && dyn_cast<Instruction>(&*IV)->getParent())
      ScalarEvolutionIVs.push_back(IV);

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
                                                                  &TLI, MSSAU);
  // In our cost analysis above, we assume that each addrec consumes exactly
  // one register, and arrange to have increments inserted just before the
  // latch to maximize the chance this is true. However, if we reused
  // existing IVs, we now need to move the increments to match our
  // expectations; otherwise our cost model chose a non-optimal result for
  // the actual schedule.
  for (PHINode &PN : L->getHeader()->phis()) {
    BinaryOperator *BO = nullptr;
    Value *Start = nullptr, *Step = nullptr;
    if (!matchSimpleRecurrence(&PN, BO, Start, Step))
      continue;

    switch (BO->getOpcode()) {
    case Instruction::Sub:
      // sub is non-commutative - match handling elsewhere in LSR.
      if (BO->getOperand(0) != &PN)
        continue;
      break;
    case Instruction::Add:
      break;
    default:
      continue;
    }

    if (!isa<Constant>(Step))
      // If not a constant step, might increase register pressure.
      // (We assume constants have been canonicalized to RHS.)
      continue;

    if (BO->getParent() == IVIncInsertPos->getParent())
      // Only bother moving across blocks. Isel can handle the block-local
      // case.
      continue;

    // Can we legally schedule the inc at the desired point?
    if (!llvm::all_of(BO->uses(),
                      [&](Use &U) { return DT.dominates(IVIncInsertPos, U); }))
      continue;
    BO->moveBefore(IVIncInsertPos);
    Changed = true;
  }
}
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI, AssumptionCache &AC,
                         TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
      MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
                            ? PreferredAddresingMode
                            : TTI.getPreferredAddressingMode(L, &SE)),
      Rewriter(SE, L->getHeader()->getDataLayout(), "lsr", false),
      BaselineCost(L, SE, TTI, AMK) {
  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm())
    return;

  // If there's no interesting work to be done, bail early.
  if (IU.empty())
    return;

  // If there's too much analysis to be done, bail early. We won't be able to
  // model the problem anyway.
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      (void)U;
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
                        << "\n");
      return;
    }
    // Bail out if we have a PHI on an EHPad that gets a value from a
    // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
    // no good place to stick any instructions.
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
      auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
      if (isa<FuncletPadInst>(FirstNonPHI) ||
          isa<CatchSwitchInst>(FirstNonPHI))
        for (BasicBlock *PredBB : PN->blocks())
          if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
            return;
    }
  }

  LLVM_DEBUG(dbgs() << "\nLSR on loop ";
             L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
             dbgs() << ":\n");

  // Configure SCEVExpander already now, so the correct mode is used for
  // isSafeToExpand() checks.
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif
  Rewriter.disableCanonicalMode();
  Rewriter.enableLSRMode();

  // First, perform some low-level loop optimizations.
  OptimizeShadowIV();
  OptimizeLoopTermCond();

  // If loop preparation eliminates all interesting IV users, bail.
  if (IU.empty())
    return;

  // Skip nested loops until we can model them better with formulae.
  if (!L->isInnermost()) {
    LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
    return;
  }

  // Start collecting data and preparing for the solver.
  CollectChains();
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

  if (Uses.empty())
    return;

  LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
             print_uses(dbgs()));
  LLVM_DEBUG(dbgs() << "The baseline solution requires ";
             BaselineCost.print(dbgs()); dbgs() << "\n");

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);

  // Release memory that is no longer needed.
  Factors.clear();
  Types.clear();
  RegUses.clear();

  if (Solution.empty())
    return;

#ifndef NDEBUG
  // Formulae should be legal.
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) &&
             "Illegal formula generated!");
  }
#endif

  // Now that we've decided what we want, make it so. It'll take care of
  // honoring postinc uses, keeping things simple.
  ImplementSolution(Solution);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty())
    return;

  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << *Ty << ')';
  }
  OS << '\n';
}

void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      dbgs() << "  ";
      LF.print(OS);
      OS << '\n';
    }
}

void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    dbgs() << "  ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << "    ";
      F.print(OS);
      OS << '\n';
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}

LLVM_DUMP_METHOD void LSRInstance::dump() const {
  print(errs());
  errs() << '\n';
}
#endif
namespace {

class LoopStrengthReduce : public LoopPass {
public:
  static char ID; // Pass ID, replacement for typeid

  LoopStrengthReduce();

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG. However, we do update
  // many analyses if they are around.
  AU.addPreservedID(LoopSimplifyID);

  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addRequired<IVUsersWrapperPass>();
  AU.addPreserved<IVUsersWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addPreserved<MemorySSAWrapperPass>();
}
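
// The helpers below salvage dbg.value records whose locations were optimised
// out by LSR: each location's SCEV is translated into a DWARF expression
// anchored on the surviving induction variable.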
static llvm::iterator_range<llvm::DIExpression::expr_op_iterator>
ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
  llvm::DIExpression::expr_op_iterator Begin =
      llvm::DIExpression::expr_op_iterator(Expr.begin());
  llvm::DIExpression::expr_op_iterator End =
      llvm::DIExpression::expr_op_iterator(Expr.end());
  return {Begin, End};
}

struct SCEVDbgValueBuilder {
  SCEVDbgValueBuilder() = default;
  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

  void clone(const SCEVDbgValueBuilder &Base) {
    LocationOps = Base.LocationOps;
    Expr = Base.Expr;
  }

  void clear() {
    LocationOps.clear();
    Expr.clear();
  }

  /// The DIExpression as a stack of DWARF operations.
  SmallVector<uint64_t, 6> Expr;
  /// The location ops of the DIExpression.
  SmallVector<Value *, 2> LocationOps;

  void pushOperator(uint64_t Op) { Expr.push_back(Op); }
  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }

  /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the
  /// location op, de-duplicating repeated values.
  unsigned pushLocation(llvm::Value *V) {
    Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
    auto *It = llvm::find(LocationOps, V);
    unsigned ArgIndex = 0;
    if (It != LocationOps.end()) {
      ArgIndex = std::distance(LocationOps.begin(), It);
    } else {
      ArgIndex = LocationOps.size();
      LocationOps.push_back(V);
    }
    Expr.push_back(ArgIndex);
    return ArgIndex;
  }

  /// Push a signed constant; constants wider than 64 bits are rejected.
  bool pushConst(const SCEVConstant *C) {
    if (C->getAPInt().getSignificantBits() > 64)
      return false;
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(C->getAPInt().getSExtValue());
    return true;
  }

  // Iterating the expression as DWARF ops is convenient when updating
  // DW_OP_LLVM_arg ops.
  llvm::iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
    return ToDwarfOpIter(Expr);
  }

  /// Several SCEV types are sequences of the same arithmetic operator applied
  /// to constants and values that may be extended or truncated.
  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
                          uint64_t DwarfOp) {
    assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
           "Expected arithmetic SCEV type");
    bool Success = true;
    unsigned EmitOperator = 0;
    for (const auto &Op : CommExpr->operands()) {
      Success &= pushSCEV(Op);
      if (EmitOperator >= 1)
        pushOperator(DwarfOp);
      ++EmitOperator;
    }
    return Success;
  }

  // TODO: Identify and omit noop casts.
  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
    const llvm::SCEV *Inner = C->getOperand(0);
    const llvm::Type *Type = C->getType();
    uint64_t ToWidth = Type->getIntegerBitWidth();
    bool Success = pushSCEV(Inner);
    uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
                          IsSigned ? llvm::dwarf::DW_ATE_signed
                                   : llvm::dwarf::DW_ATE_unsigned};
    for (const auto &Op : CastOps)
      pushOperator(Op);
    return Success;
  }

  // TODO: MinMax - although these haven't been encountered in the test suite.
  bool pushSCEV(const llvm::SCEV *S) {
    bool Success = true;
    if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
      Success &= pushConst(StartInt);
    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      if (!U->getValue())
        return false;
      pushLocation(U->getValue());
    } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
    } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
      Success &= pushSCEV(UDiv->getLHS());
      Success &= pushSCEV(UDiv->getRHS());
      pushOperator(llvm::dwarf::DW_OP_div);
    } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
      // Assert if a new and unknown SCEVCastExpr type is encountered.
      assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
              isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
             "Unexpected cast type in SCEV.");
      Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
    } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
    } else if (isa<SCEVAddRecExpr>(S)) {
      // Nested SCEVAddRecExpr are generated by nested loops and are currently
      // unsupported.
      return false;
    } else {
      return false;
    }
    return Success;
  }

  /// Return true if the combination of arithmetic operator and underlying
  /// SCEV constant value is an identity function.
  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
      if (C->getAPInt().getSignificantBits() > 64)
        return false;
      int64_t I = C->getAPInt().getSExtValue();
      switch (Op) {
      case llvm::dwarf::DW_OP_plus:
      case llvm::dwarf::DW_OP_minus:
        return I == 0;
      case llvm::dwarf::DW_OP_mul:
      case llvm::dwarf::DW_OP_div:
        return I == 1;
      }
    }
    return false;
  }

  /// Convert a SCEV of a value to a DIExpression that is pushed onto the
  /// builder's expression stack. The stack should already contain an
  /// expression for the iteration count, so that it can be multiplied by
  /// the stride and added to the start. Components are omitted when they
  /// are an identity function, e.g. a stride of 1.
  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    if (isa<SCEVAddRecExpr>(SAR.getStart()))
      return false;
    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // Skip pushing arithmetic noops.
    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_mul);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_plus);
    }
    return true;
  }

  /// Create an expression that is an offset from a value (usually the IV).
  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
    pushLocation(OffsetValue);
    DIExpression::appendOffset(Expr, Offset);
    LLVM_DEBUG(
        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
               << std::to_string(Offset) << "\n");
  }

  /// Combine a translation of the SCEV and the IV to create an expression
  /// that recovers a location's value. Returns true if an expression was
  /// created.
  bool createIterCountExpr(const SCEV *S,
                           const SCEVDbgValueBuilder &IterationCount,
                           ScalarEvolution &SE) {
    // SCEVs for SSA values are most frequently of the form
    // {start,+,stride}; anything more complex is not expected here.
    if (!isa<SCEVAddRecExpr>(S))
      return false;

    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
                      << '\n');

    const auto *Rec = cast<SCEVAddRecExpr>(S);
    if (!Rec->isAffine())
      return false;

    if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
      return false;

    // Initialise a new builder with the iteration count expression. In
    // combination with the value's SCEV this enables recovery.
    clone(IterationCount);
    if (!SCEVToValueExpr(*Rec, SE))
      return false;

    return true;
  }

  /// Convert the IV's SCEV into an iteration-count expression:
  /// (IV - Start) / Stride.
  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
                           ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    if (isa<SCEVAddRecExpr>(SAR.getStart())) {
      LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
                        << SAR << '\n');
      return false;
    }
    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // Skip pushing arithmetic noops.
    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_minus);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_div);
    }
    return true;
  }

  /// Append the current expression and location ops to the destination
  /// vectors, remapping DW_OP_LLVM_arg indices into the destination's
  /// location-op numbering.
  void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
                       SmallVectorImpl<Value *> &DestLocations) {
    assert(!DestLocations.empty() &&
           "Expected the locations vector to contain the IV");
    assert(!LocationOps.empty() &&
           "Expected the location ops to contain the IV.");
    // DestIndexMap[n] contains the index in DestLocations for the nth
    // location in this SCEVDbgValueBuilder.
    SmallVector<uint64_t, 2> DestIndexMap;
    for (const auto &Op : LocationOps) {
      auto It = find(DestLocations, Op);
      if (It != DestLocations.end()) {
        // Location already exists in DestLocations; reuse its ArgIndex.
        DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
        continue;
      }
      // Location is not in DestLocations; add it.
      DestIndexMap.push_back(DestLocations.size());
      DestLocations.push_back(Op);
    }

    for (const auto &Op : expr_ops()) {
      if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
        Op.appendToVector(DestExpr);
        continue;
      }

      DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
      // `DW_OP_LLVM_arg n` refers to the nth LocationOp in this SCEV;
      // DestIndexMap[n] contains its new index in DestLocations.
      uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
      DestExpr.push_back(NewIndex);
    }
  }
};
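
/// Holds all the required data to salvage a dbg.value: the original debug
/// record, its DIExpression and location ops, the SCEVs of those ops, and the
/// recovery expression builders created per location.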
struct DVIRecoveryRec {
  DVIRecoveryRec(DbgValueInst *DbgValue)
      : DbgRef(DbgValue), Expr(DbgValue->getExpression()),
        HadLocationArgList(false) {}
  DVIRecoveryRec(DbgVariableRecord *DVR)
      : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}

  PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgRef;
  DIExpression *Expr;
  bool HadLocationArgList;
  SmallVector<WeakVH, 2> LocationOps;
  SmallVector<const llvm::SCEV *, 2> SCEVs;
  SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;

  void clear() {
    for (auto &RE : RecoveryExprs)
      RE.reset();
    RecoveryExprs.clear();
  }

  ~DVIRecoveryRec() { clear(); }
};

/// Returns the total number of DW_OP_llvm_arg operands in the expression.
static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
  auto expr_ops = ToDwarfOpIter(Expr);
  unsigned Count = 0;
  for (auto Op : expr_ops)
    if (Op.getOp() == llvm::dwarf::DW_OP_LLVM_arg)
      Count++;
  return Count;
}
/// Overwrites DVI with the location and Ops as the DIExpression.
template <typename T>
static void updateDVIWithLocation(T &DbgVal, Value *Location,
                                  SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) == 0 && "Expected expression that does not "
                                    "contain any DW_OP_llvm_arg operands.");
  DbgVal.setRawLocation(ValueAsMetadata::get(Location));
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
}

/// Overwrite DVI with locations placed into a DIArglist.
template <typename T>
static void updateDVIWithLocations(T &DbgVal,
                                   SmallVectorImpl<Value *> &Locations,
                                   SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) != 0 &&
         "Expected expression that references DIArglist locations using "
         "DW_OP_llvm_arg operands.");
  SmallVector<ValueAsMetadata *, 3> MetadataLocs;
  for (Value *V : Locations)
    MetadataLocs.push_back(ValueAsMetadata::get(V));
  auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
  DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef));
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
}

/// Write the new expression and new location ops for the dbg.value. If
/// possible reduce the number of location ops to a simpler form.
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
                               SmallVectorImpl<Value *> &NewLocationOps,
                               SmallVectorImpl<uint64_t> &NewExpr) {
  auto UpdateDbgValueInstImpl = [&](auto *DbgVal) {
    unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
    if (NumLLVMArgs == 0) {
      // Location assumed to be on the stack.
      updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr);
    } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
      // A lone DW_OP_llvm_arg at the start of the expression can be omitted
      // along with the DIArglist.
      assert(NewExpr[1] == 0 &&
             "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
      llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
      updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps);
    } else {
      // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
      updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
    }

    // If the DIExpression was previously empty then add the stack terminator.
    DIExpression *SalvageExpr = DbgVal->getExpression();
    if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
      SalvageExpr =
          DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
      DbgVal->setExpression(SalvageExpr);
    }
  };
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
  else
    UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}
/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased
/// values.
static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
  auto RestorePreTransformStateImpl = [&](auto *DbgVal) {
    LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                      << "scev-salvage: post-LSR: " << *DbgVal << '\n');
    assert(DVIRec.Expr && "Expected an expression");
    DbgVal->setExpression(DVIRec.Expr);

    // Even a single location-op may be inside a DIArgList and referenced with
    // DW_OP_LLVM_arg, which is valid only with a DIArgList.
    if (!DVIRec.HadLocationArgList) {
      assert(DVIRec.LocationOps.size() == 1 &&
             "Unexpected number of location ops.");
      // LSR's unsuccessful salvage attempt may have added a DIArgList, which
      // in this case should be stripped back to a single location.
      Value *CachedValue =
          getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext());
      DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue));
    } else {
      SmallVector<ValueAsMetadata *, 3> MetadataLocs;
      for (WeakVH VH : DVIRec.LocationOps) {
        Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
        MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
      }
      auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
      DbgVal->setRawLocation(
          llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef));
    }
    LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
  };
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
  else
    RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
                       llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {
  if (isa<DbgValueInst *>(DVIRec.DbgRef)
          ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
          : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
    return false;

  // LSR may have caused several changes to the dbg.value in the failed salvage
  // attempt. So restore the DIExpression, the location ops and also the
  // location ops format, which is always DIArglist for multiple ops, but only
  // sometimes for a single op.
  restorePreTransformState(DVIRec);

  // LocationOpIndexMap[i] will store the post-LSR location index of
  // the non-optimised-out location at pre-LSR index i.
  SmallVector<int64_t, 2> LocationOpIndexMap;
  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  SmallVector<Value *, 2> NewLocationOps;
  NewLocationOps.push_back(LSRInductionVar);

  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
    // Place the locations not optimised out in the list first, avoiding
    // inserts later. The map is used to update the DIExpression's
    // DW_OP_LLVM_arg arguments as the expression is updated.
    if (VH && !isa<UndefValue>(VH)) {
      NewLocationOps.push_back(VH);
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
      LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
                        << " now at index " << LocationOpIndexMap[i] << "\n");
      continue;
    }

    // It's possible that a value referred to in the SCEV may have been
    // optimised out by LSR.
    if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
        SE.containsUndefs(DVIRec.SCEVs[i])) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or erased. "
                           "Salvage abandoned.\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");

    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

    // Create an offset-based salvage expression if possible, as it requires
    // less DWARF ops than an iteration-count-based expression.
    if (std::optional<APInt> Offset =
            SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
      if (Offset->getSignificantBits() <= 64)
        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
      else
        return false;
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
                                                 SE))
      return false;
  }

  // Merge the DbgValue's IR expression with the salvaged expressions.
  SmallVector<uint64_t, 3> NewExpr;
  // A DIExpression without any elements implies "register location"; the
  // location is the only DW_OP_LLVM_arg.
  if (DVIRec.Expr->getNumElements() == 0) {
    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
           "DIExpression.");
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);
  }
  for (const auto &Op : DVIRec.Expr->expr_ops()) {
    // Most Ops needn't be updated.
    if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
      Op.appendToVector(NewExpr);
      continue;
    }

    uint64_t LocationArgIndex = Op.getArg(0);
    SCEVDbgValueBuilder *DbgBuilder =
        DVIRec.RecoveryExprs[LocationArgIndex].get();
    // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
    // optimise it away. Just translate the argument to the updated
    // location index.
    if (!DbgBuilder) {
      NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
             "Expected a positive index for the location-op position.");
      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
      continue;
    }
    // The location has a recovery expression.
    DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
  }

  UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
                      << *cast<DbgValueInst *>(DVIRec.DbgRef) << "\n");
  else
    LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
                      << *cast<DbgVariableRecord *>(DVIRec.DbgRef) << "\n");
  return true;
}
/// Obtain an expression for the iteration count, then attempt to salvage the
/// dbg.value intrinsics.
static void DbgRewriteSalvageableDVIs(
    llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
  if (DVIToUpdate.empty())
    return;

  const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
  assert(SCEVInductionVar &&
         "Anticipated a SCEV for the post-LSR induction variable");

  if (const SCEVAddRecExpr *IVAddRec =
          dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
    if (!IVAddRec->isAffine())
      return;

    // Prevent translation using excessive resources.
    if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
      return;

    // The iteration count is required to recover location values.
    SCEVDbgValueBuilder IterCountExpr;
    IterCountExpr.pushLocation(LSRInductionVar);
    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
      return;

    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
                      << '\n');

    for (auto &DVIRec : DVIToUpdate) {
      SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
                 IterCountExpr);
    }
  }
}
/// Identify and cache salvageable DVI locations and expressions along with the
/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
/// caching and updating.
static void DbgGatherSalvagableDVI(
    Loop *L, ScalarEvolution &SE,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
    SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
  for (const auto &B : L->getBlocks()) {
    for (auto &I : *B) {
      auto ProcessDbgValue = [&](auto *DbgVal) -> bool {
        // Ensure that if any location op is undef that the dbg.value is not
        // cached.
        if (DbgVal->isKillLocation())
          return false;

        // Check that the location op SCEVs are suitable for translation to
        // DIExpression.
        const auto &HasTranslatableLocationOps =
            [&](const auto *DbgValToTranslate) -> bool {
          for (const auto LocOp : DbgValToTranslate->location_ops()) {
            if (!LocOp)
              return false;

            if (!SE.isSCEVable(LocOp->getType()))
              return false;

            const SCEV *S = SE.getSCEV(LocOp);
            if (SE.containsUndefs(S))
              return false;
          }
          return true;
        };

        if (!HasTranslatableLocationOps(DbgVal))
          return false;

        std::unique_ptr<DVIRecoveryRec> NewRec =
            std::make_unique<DVIRecoveryRec>(DbgVal);
        // Each location op may need a SCEVDbgValueBuilder in order to recover
        // it. Pre-allocating a vector enables quick lookups of the builder by
        // location op index.
        NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
        for (const auto LocOp : DbgVal->location_ops()) {
          NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
          NewRec->LocationOps.push_back(LocOp);
          NewRec->HadLocationArgList = DbgVal->hasArgList();
        }
        SalvageableDVISCEVs.push_back(std::move(NewRec));
        return true;
      };
      for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
        if (DVR.isDbgValue() || DVR.isDbgAssign())
          ProcessDbgValue(&DVR);
      }
      auto DVI = dyn_cast<DbgValueInst>(&I);
      if (!DVI)
        continue;
      if (ProcessDbgValue(DVI))
        DVIHandles.insert(DVI);
    }
  }
}
/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
/// any PHI from the loop header is usable, but may have less chance of
/// surviving subsequent transforms.
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
                                           const LSRInstance &LSR) {
  auto IsSuitableIV = [&](PHINode *P) {
    if (!SE.isSCEVable(P->getType()))
      return false;
    if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
      return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
    return false;
  };

  // For now, just pick the first IV that was generated and inserted by
  // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
  // by subsequent transforms.
  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    if (!IV)
      continue;

    // There should only be PHI node IVs.
    PHINode *P = cast<PHINode>(&*IV);

    if (IsSuitableIV(P))
      return P;
  }

  for (PHINode &P : L.getHeader()->phis()) {
    if (IsSuitableIV(&P))
      return &P;
  }
  return nullptr;
}
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI,
                               AssumptionCache &AC, TargetLibraryInfo &TLI,
                               MemorySSA *MSSA) {
  // Debug preservation - before we start removing anything, identify which
  // DVIs meet the salvageable criteria and cache their DIExpressions and
  // SCEVs.
  SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
  SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
  DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);

  bool Changed = false;
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Run the main LSR transformation.
  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();

  // Remove any extra phis created by processing inner loops.
  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", false);
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
    Rewriter.setDebugType(DEBUG_TYPE);
#endif
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    Rewriter.clear();
    if (numFolded) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }
  // LSR may at times remove all uses of an induction variable from a loop.
  // The only remaining use is the PHI in the exit block. When this is the
  // case, if the exit value of the IV can be calculated using SCEV, we can
  // replace the exit block PHI with the exit value of the IV and skip the
  // updates in each loop iteration.
  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", true);
    int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter,
                                         &DT, UnusedIndVarInLoop, DeadInsts);
    Rewriter.clear();
    if (Rewrites) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }

  if (SalvageableDVIRecords.empty())
    return Changed;

  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
  // expressions composed using the derived iteration count.
  // TODO: Allow for multiple IV references for nested AddRec SCEVs.
  for (const auto &L : LI) {
    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
    else {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
    }
  }

  for (auto &Rec : SalvageableDVIRecords)
    Rec->clear();
  SalvageableDVIRecords.clear();
  DVIHandles.clear();
  return Changed;
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  MemorySSA *MSSA = nullptr;
  if (MSSAAnalysis)
    MSSA = &MSSAAnalysis->getMSSA();
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
}
char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs, SmallSet< AssertingVH< DbgValueInst >, 2 > &DVIHandles)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class recording the (high level) value of a variable.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Value handle that asserts if the Value is deleted.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Conditional or Unconditional Branch instruction.
bool isUnconditional() const
Value * getCondition() const
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
This is the shared class of boolean and integer constants.
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
An iterator for expression operands.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
void print(raw_ostream &OS) const
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
PointerIntPair - This class implements a pair of a pointer and small integer.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the pointer.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all analyses.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
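A sketch of classifying an add-recurrence with the SCEVAddRecExpr accessors above, assuming S was produced by a ScalarEvolution instance SE.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
bool isAffineRec(const SCEV *S, ScalarEvolution &SE, const Loop *&L) {
  const auto *AR = dyn_cast<SCEVAddRecExpr>(S);
  if (!AR || !AR->isAffine())     // affine: Start + Step * iteration
    return false;
  const SCEV *Start = AR->getStart();
  const SCEV *Step = AR->getStepRecurrence(SE);
  L = AR->getLoop();              // the loop the recurrence evolves in
  (void)Start; (void)Step;
  return true;
}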
This is the base class for unary cast operator classes.
This node is the base class for n'ary commutative operators.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This class represents a signed maximum selection.
This class represents a binary unsigned division operation.
This class represents an unsigned maximum selection.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM Value.
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the given type.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the given type.
bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getVScale(Type *Ty)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a single pointer operand.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and return the result as an APInt if it is a constant, and std::nullopt if it isn't.
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if the elements that make up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimized out and is now a nullptr.
LLVMContext & getContext() const
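A sketch stringing several of the ScalarEvolution entry points above together; SE, L, and the two values are assumed inputs, and the function name is hypothetical.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Constants.h"
using namespace llvm;
const SCEV *constantGap(ScalarEvolution &SE, const Loop *L, Value *A,
                        Value *B) {
  if (!SE.isSCEVable(A->getType()))  // only analyzable types qualify
    return nullptr;
  const SCEV *SA = SE.getSCEV(A);
  const SCEV *SB = SE.getSCEV(B);
  // getMinusSCEV builds SA - SB as a SCEV expression.
  const SCEV *Diff = SE.getMinusSCEV(SA, SB);
  if (!SE.isLoopInvariant(Diff, L))
    return nullptr;
  // computeConstantDifference is the cheaper query when only a constant
  // answer is interesting.
  if (std::optional<APInt> C = SE.computeConstantDifference(SA, SB))
    return SE.getConstant(ConstantInt::get(SE.getContext(), *C));
  return Diff;
}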
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
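SetVector, per the entries above, is a set with deterministic, insertion-ordered iteration. A tiny sketch:
#include "llvm/ADT/SetVector.h"
using namespace llvm;
void setVectorDemo() {
  SetVector<int> Work;
  Work.insert(3);            // true: newly inserted
  Work.insert(5);
  bool Dup = Work.insert(3); // false: already present, order unchanged
  (void)Dup;
  // Iteration visits 3 then 5 -- insertion order, unlike std::set.
  for (int V : Work) { (void)V; }
  // size() counts unique elements: 2 here.
}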
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is small.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
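The find_first/find_next pair above supports the classic set-bit walk; set_bits() is the range-based equivalent. A sketch:
#include "llvm/ADT/BitVector.h"
using namespace llvm;
unsigned sumSetBits(const BitVector &BV) {
  unsigned Sum = 0;
  // Explicit walk: -1 terminates the chain of set-bit indices.
  for (int i = BV.find_first(); i != -1; i = BV.find_next(i))
    Sum += i;
  // Equivalent range form over the same bits.
  for (unsigned i : BV.set_bits())
    (void)i;
  return Sum;
}
// Usage: BitVector BV(64); BV.set(3); BV.set(17); sumSetBits(BV) == 20.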
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
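Both small sets above share the insert-returns-pair idiom: the bool reports whether the element was new. A sketch with a hypothetical visited-set guard:
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
bool markVisited(SmallPtrSet<const Instruction *, 8> &Visited,
                 const Instruction *I) {
  // insert().second is true only on first insertion -- a common
  // "process each instruction once" guard.
  return Visited.insert(I).second;
}
// count() is a 0/1 membership test: if (Visited.count(I)) ...
// SmallSet<unsigned, 4> works the same way for small value types.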
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
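A sketch of the SmallVector operations above feeding the getAddExpr entry point listed earlier; SE, S1, and S2 are assumed.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;
const SCEV *addTwo(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2) {
  // Inline capacity of 2: no heap allocation for this call.
  SmallVector<const SCEV *, 2> Ops;
  Ops.reserve(2);
  Ops.push_back(S1);
  Ops.emplace_back(S2);
  // getAddExpr takes a SmallVectorImpl& (and may canonicalize Ops),
  // so any inline size works here.
  return SE.getAddExpr(Ops);
}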
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static Type * getVoidTy(LLVMContext &C)
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
This class represents a cast from unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
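A sketch of the use-list API above: replace a value everywhere, then visit the users of the replacement. The function name and the DL parameter are hypothetical.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
using namespace llvm;
void replaceAndTag(Value *OldV, Value *NewV, DebugLoc DL) {
  // hasOneUse() is the cheap single-user check; replaceAllUsesWith
  // handles any number of uses in one shot.
  OldV->replaceAllUsesWith(NewV);  // every use now points at NewV
  for (User *U : NewV->users())    // users() iterates the use-list
    if (auto *I = dyn_cast<Instruction>(U))
      I->setDebugLoc(DL);
}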
A Value handle that may be null.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actually needed.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
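Worked values for the two bit utilities above, since the relationship between them is easy to misremember:
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
void bitMathExamples() {
  // countr_zero: trailing zeros up to the first set bit.
  unsigned TZ = countr_zero<uint32_t>(16); // 0b10000 -> 4
  // Log2_32: floor(log2(V)); 2^5 = 32 <= 40 < 64 = 2^6, so 5.
  unsigned FL = Log2_32(40);               // 5
  (void)TZ; (void)FL;
}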
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
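The range wrappers above (all_of/any_of/none_of, plus find and count) drop the begin/end boilerplate. A sketch over an assumed instruction list:
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
bool classify(ArrayRef<const Instruction *> Insts) {
  bool AllLoads = all_of(Insts, [](const Instruction *I) {
    return isa<LoadInst>(I);     // every element satisfies the predicate
  });
  bool AnyEH = any_of(Insts, [](const Instruction *I) {
    return I->isEHPad();         // at least one element satisfies it
  });
  bool NoStores = none_of(Insts, [](const Instruction *I) {
    return isa<StoreInst>(I);    // no element satisfies it
  });
  return AllLoads && !AnyEH && NoStores;
}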
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function.
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
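A sketch of the normalize/denormalize pair above, which LSR uses to move between pre- and post-increment views of an expression; SE, S, and L are assumed.
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;
const SCEV *roundTrip(const SCEV *S, const Loop *L, ScalarEvolution &SE) {
  PostIncLoopSet Loops; // the set of loops to transform for
  Loops.insert(L);
  // With invertibility checking on, normalization returns null when the
  // transform cannot be undone.
  const SCEV *Norm = normalizeForPostIncUse(S, Loops, SE);
  if (!Norm)
    return nullptr;
  // Denormalizing the result should recover the original expression.
  return denormalizeForPostIncUse(Norm, Loops, SE);
}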
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Pass * createLoopStrengthReducePass()
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of the innermost containing loop.
void initializeLoopStrengthReducePass(PassRegistry &)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
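SCEVExprContains above takes an arbitrary predicate over SCEV nodes. A sketch that scans an expression tree for any add-recurrence:
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
bool containsAddRec(const SCEV *Root) {
  // The predicate runs on every node of the expression tree.
  return SCEVExprContains(Root, [](const SCEV *N) {
    return isa<SCEVAddRecExpr>(N);
  });
}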
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Option class for critical edge splitting.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to the loop passes.
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.