using namespace llvm;

#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Stress test LSR IV chains"));

static const unsigned UnknownAddressSpace = ~0u;
  MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}

  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

                                 unsigned AS = UnknownAddressSpace) {
  OS << "[NumUses=" << UsedByIndices.count() << ']';
class RegUseTracker {
  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
void RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
      RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
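// Note how countRegister tolerates both new and already-seen registers: the
// map insert is a no-op for a known Reg, and the bitvector grows on demand,
// so e.g. countRegister(R, 3) leaves R's vector with at least 4 bits and
// bit 3 set, recording that R appears in use #3.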
void RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
void RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
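// This is the bit-level half of the swap-and-pop idiom: the caller moves
// Uses.back() into the slot being deleted (see DeleteUse below), so here the
// last use's bit is copied into slot LUIdx before the tail is discarded.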
bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg,
                                             size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1)
    return false;
  if ((size_t)i != LUIdx)
    return true;
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
  const SCEV *ScaledReg;

  int64_t UnfoldedOffset;

  Formula()
      : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
        ScaledReg(nullptr), UnfoldedOffset(0) {}
  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;
    for (const SCEV *S : Add->operands())

  if (!AR->getStart()->isZero() && AR->isAffine()) {
                     AR->getStepRecurrence(SE),

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {

  for (const SCEV *S : MyGood)
  for (const SCEV *S : MyBad)

    BaseRegs.push_back(Sum);

    BaseRegs.push_back(Sum);

    return Scale != 1 || !BaseRegs.empty();
  return BaseRegs.size() <= 1;
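// The canonical-form invariant, read off the two returns above: when a
// ScaledReg is present, Scale == 1 is only allowed alongside base registers;
// when absent, a formula may hold at most a single base register.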
void Formula::canonicalize() {
  assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");

  ScaledReg = BaseRegs.back();

  size_t BaseRegsSize = BaseRegs.size();
  while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))

bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
    if (!First) OS << " + "; else First = false;
    BaseGV->printAsOperand(OS, false);

  if (BaseOffset != 0) {
    if (!First) OS << " + "; else First = false;

  for (const SCEV *BaseReg : BaseRegs) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << *BaseReg << ')';

  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";

    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";

  if (UnfoldedOffset != 0) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
                              bool IgnoreSignificantBits = false) {

      const APInt &LA = C->getAPInt();
      if (LA.srem(RA) != 0)

    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;

      for (const SCEV *S : Add->operands()) {
        if (!Op) return nullptr;

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
      for (const SCEV *S : Mul->operands()) {
                                        IgnoreSignificantBits)) {
    if (C->getAPInt().getMinSignedBits() <= 64) {
      return C->getValue()->getSExtValue();

  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {

  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
  bool isAddress = isa<LoadInst>(Inst);
    if (SI->getOperand(1) == OperandVal)
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
        if (II->getArgOperand(0) == OperandVal)

  MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    AccessTy.MemTy = SI->getOperand(0)->getType();
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();

  if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
                                    PTy->getAddressSpace());
  if (!Processed.insert(S).second)

    for (const SCEV *S : Add->operands()) {

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    if (Mul->getNumOperands() == 2) {
      if (isa<SCEVConstant>(Mul->getOperand(0)))

      if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
        Value *UVal = U->getValue();
        if (UI && UI->getOpcode() == Instruction::Mul &&
  bool Changed = false;

  while (!DeadInsts.empty()) {

                                 const LSRUse &LU, const Formula &F);

                                const LSRUse &LU, const Formula &F);
  unsigned NumBaseAdds;

    : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
      SetupCost(0), ScaleCost(0) {}

    return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
             | ImmCost | SetupCost | ScaleCost) != ~0u)
        || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
             & ImmCost & SetupCost & ScaleCost) == ~0u);

    return NumRegs == ~0u;
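// A lost cost is encoded by saturating every field to ~0u (see Lose()), so
// isValid accepts exactly two states: not all fields saturated, or all of
// them saturated. isLoser then only needs to inspect NumRegs.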
  void RateRegister(const SCEV *Reg,
  void RatePrimaryRegister(const SCEV *Reg,

  Value *OperandValToReplace;

  bool isUseFullyOutsideLoop(const Loop *L) const;

struct UniquifierDenseMapInfo {
    V.push_back(reinterpret_cast<const SCEV *>(-1));
    V.push_back(reinterpret_cast<const SCEV *>(-2));
  MemAccessTy AccessTy;

  bool AllFixupsOutsideLoop;

  Type *WidestFixupType;

  LSRUse(KindType K, MemAccessTy AT)
      : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
        AllFixupsOutsideLoop(true), RigidFormula(false),
        WidestFixupType(nullptr) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (f.Offset > MaxOffset)
      MaxOffset = f.Offset;
    if (f.Offset < MinOffset)
      MinOffset = f.Offset;
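// MinOffset and MaxOffset start out inverted (INT64_MAX/INT64_MIN in the
// constructor above), so the first pushFixup unconditionally tightens both
// bounds to that fixup's offset.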
  bool HasFormulaWithSameRegs(const Formula &F) const;
  bool InsertFormula(const Formula &F);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
void Cost::RateRegister(const SCEV *Reg,
    if (AR->getLoop() != L) {

    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(AR->getOperand(1), Regs, L, SE, DT);

  if (!isa<SCEVUnknown>(Reg) &&
      !isa<SCEVConstant>(Reg) &&
      !(isa<SCEVAddRecExpr>(Reg) &&
        (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
         isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))

  NumIVMuls += isa<SCEVMulExpr>(Reg) &&
void Cost::RatePrimaryRegister(const SCEV *Reg,
  if (LoserRegs && LoserRegs->count(Reg)) {

  if (Regs.insert(Reg).second) {
    RateRegister(Reg, Regs, L, SE, DT);
    if (LoserRegs && isLoser())
  assert(F.isCanonical() && "Cost is accurate only for canonical formula");

  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);

  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  NumBaseAdds += (F.UnfoldedOffset != 0);

  for (const LSRFixup &Fixup : LU.Fixups) {
    int64_t O = Fixup.Offset;
    int64_t Offset = (uint64_t)O + F.BaseOffset;
    else if (Offset != 0)

    if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
  return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
                  ImmCost, SetupCost) <
         std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
                  Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
                  Other.SetupCost);
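// Costs compare lexicographically: the register count dominates, followed by
// addrec cost, IV-multiply and base-add counts, and scale cost, with the
// immediate and setup costs only breaking remaining ties.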
  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
  if (AddRecCost != 0)
    OS << ", with addrec cost " << AddRecCost;
    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
  if (NumBaseAdds != 0)
    OS << ", plus " << NumBaseAdds << " base add"
       << (NumBaseAdds == 1 ? "" : "s");
    OS << ", plus " << ScaleCost << " scale cost";
    OS << ", plus " << ImmCost << " imm cost";
    OS << ", plus " << SetupCost << " setup cost";
LSRFixup::LSRFixup()
    : UserInst(nullptr), OperandValToReplace(nullptr),

bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
  else
    UserInst->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);

    OS << ", Offset=" << Offset;
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  return Uniquifier.count(Key);

bool LSRUse::InsertFormula(const Formula &F) {
  assert(F.isCanonical() && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  if (!Uniquifier.insert(Key).second)

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    Regs.insert(F.ScaledReg);
void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
    OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                 bool HasBaseReg, int64_t Scale) {
                                     HasBaseReg, Scale, AccessTy.AddrSpace);

  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset != 0)
    if (Scale != 0 && Scale != -1)

    if (BaseOffset != 0) {
      BaseOffset = -(uint64_t)BaseOffset;

    return !BaseGV && Scale == 0 && BaseOffset == 0;

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
  MinOffset = (uint64_t)BaseOffset + MinOffset;
  if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
  MaxOffset = (uint64_t)BaseOffset + MaxOffset;

                              HasBaseReg, Scale) &&
                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,

  assert((F.isCanonical() || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                       int64_t MaxOffset, LSRUse::KindType Kind,
                       int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
                              BaseOffset, HasBaseReg, Scale) ||
                               BaseGV, BaseOffset, true, 0));

                       int64_t MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
                                 const LSRUse &LU, const Formula &F) {
                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F) {
    return F.Scale != 1;

        LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
        F.Scale, LU.AccessTy.AddrSpace);
        LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
        F.Scale, LU.AccessTy.AddrSpace);

    assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
  case LSRUse::ICmpZero:
  case LSRUse::Special:
                             LSRUse::KindType Kind, MemAccessTy AccessTy,
  if (BaseOffset == 0 && !BaseGV) return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  if (!HasBaseReg && Scale == 1) {

                             int64_t MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
  if (S->isZero()) return true;

  if (!S->isZero()) return false;

  if (BaseOffset == 0 && !BaseGV) return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
                              BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

      UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase;

  IVChain() : ExprBase(nullptr) {}

  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  const_iterator begin() const {
    return std::next(Incs.begin());
  const_iterator end() const {

  bool hasIncs() const { return Incs.size() >= 2; }

  void add(const IVInc &X) { Incs.push_back(X); }

  Instruction *tailUserInst() const { return Incs.back().UserInst; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,
  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  void OptimizeShadowIV();
  void OptimizeLoopTermCond();

  void FinalizeChain(IVChain &Chain);
  void CollectChains();

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                    MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  const Formula &Base, unsigned Depth,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

                    const Cost &CurCost,

  Value *Expand(const LSRUse &LU, const LSRFixup &LF,
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
  void Rewrite(const LSRUse &LU, const LSRFixup &LF,

  bool getChanged() const { return Changed; }

  void print_factors_and_types(raw_ostream &OS) const;
void LSRInstance::OptimizeShadowIV() {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    Type *DestTy = nullptr;
    bool IsSigned = false;

    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      DestTy = UCast->getDestTy();
    } else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?

    if (!Incr) continue;
        && Incr->getOpcode() != Instruction::Sub)

    if (Incr->getOperand(0) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(1));
    else if (Incr->getOperand(1) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(0));

    PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
                                 Instruction::FAdd : Instruction::FSub,
                               NewPH, CFP, "IV.S.next.", Incr);

    if (U.getUser() == Cond) {
  if (!Sel || !Sel->hasOneUse()) return Cond;

  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;

         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
    if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
    if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();

    Pred = CmpInst::getInversePredicate(Pred);
LSRInstance::OptimizeLoopTermCond() {
        return LatchBlock != BB;

  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    if (!FindIVUserForCond(Cond, CondUse))

    Cond = OptimizeMax(Cond, CondUse);

    if (!DT.dominates(ExitingBlock, LatchBlock))

    if (LatchBlock != ExitingBlock)
        if (&*UI != CondUse &&

          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(*UI, L);
          if (!A || !B) continue;

                  dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
                goto decline_post_inc;
              goto decline_post_inc;
                                            AccessTy.AddrSpace))
                goto decline_post_inc;
                                            AccessTy.AddrSpace))
                goto decline_post_inc;

    DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "

      Cond = cast<ICmpInst>(Cond->clone());
      ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);

      IVIncInsertPos = Inst;
    else if (BB != IVIncInsertPos->getParent())
bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  int64_t NewMinOffset = LU.MinOffset;
  int64_t NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)

  if (AccessTy.MemTy != LU.AccessTy.MemTy) {
    NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                          AccessTy.AddrSpace);

  if (NewOffset < LU.MinOffset) {
                        LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (NewOffset > LU.MaxOffset) {
                        NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
                                               LSRUse::KindType Kind,
                                               MemAccessTy AccessTy) {
  std::pair<UseMapTy::iterator, bool> P =

    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);
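// getUse deduplicates uses on (Expr, Kind, AccessTy) with the constant offset
// split out of Expr: an existing use is shared when reconcileNewOffset can
// absorb the new offset into its range; otherwise a fresh LSRUse is
// allocated, and either way the caller gets the (LUIdx, Offset) pair back to
// record on its fixup.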
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())

  RegUses.swapAndDropUse(LUIdx, Uses.size());
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {

      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset == 0)
void LSRInstance::CollectInterestingTypesAndFactors() {
    const SCEV *Expr = IU.getExpr(U);

  } while (!Worklist.empty());

       I = Strides.begin(), E = Strides.end(); I != E; ++I)
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

              dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
          if (Factor->getAPInt().getMinSignedBits() <= 64)
            Factors.insert(Factor->getAPInt().getSExtValue());
          if (Factor->getAPInt().getMinSignedBits() <= 64)
            Factors.insert(Factor->getAPInt().getSExtValue());

  if (Types.size() == 1)
  for (; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
            dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {

  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);

    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());

    for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
      const SCEV *SubExpr = *I;

    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
  if (!isa<SCEVConstant>(IncExpr)) {
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
  if (!Chain.hasIncs())

  if (!Users.empty()) {
    DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
            dbgs() << " " << *Inst << "\n";

  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;
  for (const IVInc &Inc : Chain) {
    if (Inc.IncExpr->isZero())

    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;

    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;

  DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
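// The chain cost above is a simple profitability count: constant increments
// beyond the first and every variable increment add cost, while an increment
// that repeats the previous stride (NumReusedIncrements) is credited back.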
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))

    LastIncExpr = OperExpr;

    if (!isa<SCEVAddRecExpr>(LastIncExpr))

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                 << ") IV=" << *LastIncExpr << "\n");

    DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                 << ") IV+" << *LastIncExpr << "\n");
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));

  IVChain &Chain = IVChainVec[ChainIdx];

  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),

  for ( ; IncIter != IncEnd; ++IncIter) {
    if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)

        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {

    NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
void LSRInstance::CollectChains() {
  DEBUG(dbgs() << "Collecting IV Chains.\n");

       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))

      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

    ChainInstruction(PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))

    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);

  IVChainVec.resize(ChainIdx);
void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {

    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {

    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  if (IVOpIter == IVOpEnd) {
    DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");

  DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
  Type *IVTy = IVSrc->getType();

  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
          SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

    if (LeftOverExpr && !LeftOverExpr->isZero()) {
      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
      assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
      LeftOverExpr = nullptr;

    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);

  if (isa<PHINode>(Chain.tailUserInst())) {
      Value *IVOper = IVSrc;
      if (IVTy != PostIncTy) {
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
void LSRInstance::CollectFixupsAndInitialFormulae() {
        find(UserInst->operands(), U.getOperandValToReplace());
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI))

    MemAccessTy AccessTy;
    if (isAddressUse(UserInst, U.getOperandValToReplace())) {

    const SCEV *S = IU.getExpr(U);

    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);

                          TmpPostIncLoops, SE, DT);
          Kind = LSRUse::ICmpZero;

          for (size_t i = 0, e = Factors.size(); i != e; ++i)
            if (Factors[i] != -1)
              Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    int64_t Offset = P.second;
    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;

    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
    LU.RigidFormula = true;

  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
         "Formula is illegal");
  if (!LU.InsertFormula(F))

  CountRegisters(F, LUIdx);
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  while (!Worklist.empty()) {

    if (!Visited.insert(S).second)

      Worklist.append(N->op_begin(), N->op_end());
    else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        if (L->contains(Inst)) continue;
      } else if (isa<UndefValue>(V))

      for (const Use &U : V->uses()) {
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
            cast<PHINode>(UserInst)->getIncomingBlock(
                PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))

        const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
        if (!isa<SCEVUnknown>(UserS))
              SE.getUnknown(const_cast<Instruction *>(UserInst)));

        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));

        std::pair<size_t, int64_t> P = getUse(
        size_t LUIdx = P.first;
        int64_t Offset = P.second;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;

        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                                   unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {

  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
      Remainder = nullptr;

      Remainder = SE.getConstant(AR->getType(), 0);
    return SE.getAddRecExpr(Remainder,

  } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    if (Mul->getNumOperands() != 2)
        dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
      const SCEV *Remainder =
        Ops.push_back(SE.getMulExpr(C, Remainder));
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (AddOps.size() == 1)

                    LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J),

    if (InnerAddOps.size() == 1 &&
                      LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))

      F.ScaledReg = nullptr;
      F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
    else
      F.BaseRegs[Idx] = InnerSum;

      F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
                                         Formula Base, unsigned Depth) {
  assert(Base.isCanonical() && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,

void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)

  for (const SCEV *BaseReg : Base.BaseRegs) {
      F.BaseRegs.push_back(BaseReg);

  if (Ops.size() > 1) {
    F.BaseRegs.push_back(Sum);

    (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    F.BaseRegs[Idx] = G;

  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  for (int64_t Offset : Worklist) {
    F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
    if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,

        F.ScaledReg = nullptr;
        F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.ScaledReg = NewG;
      else
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

  if (G->isZero() || Imm == 0)

  F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    F.BaseRegs[Idx] = G;

  (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  Type *IntTy = Base.getType();

  if (LU.MinOffset != LU.MaxOffset) return;

  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset == INT64_MIN && Factor == -1)
    int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
    if (NewBaseOffset / Factor != Base.BaseOffset)
        !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))

    int64_t Offset = LU.MinOffset;
    if (Offset == INT64_MIN && Factor == -1)
    Offset = (uint64_t)Offset * Factor;
    if (Offset / Factor != LU.MinOffset)
        !ConstantInt::isValueValidForType(IntTy, Offset))

    F.BaseOffset = NewBaseOffset;

    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))

    F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])

      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)

    if (F.UnfoldedOffset != 0) {
      if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
      F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
      if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
          !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))

    (void)InsertFormula(LU, LUIdx, F);
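// Every scaling by Factor above is performed in uint64_t and then validated
// by dividing back (x * Factor / Factor == x), with the INT64_MIN * -1
// corner rejected explicitly, so overflowing candidates are skipped rather
// than silently wrapping.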
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  Type *IntTy = Base.getType();

  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not do its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,

          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero &&
        !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
          dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {

        F.ScaledReg = Quotient;
        F.deleteBaseReg(F.BaseRegs[i]);

        if (F.Scale == 1 && F.BaseRegs.empty())

        (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  Type *DstTy = Base.getType();

  for (Type *SrcTy : Types) {
      for (const SCEV *&BaseReg : F.BaseRegs)

      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

      (void)InsertFormula(LU, LUIdx, F);

  const SCEV *OrigReg;

  WorkItem(size_t LI, int64_t I, const SCEV *R)
      : LUIdx(LI), Imm(I), OrigReg(R) {}

  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << ", add offset " << Imm;
void LSRInstance::GenerateCrossUseConstantOffsets() {
  typedef std::map<int64_t, const SCEV *> ImmMapTy;

  for (const SCEV *Use : RegUses) {
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    if (Imms.size() == 1)

    DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
          for (const auto &Entry : Imms)
            dbgs() << ' ' << Entry.first;

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;

      int64_t JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {
        DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');

      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /

        ImmMapTy::const_iterator M = OtherImms[i];
        if (M == J || M == JE) continue;

        int64_t Imm = (uint64_t)JImm - M->first;
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));

  UsedByIndicesMap.clear();
  UniqueItems.clear();

  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    int64_t Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];

      if (F.ScaledReg == OrigReg) {
        int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
        if (F.referencesReg(SE.getSCEV(
                ConstantInt::get(IntTy, -(uint64_t)Offset))))
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
              (C->getAPInt().abs() * APInt(BitWidth, F.Scale))

        NewF.canonicalize();
        (void)InsertFormula(LU, LUIdx, NewF);

        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          const SCEV *BaseReg = F.BaseRegs[N];
          if (BaseReg != OrigReg)
          NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
                               LU.Kind, LU.AccessTy, NewF)) {
            NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          for (const SCEV *NewReg : NewF.BaseRegs)
            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
              if ((C->getAPInt() + NewF.BaseOffset)
                  countTrailingZeros<uint64_t>(NewF.BaseOffset))

          NewF.canonicalize();
          (void)InsertFormula(LU, LUIdx, NewF);
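// Cross-use offset reuse: for a register seen at several immediate offsets,
// each occurrence of OrigReg is rewritten as (OrigReg - Imm) with Imm folded
// into BaseOffset or UnfoldedOffset, letting multiple uses share the
// rewritten register. Only a few candidate partners are tried per offset
// (the first, the last, and one near the midpoint) to bound the number of
// WorkItems generated.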
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  GenerateCrossUseConstantOffsets();

        "After generating reuse formulae:\n";
        print_uses(dbgs()));
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  bool ChangedFormulae = false;

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
      if (CostF.isLoser()) {

        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))

        std::pair<BestFormulaeTy::const_iterator, bool> P =
            BestFormulae.insert(std::make_pair(Key, FIdx));

          Formula &Best = LU.Formulae[P.first->second];
          CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
          if (CostF < CostBest)
                " in favor of formula "; Best.print(dbgs());

        ChangedFormulae = true;

        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

    BestFormulae.clear();

  DEBUG(if (ChangedFormulae) {
          "After filtering out undesirable candidates:\n";
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
    if (FSize >= ComplexityLimit) {
    if (Power >= ComplexityLimit)
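// The estimate is the product of formula counts over all uses (the solver
// below picks one formula per use), saturated at ComplexityLimit as soon as
// a single use's count or the running product reaches it.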
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    DEBUG(dbgs() << "The search space is too complex.\n");

    DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                    "which use a superset of registers used by other "

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];

      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
             I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LU.DeleteFormula(F);
          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
              NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                  (I - F.BaseRegs.begin()));
              if (LU.HasFormulaWithSameRegs(NewF)) {
                LU.DeleteFormula(F);

        LU.RecomputeRegs(LUIdx, RegUses);

    DEBUG(dbgs() << "After pre-selection:\n";
          print_uses(dbgs()));
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)

  DEBUG(dbgs() << "The search space is too complex.\n"
                  "Narrowing the search space by assuming that uses separated "
                  "by a constant offset will use the same registers.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))

      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);

      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
                              LU.Kind, LU.AccessTy))

      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
        DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');

      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LUThatHas->DeleteFormula(F);

      LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      DeleteUse(LU, LUIdx);

  DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    DEBUG(dbgs() << "The search space is too complex.\n");

    DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                    "undesirable dedicated registers.\n");

    FilterOutUndesirableDedicatedRegisters();

    DEBUG(dbgs() << "After pre-selection:\n";
          print_uses(dbgs()));
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    DEBUG(dbgs() << "The search space is too complex.\n");

    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))

        BestNum = RegUses.getUsedByIndices(Reg).count();

        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {

    DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                 << " will yield profitable reuse.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best)) continue;

      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LU.DeleteFormula(F);

          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");

        LU.RecomputeRegs(LUIdx, RegUses);

    DEBUG(dbgs() << "After pre-selection:\n";
          print_uses(dbgs()));
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  NarrowSearchSpaceByPickingWinnerRegs();
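// The narrowing passes run from least to most aggressive: superset pruning
// and constant-offset collapsing first, then a re-filtering sweep, and the
// lossy winner-register heuristic last, which keeps pruning while the
// complexity estimate above still exceeds ComplexityLimit.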
                               const Cost &CurCost,

  const LSRUse &LU = Uses[Workspace.size()];

  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))

  for (const Formula &F : LU.Formulae) {

    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
    for (const SCEV *Reg : ReqRegs) {
      if ((F.ScaledReg && F.ScaledReg == Reg) ||
        if (NumReqRegsToFind == 0)

    if (NumReqRegsToFind != 0) {

    NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
    if (NewCost < SolutionCost) {
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);

              dbgs() << ".\n Regs:";
              for (const SCEV *S : NewRegs)
                dbgs() << ' ' << *S;

        SolutionCost = NewCost;
        Solution = Workspace;
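// SolveRecurse is a depth-first branch-and-bound search: Workspace holds one
// chosen formula per use visited so far, ReqRegs steers each use toward
// formulae that reuse registers already live in CurRegs, and any partial
// cost that is not below SolutionCost prunes the entire subtree.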
  SolutionCost.Lose();

  Workspace.reserve(Uses.size());

  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    DEBUG(dbgs() << "\nNo Satisfactory Solution\n");

        "The chosen solution requires "; SolutionCost.print(dbgs());
        for (size_t i = 0, e = Uses.size(); i != e; ++i) {
          Uses[i].print(dbgs());
          Solution[i]->print(dbgs());

  assert(Solution.size() == Uses.size() && "Malformed solution!");
    bool AllDominate = true;
      if (isa<CatchSwitchInst>(Tentative))

      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
          (!BetterPos || !DT.dominates(Inst, BetterPos)))

    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;

      if (!Rung) return IP;
      Rung = Rung->getIDom();
      if (!Rung) return IP;
      IDom = Rung->getBlock();

      const Loop *IDomLoop = LI.getLoopFor(IDom);
      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
      if (IDomDepth <= IPLoopDepth &&
          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
  if (LU.Kind == LSRUse::ICmpZero)
        dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());

  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L) continue;

    if (!ExitingBlocks.empty()) {
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
         && !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  while (isa<PHINode>(IP)) ++IP;

  while (IP->isEHPad()) ++IP;

  while (isa<DbgInfoIntrinsic>(IP)) ++IP;
Value *LSRInstance::Expand(const LSRUse &LU,
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);

  Type *OpTy = LF.OperandValToReplace->getType();

  Type *Ty = F.getType();

  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");
                                 LF.UserInst, LF.OperandValToReplace,

  Value *ICmpScaledV = nullptr;

    const SCEV *ScaledS = F.ScaledReg;
                                   LF.UserInst, LF.OperandValToReplace,

    if (LU.Kind == LSRUse::ICmpZero) {
             "The only scale supported by ICmpZero uses is -1!");

  int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;

    if (LU.Kind == LSRUse::ICmpZero) {
        ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
        ICmpScaledV = ConstantInt::get(IntTy, Offset);

  int64_t UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset != 0) {

  if (LU.Kind == LSRUse::ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
      assert(!F.BaseGV && "ICmp does not support folding a global value and "
                          "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
          CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
                           ICmpScaledV, OpTy, "tmp", CI);

      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");

      C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
void LSRInstance::RewriteForPHI(PHINode *PN,
      Loop *PNLoop = LI.getLoopFor(Parent);
      if (!PNLoop || Parent != PNLoop->getHeader()) {
                  .setMergeIdenticalEdges()
                  .setDontDeleteUselessPHIs());

          if (L->contains(BB) && !L->contains(PN))

      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));

        Type *OpTy = LF.OperandValToReplace->getType();
            CastInst::Create(CastInst::getCastOpcode(FullV, false,
                             FullV, LF.OperandValToReplace->getType(),

        Pair.first->second = FullV;
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                          const Formula &F, SCEVExpander &Rewriter,
                          SmallVectorImpl<WeakVH> &DeadInsts) const {
  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts);
  } else {
    Value *FullV =
      Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);

    // If this is reuse-by-noop-cast, insert the noop cast.
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                         FullV, OpTy, "tmp", LF.UserInst);
      FullV = Cast;
    }

    // Update the user. ICmpZero is handled specially here, because Expand
    // may have updated one of the operands of the icmp already, and its new
    // value may happen to be equal to LF.OperandValToReplace, in which case
    // replaceUsesOfWith would replace both operands; we only want one.
    if (LU.Kind == LSRUse::ICmpZero)
      LF.UserInst->setOperand(0, FullV);
    else
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
  }

  DeadInsts.emplace_back(LF.OperandValToReplace);
}
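/// Rewrite all the fixup locations with new values, following the chosen
/// solution.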
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakVH, 16> DeadInsts;

  SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
                        "lsr");
  Rewriter.disableCanonicalMode();
  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

  // Mark phi nodes that terminate chains so the expander tries to reuse them.
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);
  }

  // Expand the new value definitions and update the users.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts);
      Changed = true;
    }

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, Rewriter, DeadInsts);
    Changed = true;
  }

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
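// The LSRInstance constructor below is the driver for the whole
// transformation: it prepares the loop, collects fixups and initial
// formulae, generates reuse formulae, solves for a minimum-cost subset,
// and then implements the chosen solution.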
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI)
    : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
      IVIncInsertPos(nullptr) {
  // If there's no interesting work to be done, bail early.
  if (IU.empty()) return;

  // If there's too much analysis to be done, bail early.
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      (void)U;
      DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
                   << "\n");
      return;
    }
    // Bail out if we have a PHI on an EHPad that gets a value from a
    // CatchSwitchInst. The CatchSwitchInst cannot be split, so there is no
    // good place to stick any instructions.
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
      auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
      if (isa<FuncletPadInst>(FirstNonPHI) ||
          isa<CatchSwitchInst>(FirstNonPHI))
        for (BasicBlock *PredBB : PN->blocks())
          if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
            return;
    }
  }

#ifndef NDEBUG
  // All dominating loops must have preheaders, or SCEVExpander may not be
  // able to materialize an AddRecExpr whose Start is an outer AddRecExpr.
  for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
       Rung; Rung = Rung->getIDom()) {
    BasicBlock *BB = Rung->getBlock();
    const Loop *DomLoop = LI.getLoopFor(BB);
    if (DomLoop && DomLoop->getHeader() == BB)
      assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop!");
  }
#endif
  // First, perform some low-level loop optimizations.
  OptimizeShadowIV();
  OptimizeLoopTermCond();

  // If loop preparation eliminates all interesting IV users, bail.
  if (IU.empty()) return;

  // Skip loops with subloops; LSR currently only handles innermost loops.
  if (!L->empty()) {
    DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
    return;
  }

  // Start collecting data and preparing for the solver.
  CollectChains();
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

  assert(!Uses.empty() && "IVUsers reported at least one use");
  DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
        print_uses(dbgs()));

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);

  // Release memory that is no longer needed.
  Factors.clear();
  Types.clear();
  RegUses.clear();

  if (Solution.empty())
    return;

#ifndef NDEBUG
  // Formulae should be legal.
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) &&
             "Illegal formula generated!");
  }
#endif

  // Now that we've decided what we want, make it so.
  ImplementSolution(Solution);
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty()) return;

  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << *Ty << ')';
  }
  OS << '\n';
}
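// Illustrative output only (not from an actual run): for a loop whose
// formulae share a stride of 4 over i64 induction variables, this would
// print something like:
//   LSR has identified the following interesting factors and types: *4, (i64)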
void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      OS << "  ";
      LF.print(OS);
      OS << '\n';
    }
}
void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    OS << "  ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << "    ";
      F.print(OS);
      OS << '\n';
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}
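// Debug helper; LLVM_DUMP_METHOD marks definitions like dump() that should
// not be stripped from debug builds.
LLVM_DUMP_METHOD
void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}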
namespace {

class LoopStrengthReduce : public LoopPass {
public:
  static char ID; // Pass ID, replacement for typeid
  LoopStrengthReduce();

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG. However, we do update
  // many analyses if they are around.
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addRequired<IVUsersWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
}

static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI) {
  bool Changed = false;

  // Run the main LSR transformation.
  Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();

  // Remove any extra phis created by processing inner loops.
  Changed |= DeleteDeadPHIs(L->getHeader());
  return Changed;
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
}

char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)
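// Factory entry point used by clients to create the pass.
Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }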