#define DEBUG_TYPE "loop-reduce"
  cl::desc("Enable LSR phi elimination"));

  cl::desc("Stress test LSR IV chains"));
  OS << "[NumUses=" << UsedByIndices.count() << ']';
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
class RegUseTracker {
  RegUsesTy RegUsesMap;

  void CountRegister(const SCEV *Reg, size_t LUIdx);
  void DropRegister(const SCEV *Reg, size_t LUIdx);
  void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
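/// CountRegister - Note that the given register is used by the given use.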
RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
    RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
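/// DropRegister - Forget that the given register is used by the given use.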
RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
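/// SwapAndDropUse - After a use has been swapped with the last use and
/// popped, update every register's bit vector so that the entry for LUIdx
/// takes over LastLUIdx's bit.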
RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
        LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
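/// isRegUsedByUsesOtherThan - Return true if the given register is used by
/// any LSRUse other than the one at LUIdx.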
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
void RegUseTracker::clear() {
  const SCEV *ScaledReg;

  int64_t UnfoldedOffset;

  Formula()
      : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
        ScaledReg(nullptr), UnfoldedOffset(0) {}
  size_t getNumRegs() const;

  void DeleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands())
    if (!AR->getStart()->isZero()) {
                                       AR->getStepRecurrence(SE),
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      for (const SCEV *S : MyGood)
      for (const SCEV *S : MyBad)
    BaseRegs.push_back(Sum);
    BaseRegs.push_back(Sum);
    return Scale != 1 || !BaseRegs.empty();
  return BaseRegs.size() <= 1;
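/// Canonicalize - Put the formula into canonical form: when there is more
/// than one base register, promote one of them (an AddRec if possible) into
/// ScaledReg with Scale = 1.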
void Formula::Canonicalize() {
  assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
  ScaledReg = BaseRegs.back();
  size_t BaseRegsSize = BaseRegs.size();
  while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
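/// Unscale - The inverse of Canonicalize: fold a ScaledReg with Scale == 1
/// back into the base register list.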
bool Formula::Unscale() {
  BaseRegs.push_back(ScaledReg);

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
void Formula::DeleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())
bool Formula::referencesReg(const SCEV *S) const {
  return S == ScaledReg ||
         std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
  if (!First) OS << " + "; else First = false;
  BaseGV->printAsOperand(OS, false);

  if (BaseOffset != 0) {
    if (!First) OS << " + "; else First = false;

  for (const SCEV *BaseReg : BaseRegs) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << *BaseReg << ')';

  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";

    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";

  if (UnfoldedOffset != 0) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                            bool IgnoreSignificantBits = false) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    const APInt &LA = C->getValue()->getValue();
    if (LA.srem(RA) != 0)
                                    IgnoreSignificantBits);
    if (!Step) return nullptr;
                                     IgnoreSignificantBits);
    if (!Start) return nullptr;
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    for (const SCEV *S : Add->operands()) {
      if (!Op) return nullptr;

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
    for (const SCEV *S : Mul->operands()) {
                         IgnoreSignificantBits)) {
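/// ExtractImmediate - If S involves the addition of a constant integer
/// value, return that integer value, and mutate S to point to a new SCEV
/// with that value excluded.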
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    if (C->getValue()->getValue().getMinSignedBits() <= 64) {
      return C->getValue()->getSExtValue();
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
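/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.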
  bool isAddress = isa<LoadInst>(Inst);
    if (SI->getOperand(1) == OperandVal)
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::x86_sse_storeu_ps:
    case Intrinsic::x86_sse2_storeu_pd:
    case Intrinsic::x86_sse2_storeu_dq:
    case Intrinsic::x86_sse2_storel_dq:
      if (II->getArgOperand(0) == OperandVal)

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
    AccessTy = SI->getOperand(0)->getType();
  else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::x86_sse_storeu_ps:
    case Intrinsic::x86_sse2_storeu_pd:
    case Intrinsic::x86_sse2_storeu_dq:
    case Intrinsic::x86_sse2_storel_dq:
      AccessTy = II->getArgOperand(0)->getType();

  if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
                                 PTy->getAddressSpace());
  if (!Processed.insert(S).second)

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands()) {

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    if (Mul->getNumOperands() == 2) {
      if (isa<SCEVConstant>(Mul->getOperand(0)))

      if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
        Value *UVal = U->getValue();
        if (UI && UI->getOpcode() == Instruction::Mul &&
  bool Changed = false;

  while (!DeadInsts.empty()) {
                                 const LSRUse &LU, const Formula &F);

                                 const LSRUse &LU, const Formula &F);
  unsigned NumBaseAdds;

  Cost()
    : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
      SetupCost(0), ScaleCost(0) {}

    return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
             | ImmCost | SetupCost | ScaleCost) != ~0u)
        || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
             & ImmCost & SetupCost & ScaleCost) == ~0u);

    assert(isValid() && "invalid cost");
    return NumRegs == ~0u;
  void RateRegister(const SCEV *Reg,

  void RatePrimaryRegister(const SCEV *Reg,

void Cost::RateRegister(const SCEV *Reg,
    if (AR->getLoop() != L) {

    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(AR->getOperand(1), Regs, L, SE, DT);

  if (!isa<SCEVUnknown>(Reg) &&
      !isa<SCEVConstant>(Reg) &&
      !(isa<SCEVAddRecExpr>(Reg) &&
        (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
         isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))

  NumIVMuls += isa<SCEVMulExpr>(Reg) &&
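/// RatePrimaryRegister - Record this register in the set. If we haven't seen
/// it before, rate it. Optional LoserRegs provides a way to declare any
/// formula that refers to one of those regs an instant loser.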
void Cost::RatePrimaryRegister(const SCEV *Reg,
  if (LoserRegs && LoserRegs->count(Reg)) {
  if (Regs.insert(Reg).second) {
    RateRegister(Reg, Regs, L, SE, DT);
    if (LoserRegs && isLoser())
  assert(F.isCanonical() && "Cost is accurate only for canonical formula");

  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);

  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  NumBaseAdds += (F.UnfoldedOffset != 0);

  for (int64_t O : Offsets) {
    int64_t Offset = (uint64_t)O + F.BaseOffset;
    else if (Offset != 0)

  assert(isValid() && "invalid cost");
  return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
                  ImmCost, SetupCost) <
         std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
                  Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
  if (AddRecCost != 0)
    OS << ", with addrec cost " << AddRecCost;
    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
  if (NumBaseAdds != 0)
    OS << ", plus " << NumBaseAdds << " base add"
       << (NumBaseAdds == 1 ? "" : "s");
    OS << ", plus " << ScaleCost << " scale cost";
    OS << ", plus " << ImmCost << " imm cost";
    OS << ", plus " << SetupCost << " setup cost";

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  Value *OperandValToReplace;

  bool isUseFullyOutsideLoop(const Loop *L) const;
LSRFixup::LSRFixup()
  : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),

bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
    UserInst->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);

  if (LUIdx != ~size_t(0))
    OS << ", LUIdx=" << LUIdx;

  OS << ", Offset=" << Offset;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
struct UniquifierDenseMapInfo {
    V.push_back(reinterpret_cast<const SCEV *>(-1));
    V.push_back(reinterpret_cast<const SCEV *>(-2));
  bool AllFixupsOutsideLoop;

  Type *WidestFixupType;

  LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
                                MinOffset(INT64_MAX),
                                MaxOffset(INT64_MIN),
                                AllFixupsOutsideLoop(true),
                                RigidFormula(false),
                                WidestFixupType(nullptr) {}
  bool HasFormulaWithSameRegs(const Formula &F) const;
  bool InsertFormula(const Formula &F);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  return Uniquifier.count(Key);
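/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.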
bool LSRUse::InsertFormula(const Formula &F) {
  assert(F.isCanonical() && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);

  if (!Uniquifier.insert(Key).second)

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    Regs.insert(F.ScaledReg);
void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

  for (const SCEV *S : OldRegs)
      RegUses.DropRegister(S, LUIdx);
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
    if (AccessTy->isPointerTy())

  OS << ", Offsets={";
  bool NeedComma = false;
  for (int64_t O : Offsets) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                                 LSRUse::KindType Kind, Type *AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset != 0)

    if (Scale != 0 && Scale != -1)

    if (BaseOffset != 0) {
      BaseOffset = -(uint64_t)BaseOffset;

    return !BaseGV && Scale == 0 && BaseOffset == 0;

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, Type *AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
  MinOffset = (uint64_t)BaseOffset + MinOffset;
  if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
  MaxOffset = (uint64_t)BaseOffset + MaxOffset;
                               HasBaseReg, Scale) &&
                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, Type *AccessTy,
  assert((F.isCanonical() || F.Scale != 0));
                               F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                      int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
                      GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
                               BaseOffset, HasBaseReg, Scale) ||
                                BaseGV, BaseOffset, true, 0));

                      int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
                                 const LSRUse &LU, const Formula &F) {
                               LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F) {
    return F.Scale != 1;

    int ScaleCostMinOffset =
                               F.BaseOffset + LU.MinOffset,
                               F.HasBaseReg, F.Scale);
    int ScaleCostMaxOffset =
                               F.BaseOffset + LU.MaxOffset,
                               F.HasBaseReg, F.Scale);

    assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);

  case LSRUse::ICmpZero:
  case LSRUse::Special:
                             LSRUse::KindType Kind, Type *AccessTy,
  if (BaseOffset == 0 && !BaseGV) return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  if (!HasBaseReg && Scale == 1) {

                             int64_t MaxOffset, LSRUse::KindType Kind,
                             Type *AccessTy, const SCEV *S, bool HasBaseReg) {
  if (S->isZero()) return true;

  if (!S->isZero()) return false;

  if (BaseOffset == 0 && !BaseGV) return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
                                 BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

    UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase;

  IVChain() : ExprBase(nullptr) {}

  IVChain(const IVInc &Head, const SCEV *Base)
    : Incs(1, Head), ExprBase(Base) {}

  const_iterator begin() const {
    assert(!Incs.empty());
    return std::next(Incs.begin());
  const_iterator end() const {

  bool hasIncs() const { return Incs.size() >= 2; }

  void add(const IVInc &X) { Incs.push_back(X); }

  Instruction *tailUserInst() const { return Incs.back().UserInst; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,
  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  void OptimizeShadowIV();
  void OptimizeLoopTermCond();

  void FinalizeChain(IVChain &Chain);
  void CollectChains();

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, Type *AccessTy);

  std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
                                    LSRUse::KindType Kind,

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  const Formula &Base, unsigned Depth,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

                    const Cost &CurCost,

  Value *Expand(const LSRFixup &LF,
  void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
  void Rewrite(const LSRFixup &LF,

  bool getChanged() const { return Changed; }

  void print_factors_and_types(raw_ostream &OS) const;
void LSRInstance::OptimizeShadowIV() {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    Type *DestTy = nullptr;
    bool IsSigned = false;

    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      DestTy = UCast->getDestTy();
    } else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?

    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)

    if (Incr->getOperand(0) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(1));
    else if (Incr->getOperand(1) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(0));

    PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);

      BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
                               Instruction::FAdd : Instruction::FSub,
                             NewPH, CFP, "IV.S.next.", Incr);
    if (U.getUser() == Cond) {
  if (!Sel || !Sel->hasOneUse()) return Cond;

  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {

         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
    if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
    if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
    else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
      NewRHS = SU->getValue();

    Pred = CmpInst::getInversePredicate(Pred);
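/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.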
LSRInstance::OptimizeLoopTermCond() {
  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    if (!FindIVUserForCond(Cond, CondUse))

    Cond = OptimizeMax(Cond, CondUse);

    if (!DT.dominates(ExitingBlock, LatchBlock))

    if (LatchBlock != ExitingBlock)
        if (&*UI != CondUse &&

          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(*UI, L);
          if (!A || !B) continue;

                dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
                goto decline_post_inc;
              goto decline_post_inc;
              goto decline_post_inc;
              goto decline_post_inc;

    DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

        Cond = cast<ICmpInst>(Cond->clone());
        ExitingBlock->getInstList().insert(TermBr, Cond);

      IVIncInsertPos = Inst;
    else if (BB != IVIncInsertPos->getParent())
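/// reconcileNewOffset - Determine if the given use can accommodate a fixup
/// at the given offset and other details. If so, update the use and return
/// true.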
LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                                LSRUse::KindType Kind, Type *AccessTy) {
  int64_t NewMinOffset = LU.MinOffset;
  int64_t NewMaxOffset = LU.MaxOffset;
  Type *NewAccessTy = AccessTy;

  if (LU.Kind != Kind)
    NewAccessTy = Type::getVoidTy(AccessTy->getContext());

  if (NewOffset < LU.MinOffset) {
                          LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (NewOffset > LU.MaxOffset) {
                          NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
  if (NewOffset != LU.Offsets.back())
    LU.Offsets.push_back(NewOffset);
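/// getUse - Return an LSRUse index and an offset value for a fixup which
/// needs the given expression, with the given kind and optional access type.
/// Either reuse an existing use or create a new one, as needed.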
std::pair<size_t, int64_t>
LSRInstance::getUse(const SCEV *&Expr,
                    LSRUse::KindType Kind, Type *AccessTy) {
  const SCEV *Copy = Expr;

  std::pair<UseMapTy::iterator, bool> P =
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  if (LU.Offsets.empty() || Offset != LU.Offsets.back())
    LU.Offsets.push_back(Offset);

  LU.MinOffset = Offset;
  LU.MaxOffset = Offset;
  return std::make_pair(LUIdx, Offset);
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())
  RegUses.SwapAndDropUse(LUIdx, Uses.size());
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset == 0)
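/// CollectInterestingTypesAndFactors - Collect interesting types and strides
/// discovered while analyzing IV users, along with the factors between
/// interesting strides.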
void LSRInstance::CollectInterestingTypesAndFactors() {
    const SCEV *Expr = IU.getExpr(U);
      } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
        Worklist.append(Add->op_begin(), Add->op_end());
      }
    } while (!Worklist.empty());

       I = Strides.begin(), E = Strides.end(); I != E; ++I)
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

            dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
        if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
          Factors.insert(Factor->getValue()->getValue().getSExtValue());
        if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
          Factors.insert(Factor->getValue()->getValue().getSExtValue());

  if (Types.size() == 1)
  for (; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
            dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {

  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);

    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
    for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
      const SCEV *SubExpr = *I;
    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
  if (!isa<SCEVConstant>(IncExpr)) {
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
  if (!Chain.hasIncs())

  if (!Users.empty()) {
    DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
            dbgs() << "  " << *Inst << "\n";
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;
  for (const IVInc &Inc : Chain) {
    if (Inc.IncExpr->isZero())

    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;

    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;

  DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))

    LastIncExpr = OperExpr;

    if (!isa<SCEVAddRecExpr>(LastIncExpr))

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                 << ") IV=" << *LastIncExpr << "\n");
    DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                 << ") IV+" << *LastIncExpr << "\n");

    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),

    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for ( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)

        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {

    NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
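/// CollectChains - Populate the vector of abstract IV chains for this loop,
/// walking instructions along the path from the loop header to the latch.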
void LSRInstance::CollectChains() {
  DEBUG(dbgs() << "Collecting IV Chains.\n");

       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

         BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
       BBIter != BBEnd; ++BBIter) {

      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))

      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(I);

      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

      ChainInstruction(PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                             ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
  IVChainVec.resize(ChainIdx);
void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    DEBUG(dbgs() << "        Inc: " << Inc.UserInst << "\n");
    auto UseI = std::find(Inc.UserInst->op_begin(), Inc.UserInst->op_end(),
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {
    DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");

  DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
  Type *IVTy = IVSrc->getType();

  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
    if (LeftOverExpr && !LeftOverExpr->isZero()) {
      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

    assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
      LeftOverExpr = nullptr;

    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);

  if (isa<PHINode>(Chain.tailUserInst())) {
      Value *IVOper = IVSrc;
      if (IVTy != PostIncTy) {
        assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
void LSRInstance::CollectFixupsAndInitialFormulae() {
                      U.getOperandValToReplace());
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI))

    LSRFixup &LF = getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = U.getPostIncLoops();

    LSRUse::KindType Kind = LSRUse::Basic;
    Type *AccessTy = nullptr;
    if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {

    const SCEV *S = IU.getExpr(U);

    if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == LF.OperandValToReplace) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
                          LF.PostIncLoops, SE, DT);
        Kind = LSRUse::ICmpZero;

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
    LF.Offset = P.second;
    LSRUse &LU = Uses[LF.LUIdx];
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LF.LUIdx);
      CountRegisters(LU.Formulae.back(), LF.LUIdx);
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
    LU.RigidFormula = true;

  F.InitialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.CountRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.CountRegister(BaseReg, LUIdx);
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
         "Formula is illegal");
  if (!LU.InsertFormula(F))

  CountRegisters(F, LUIdx);
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  while (!Worklist.empty()) {

    if (!Visited.insert(S).second)

      Worklist.append(N->op_begin(), N->op_end());
    else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        if (L->contains(Inst)) continue;
      } else if (isa<UndefValue>(V))

      for (const Use &U : V->uses()) {
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))

        const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
        if (!isa<SCEVUnknown>(UserS))
              SE.getUnknown(const_cast<Instruction *>(UserInst)));

        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));

        LSRFixup &LF = getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
        LF.Offset = P.second;
        LSRUse &LU = Uses[LF.LUIdx];
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LF.LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                                   unsigned Depth = 0) {
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands()) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
                                              C, Ops, L, SE, Depth+1);
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
      Remainder = nullptr;
      Remainder = SE.getConstant(AR->getType(), 0);
    return SE.getAddRecExpr(Remainder,
  } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    if (Mul->getNumOperands() != 2)
          dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
      const SCEV *Remainder =
        Ops.push_back(SE.getMulExpr(C, Remainder));
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (AddOps.size() == 1)
                      LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J),

    if (InnerAddOps.size() == 1 &&
                        LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))

        F.ScaledReg = nullptr;
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
      F.BaseRegs[Idx] = InnerSum;

      F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
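/// GenerateReassociations - Split out subexpressions from adds and the bases
/// of addrecs.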
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
                                         Formula Base, unsigned Depth) {
  assert(Base.isCanonical() && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)

  for (const SCEV *BaseReg : Base.BaseRegs) {
      F.BaseRegs.push_back(BaseReg);

  if (Ops.size() > 1) {
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  for (int64_t Offset : Worklist) {
    F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
    if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,

          F.ScaledReg = nullptr;
          F.DeleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

  if (G->isZero() || Imm == 0)
  F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  Type *IntTy = Base.getType();

  if (LU.MinOffset != LU.MaxOffset) return;

  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset == INT64_MIN && Factor == -1)
    int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
    if (NewBaseOffset / Factor != Base.BaseOffset)
        !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))

    int64_t Offset = LU.MinOffset;
    if (Offset == INT64_MIN && Factor == -1)
    Offset = (uint64_t)Offset * Factor;
    if (Offset / Factor != LU.MinOffset)
        !ConstantInt::isValueValidForType(IntTy, Offset))

    F.BaseOffset = NewBaseOffset;

    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))

    F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])

      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)

    if (F.UnfoldedOffset != 0) {
      if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
      F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
      if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
          !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))

    (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  Type *IntTy = Base.getType();

  if (Base.Scale != 0 && !Base.Unscale())

  assert(Base.Scale == 0 && "Unscale did not do its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero &&
        !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
          dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
        F.ScaledReg = Quotient;
        F.DeleteBaseReg(F.BaseRegs[i]);
        if (F.Scale == 1 && F.BaseRegs.empty())
        (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  Type *DstTy = Base.getType();

  for (Type *SrcTy : Types) {
      for (const SCEV *&BaseReg : F.BaseRegs)

      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

      (void)InsertFormula(LU, LUIdx, F);
  const SCEV *OrigReg;

  WorkItem(size_t LI, int64_t I, const SCEV *R)
    : LUIdx(LI), Imm(I), OrigReg(R) {}

  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
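/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.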
void LSRInstance::GenerateCrossUseConstantOffsets() {
  typedef std::map<int64_t, const SCEV *> ImmMapTy;

  for (const SCEV *Use : RegUses) {
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    if (Imms.size() == 1)

    DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
          for (const auto &Entry : Imms)
            dbgs() << ' ' << Entry.first;

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;

      int64_t JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {
        DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');

      ImmMapTy::const_iterator OtherImms[] = {
        Imms.begin(), std::prev(Imms.end()),
        Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /

        ImmMapTy::const_iterator M = OtherImms[i];
        if (M == J || M == JE) continue;

        int64_t Imm = (uint64_t)JImm - M->first;
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));

  UsedByIndicesMap.clear();
  UniqueItems.clear();

  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    int64_t Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];

      if (F.ScaledReg == OrigReg) {
        int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
        if (F.referencesReg(SE.getSCEV(
                ConstantInt::get(IntTy, -(uint64_t)Offset))))
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
              (NewF.BaseOffset < 0) &&

        NewF.Canonicalize();
        (void)InsertFormula(LU, LUIdx, NewF);

      for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
        const SCEV *BaseReg = F.BaseRegs[N];
        if (BaseReg != OrigReg)
        NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
                          LU.Kind, LU.AccessTy, NewF)) {
          NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
        NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

        for (const SCEV *NewReg : NewF.BaseRegs)
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
            if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
                countTrailingZeros<uint64_t>(NewF.BaseOffset))

        NewF.Canonicalize();
        (void)InsertFormula(LU, LUIdx, NewF);
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  GenerateCrossUseConstantOffsets();

        "After generating reuse formulae:\n";
        print_uses(dbgs()));
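/// FilterOutUndesirableDedicatedRegisters - If there are multiple formulae
/// with the same set of registers used by other uses, pick the best one and
/// delete the others.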
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  bool ChangedFormulae = false;

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
      if (CostF.isLoser()) {

        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))

      std::pair<BestFormulaeTy::const_iterator, bool> P =
        BestFormulae.insert(std::make_pair(Key, FIdx));

        Formula &Best = LU.Formulae[P.first->second];

        CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
        if (CostF < CostBest)
              " in favor of formula "; Best.print(dbgs());

        ChangedFormulae = true;
        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

    BestFormulae.clear();

  DEBUG(if (ChangedFormulae) {
          "After filtering out undesirable candidates:\n";
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
    if (FSize >= ComplexityLimit) {
    if (Power >= ComplexityLimit)
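/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
/// of the registers of another formula, it won't help reduce register
/// pressure; remove it to simplify the system.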
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    DEBUG(dbgs() << "The search space is too complex.\n");

    DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                    "which use a superset of registers used by other "

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
               I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LU.DeleteFormula(F);
          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                    (I - F.BaseRegs.begin()));
                if (LU.HasFormulaWithSameRegs(NewF)) {
                  LU.DeleteFormula(F);

        LU.RecomputeRegs(LUIdx, RegUses);

    DEBUG(dbgs() << "After pre-selection:\n";
          print_uses(dbgs()));
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)

  DEBUG(dbgs() << "The search space is too complex.\n"
                  "Narrowing the search space by assuming that uses separated "
                  "by a constant offset will use the same registers.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))

      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);

      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
                              LU.Kind, LU.AccessTy))

      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

        if (Fixup.LUIdx == LUIdx) {
          Fixup.LUIdx = LUThatHas - &Uses.front();
          Fixup.Offset += F.BaseOffset;
          if (LUThatHas->Offsets.back() != Fixup.Offset) {
            LUThatHas->Offsets.push_back(Fixup.Offset);
            if (Fixup.Offset > LUThatHas->MaxOffset)
              LUThatHas->MaxOffset = Fixup.Offset;
            if (Fixup.Offset < LUThatHas->MinOffset)
              LUThatHas->MinOffset = Fixup.Offset;

          DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');

        if (Fixup.LUIdx == NumUses-1)
          Fixup.LUIdx = LUIdx;

      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LUThatHas->DeleteFormula(F);

      LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      DeleteUse(LU, LUIdx);

  DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    DEBUG(dbgs() << "The search space is too complex.\n");

    DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                    "undesirable dedicated registers.\n");

    FilterOutUndesirableDedicatedRegisters();

    DEBUG(dbgs() << "After pre-selection:\n";
          print_uses(dbgs()));
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    DEBUG(dbgs() << "The search space is too complex.\n");

    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))

      unsigned Count = RegUses.getUsedByIndices(Reg).count();
      if (Count > BestNum) {

    DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                 << " will yield profitable reuse.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best)) continue;

      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LU.DeleteFormula(F);
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");

        LU.RecomputeRegs(LUIdx, RegUses);

    DEBUG(dbgs() << "After pre-selection:\n";
          print_uses(dbgs()));
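/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
/// formulae to choose from, use some rough heuristics to prune down the
/// number of formulae.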
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  NarrowSearchSpaceByPickingWinnerRegs();
                               const Cost &CurCost,
  const LSRUse &LU = Uses[Workspace.size()];

  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))

  for (const Formula &F : LU.Formulae) {
    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
    for (const SCEV *Reg : ReqRegs) {
      if ((F.ScaledReg && F.ScaledReg == Reg) ||
          std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
        if (NumReqRegsToFind == 0)
    if (NumReqRegsToFind != 0) {

    NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
    if (NewCost < SolutionCost) {
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
              dbgs() << ".\n Regs:";
              for (const SCEV *S : NewRegs)
                dbgs() << ' ' << *S;

        SolutionCost = NewCost;
        Solution = Workspace;

  SolutionCost.Lose();

  Workspace.reserve(Uses.size());

  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    DEBUG(dbgs() << "\nNo Satisfactory Solution\n");

        "The chosen solution requires "; SolutionCost.print(dbgs());
  for (size_t i = 0, e = Uses.size(); i != e; ++i) {
    Uses[i].print(dbgs());
    Solution[i]->print(dbgs());

  assert(Solution.size() == Uses.size() && "Malformed solution!");
  const Loop *IPLoop = LI.getLoopFor(IP->getParent());
  unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;

    if (!Rung) return IP;
    Rung = Rung->getIDom();
    if (!Rung) return IP;
    IDom = Rung->getBlock();

    const Loop *IDomLoop = LI.getLoopFor(IDom);
    unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
    if (IDomDepth <= IPLoopDepth &&
        (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))

    bool AllDominate = true;
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
          (!BetterPos || !DT.dominates(Inst, BetterPos)))
  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
  if (LU.Kind == LSRUse::ICmpZero)
        dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());

  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L) continue;

    if (!ExitingBlocks.empty()) {
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)

  assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
         && !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  while (isa<PHINode>(IP)) ++IP;

  while (isa<LandingPadInst>(IP)) ++IP;

  while (isa<DbgInfoIntrinsic>(IP)) ++IP;
/// Expand - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding").
Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F,
                           BasicBlock::iterator IP, SCEVExpander &Rewriter,
                           SmallVectorImpl<WeakVH> &DeadInsts) const {
  const LSRUse &LU = Uses[LF.LUIdx];
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  // Determine an input position which will be dominated by the operands and
  // which will dominate the result.
  IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);

  // This is the type that the user actually needs.
  Type *OpTy = LF.OperandValToReplace->getType();
  // This will be the type that we'll initially expand to.
  Type *Ty = F.getType();
  // ... (fall back to OpTy when the formula has no type of its own, and
  //      compute IntTy = SE.getEffectiveSCEVType(Ty)) ...

  // Build up a list of operands. Expand the BaseRegs portion.
  SmallVector<const SCEV *, 8> Ops;
  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");

    // If we're expanding for a post-increment user, make the post-increment
    // adjustment.
    PostIncLoopSet Loops;
    Reg = TransformForPostIncUse(Denormalize, Reg,
                                 LF.UserInst, LF.OperandValToReplace,
                                 Loops, SE, DT);
    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
  }

  // Expand the ScaledReg portion, making the same post-increment adjustment.
  Value *ICmpScaledV = nullptr;
  if (F.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;
    PostIncLoopSet Loops;
    ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
                                     LF.UserInst, LF.OperandValToReplace,
                                     Loops, SE, DT);
    if (LU.Kind == LSRUse::ICmpZero) {
      // Expand ScaledReg unscaled; the scale is carried by the comparison's
      // other operand instead.
      assert(F.Scale == -1 &&
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
    } else {
      // ... (otherwise multiply ScaledS by F.Scale and add it to Ops) ...
    }
  }

  // ... (expand the BaseGV portion, if any) ...

  // Expand the immediate portion.
  int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
  if (Offset != 0) {
    if (LU.Kind == LSRUse::ICmpZero) {
      // The other interesting way of "folding" with an ICmpZero is to use a
      // negated immediate.
      if (!ICmpScaledV)
        ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
      else {
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, Offset);
      }
    } else {
      // Just add the immediate values. These again are expected to be
      // matched as part of the address.
      Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
    }
  }

  // Expand the unfolded offset portion.
  int64_t UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset != 0)
    Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
                                                       UnfoldedOffset)));

  // Emit instructions summing all the operands.
  Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);

  // An ICmpZero Formula represents an ICmp which we're handling as a
  // comparison against zero. Rewrite it into its operands.
  if (LU.Kind == LSRUse::ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
    DeadInsts.push_back(CI->getOperand(1));
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                        "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
                                                   OpTy, false),
                           ICmpScaledV, OpTy, "tmp", CI);
        ICmpScaledV = Cast;
      }
      CI->setOperand(1, ICmpScaledV);
    } else {
      // A scale of 1 means that the scale has been expanded as part of the
      // base regs.
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)Offset);
      if (C->getType() != OpTy)
        C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                          OpTy, false),
                                  C, OpTy);
      CI->setOperand(1, C);
    }
  }

  return FullV;
}
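// --- Illustrative sketch (not part of this file): the ICmpZero offset fold.
// For an ICmpZero use, instead of materializing "X + Offset" and comparing it
// against zero, Expand negates the immediate and compares X against it:
//     (X + Offset) == 0   <==>   X == -Offset   (modulo 2^64)
// which is why ICmpScaledV is built as ConstantInt::get(IntTy,
// -(uint64_t)Offset) above. A standalone arithmetic check of the identity:
#include <cassert>
#include <cstdint>

namespace lsr_icmpzero_sketch {

inline bool cmpViaSum(int64_t X, int64_t Offset) {
  return (uint64_t)X + (uint64_t)Offset == 0; // compare X + Offset against 0
}

inline bool cmpViaNegatedImm(int64_t X, int64_t Offset) {
  return (uint64_t)X == -(uint64_t)Offset;    // compare X against -Offset
}

inline void check(int64_t X, int64_t Offset) {
  assert(cmpViaSum(X, Offset) == cmpViaNegatedImm(X, Offset));
}

} // namespace lsr_icmpzero_sketch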
/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
/// of their operands effectively happens in their predecessor blocks, so the
/// expression may need to be expanded in multiple places.
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRFixup &LF,
                                const Formula &F, SCEVExpander &Rewriter,
                                SmallVectorImpl<WeakVH> &DeadInsts,
                                Pass *P) const {
  DenseMap<BasicBlock *, Value *> Inserted;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
      BasicBlock *BB = PN->getIncomingBlock(i);

      // If the incoming edge is critical, split it so the expanded code is
      // not executed on paths that don't need it.
      Loop *PNLoop = LI.getLoopFor(PN->getParent());
      if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
        BasicBlock *NewBB = nullptr;
        // ... split the edge with SplitCriticalEdge (or, for a landing pad,
        //     SplitLandingPadPredecessors), using
        //         CriticalEdgeSplittingOptions(&DT, &LI)
        //             .setMergeIdenticalEdges()
        //             .setDontDeleteUselessPHIs() ...
        if (NewBB) {
          // If PN is outside of the loop and BB is in the loop, we want to
          // move the block to be immediately before the PHI block, not
          // immediately after BB.
          if (L->contains(BB) && !L->contains(PN))
            NewBB->moveBefore(PN->getParent());

          // Splitting the edge can reduce the number of PHI entries we have.
          e = PN->getNumIncomingValues();
          BB = NewBB;
          i = PN->getBasicBlockIndex(BB);
        }
      }

      // Expand at most once per predecessor block, reusing the expansion
      // for duplicate incoming edges.
      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
        Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
      if (!Pair.second)
        PN->setIncomingValue(i, Pair.first->second);
      else {
        Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);

        // If this is reuse-by-noop-cast, insert the noop cast.
        Type *OpTy = LF.OperandValToReplace->getType();
        if (FullV->getType() != OpTy)
          FullV =
            CastInst::Create(CastInst::getCastOpcode(FullV, false,
                                                     OpTy, false),
                             FullV, LF.OperandValToReplace->getType(),
                             "tmp", BB->getTerminator());

        PN->setIncomingValue(i, FullV);
        Pair.first->second = FullV;
      }
    }
}
/// Rewrite - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding"), and update the UserInst to reference
/// the newly expanded value.
void LSRInstance::Rewrite(const LSRFixup &LF, const Formula &F,
                          SCEVExpander &Rewriter,
                          SmallVectorImpl<WeakVH> &DeadInsts,
                          Pass *P) const {
  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
  } else {
    Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);

    // If this is reuse-by-noop-cast, insert the noop cast.
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                         FullV, OpTy, "tmp", LF.UserInst);
      FullV = Cast;
    }

    // Update the user. ICmpZero is handled specially here, because Expand
    // may have updated one of the operands of the icmp already, and its new
    // value may happen to be equal to LF.OperandValToReplace, in which case
    // replaceUsesOfWith would replace both operands.
    if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
      LF.UserInst->setOperand(0, FullV);
    else
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
  }

  DeadInsts.push_back(LF.OperandValToReplace);
}
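// --- Illustrative sketch (not part of this file): reuse-by-noop-cast.
// Both Rewrite and RewriteForPHI expand to a convenient wide type and then
// cast back to the type the user expects; CastInst::getCastOpcode picks
// trunc/ext/bitcast as appropriate. A minimal helper showing the same
// pattern (a sketch against the LLVM headers this file already includes;
// castToType is a hypothetical name, not part of LSR):
static llvm::Value *castToType(llvm::Value *V, llvm::Type *DestTy,
                               llvm::Instruction *InsertPt) {
  if (V->getType() == DestTy)
    return V; // Already the right type; no cast needed.
  return llvm::CastInst::Create(
      llvm::CastInst::getCastOpcode(V, /*SrcIsSigned=*/false, DestTy,
                                    /*DstIsSigned=*/false),
      V, DestTy, "tmp", InsertPt);
}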
/// ImplementSolution - Rewrite all the fixup locations with new values,
/// following the chosen solution.
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution, Pass *P) {
  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakVH, 16> DeadInsts;

  SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
                        "lsr");
  Rewriter.disableCanonicalMode();
  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

  // Mark phi nodes that terminate chains so the expander tries to reuse them.
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);
  }

  // Expand the new value definitions and update the users.
  for (const LSRFixup &Fixup : Fixups) {
    Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
    Changed = true;
  }

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, Rewriter, DeadInsts);
    Changed = true;
  }

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
LSRInstance::LSRInstance(Loop *L, Pass *P)
    : // ... (initialize the IU, SE, DT, LI, and TTI analysis references
      //      from the pass P) ...
      L(L), Changed(false), IVIncInsertPos(nullptr) {
  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm())
    return;

  // If there's no interesting work to be done, bail early.
  if (IU.empty()) return;

  // If there's too much analysis to be done, bail early. We won't be able to
  // model the problem anyway.
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      (void)U;
      DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
      return;
    }
  }

#ifndef NDEBUG
  // All dominating loops must have preheaders, or SCEVExpander may not be
  // able to materialize an AddRecExpr whose Start is an outer AddRecExpr.
  for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
       Rung; Rung = Rung->getIDom()) {
    BasicBlock *BB = Rung->getBlock();
    const Loop *DomLoop = LI.getLoopFor(BB);
    if (DomLoop && DomLoop->getHeader() == BB) {
      assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
    }
  }
#endif // DEBUG

  // First, perform some low-level loop optimizations.
  OptimizeShadowIV();
  OptimizeLoopTermCond();

  // If loop preparation eliminates all interesting IV users, bail.
  if (IU.empty()) return;

  // Skip nested loops unless they are explicitly requested.
  if (!L->empty()) {
    DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
    return;
  }

  // Start collecting data and preparing for the solver.
  CollectChains();
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

  assert(!Uses.empty() && "IVUsers reported at least one use");
  DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
        print_uses(dbgs()));

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);

  // Release memory that is no longer needed.
  Factors.clear();
  Types.clear();
  RegUses.clear();

  if (Solution.empty())
    return;

#ifndef NDEBUG
  // Formulae should be legal.
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) &&
             "Illegal formula generated!");
  }
#endif

  // Now that we've decided what we want, make it so.
  ImplementSolution(Solution, P);
}
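// --- Illustrative sketch (not part of this file): what LSR accomplishes.
// Conceptually, loop strength reduction rewrites per-iteration address
// arithmetic like "base + 4*i" into an incremented pointer IV, trading a
// multiply-and-add per iteration for a single add. A scalar model of that
// rewrite (all names here are illustrative):
namespace lsr_strength_reduction_sketch {

// Before: recompute the scaled address on every iteration.
inline long addr(long Base, long I, long Scale) { return Base + Scale * I; }

// After: maintain a running pointer; the multiply is strength-reduced to an
// add. Equivalent to calling Use(addr(Base, i, Scale)) for i = 0..N-1.
inline void walk(long Base, long N, long Scale, void (*Use)(long)) {
  for (long P = Base, End = Base + Scale * N; P != End; P += Scale)
    Use(P);
}

} // namespace lsr_strength_reduction_sketch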
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty()) return;

  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << *Ty << ')';
  }
  OS << '\n';
}
void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRFixup &LF : Fixups) {
    dbgs() << "  ";
    LF.print(OS);
    OS << '\n';
  }
}
void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    dbgs() << "  ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << "    ";
      F.print(OS);
      OS << '\n';
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}
#endif
namespace {

/// LoopStrengthReduce - The LoopPass which runs LSR on each loop in the nest.
class LoopStrengthReduce : public LoopPass {
public:
  static char ID; // Pass ID, replacement for typeid
  LoopStrengthReduce();

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
// ... (INITIALIZE_PASS_DEPENDENCY entries for the required analyses) ...
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

Pass *llvm::createLoopStrengthReducePass() {
  return new LoopStrengthReduce();
}

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG. However, we do update
  // many analyses if they are around.
  // ... (addRequired/addPreserved for LoopInfo, DominatorTree,
  //      ScalarEvolution, IVUsers, TargetTransformInfo, and LoopSimplify) ...
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipOptnoneFunction(L))
    return false;

  bool Changed = false;

  // Run the main LSR transformation.
  Changed |= LSRInstance(L, this).getChanged();

  // Remove any extra phis created by processing inner loops, and replace
  // congruent IVs left behind by LSR.
  Changed |= DeleteDeadPHIs(L->getHeader());
  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    SmallVector<WeakVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr");
    unsigned numFolded = Rewriter.replaceCongruentIVs(
        L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
            *L->getHeader()->getParent()));
    if (numFolded) {
      Changed = true;
      DeleteTriviallyDeadInstructions(DeadInsts);
      DeleteDeadPHIs(L->getHeader());
    }
  }
  return Changed;
}
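// --- Illustrative usage (not part of this file).
// The pass registers under the command-line name "loop-reduce", so it can be
// exercised directly, e.g.:
//     opt -loop-reduce input.ll -S
// From C++, a client using the legacy pass manager can add it as below; this
// is a sketch, assuming createLoopStrengthReducePass is declared in
// llvm/Transforms/Scalar.h and addLSR is a hypothetical helper:
#include "llvm/IR/LegacyPassManager.h"

static void addLSR(llvm::legacy::PassManagerBase &PM) {
  PM.add(llvm::createLoopStrengthReducePass());
}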