#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to a LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode"),

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to drop solution if it is less profitable"));

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

    cl::desc("Stress test LSR IV chains"));
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();

  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};

  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {

  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);

  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
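
// Compare Immediate keys so containers keyed on them iterate in a
// deterministic order: scalable and fixed offsets are distinguished first,
// then the known minimum values are compared.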
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
    if (!LHS.isScalable() && RHS.isScalable())
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();

struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  OS << "[NumUses=" << UsedByIndices.count() << ']';
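
// Map each register (SCEV) to the set of LSR uses that reference it, so the
// solver can tell which registers are shared across uses.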
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);
  RegSortData &RSD = Pair.first->second;
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  RSD.UsedByIndices.reset(LUIdx);

RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));

RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;

void RegUseTracker::clear() {
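
// A formula describes one way to materialize an LSR use: an optional base
// global value, base and unfolded immediate offsets, a list of base
// registers, and an optional scaled register.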
  GlobalValue *BaseGV = nullptr;

  Immediate BaseOffset = Immediate::getZero();

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  Immediate UnfoldedOffset = Immediate::getZero();

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  bool countsDownToZero() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
    for (const SCEV *S : Add->operands())

  const SCEV *Start, *Step;

    if (Mul->getOperand(0)->isAllOnesValue()) {

      for (const SCEV *S : MyGood)
      for (const SCEV *S : MyBad)

void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
    BaseRegs.push_back(Sum);
    BaseRegs.push_back(Sum);
bool Formula::isCanonical(const Loop &L) const {
  assert((Scale == 0 || ScaledReg) &&
         "ScaledReg must be non-null if Scale is non-zero");
    return BaseRegs.size() <= 1;
  if (Scale == 1 && BaseRegs.empty())
  return none_of(BaseRegs, [&L](const SCEV *S) {

void Formula::canonicalize(const Loop &L) {
  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);
    ScaledReg = BaseRegs.pop_back_val();
  auto I = find_if(BaseRegs, [&L](const SCEV *S) {
  if (I != BaseRegs.end())
bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
  if (BaseRegs.size() != 1 || ScaledReg)

bool Formula::countsDownToZero() const {
  assert(BaseRegs.size() == 1 && "hasZeroEnd should mean one BaseReg");
  const APInt *StepInt;

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  if (BaseOffset.isNonZero()) {
  for (const SCEV *BaseReg : BaseRegs) {
    OS << "reg(" << *BaseReg << ')';
  if (HasBaseReg && BaseRegs.empty()) {
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    OS << "**error: !HasBaseReg**";
    OS << Scale << "*reg(";
  if (UnfoldedOffset.isNonZero()) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
                              bool IgnoreSignificantBits = false) {
    if (RA.isAllOnes()) {
      if (LHS->getType()->isPointerTy())
      const APInt &LA = C->getAPInt();
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;
    for (const SCEV *S : Add->operands()) {
      if (!Op) return nullptr;
    for (const SCEV *S : Mul->operands()) {
                                       IgnoreSignificantBits)) {

    if (C->getSignificantBits() <= 64) {
      return Immediate::getFixed(C->getSExtValue());
    if (Result.isNonZero())
    if (Result.isNonZero())
        return Immediate::getScalable(C->getSExtValue());
  return Immediate::getZero();
    if (SI->getPointerOperand() == OperandVal)
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
        if (IntrInfo.PtrVal == OperandVal)
    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
    AccessTy.MemTy = Ty;
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::masked_load:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
    case Intrinsic::masked_store:
      AccessTy.AddrSpace =
          II->getArgOperand(1)->getType()->getPointerAddressSpace();
    if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
  if (!Processed.insert(S).second)
    for (const SCEV *S : Add->operands()) {
  const SCEV *Op0, *Op1;
      Value *UVal = U->getValue();
      if (UI && UI->getOpcode() == Instruction::Mul &&
                            const LSRUse &LU, const Formula &F);
                                 const LSRUse &LU, const Formula &F,
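
// Cost of a candidate LSR solution, accumulated in the fields of
// TargetTransformInfo::LSRCost (instructions, registers, addrec cost,
// IV multiplies, base adds, immediates, setup and scale cost).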
  const Loop *L = nullptr;
  ScalarEvolution *SE = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  TargetTransformInfo::LSRCost C;

  Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
      L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;

  void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
                   const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,
                   bool HardwareLoopProfitable,
                   SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);

  void print(raw_ostream &OS) const;

  void RateRegister(const Formula &F, const SCEV *Reg,
                    SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
                    bool HardwareLoopProfitable);
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                           SmallPtrSetImpl<const SCEV *> &Regs,
                           const LSRUse &LU, bool HardwareLoopProfitable,
                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
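
// An LSRFixup is one location in the IR rewritten in terms of an LSRUse: the
// user instruction, the operand value to replace, any post-increment loops
// that apply, and a per-fixup immediate offset.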
  Value *OperandValToReplace = nullptr;

  Immediate Offset = Immediate::getZero();

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

  void print(raw_ostream &OS) const;
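
// An LSRUse groups fixups that can share a solution: they have a common kind
// (Basic, Special, ICmpZero, or Address), access type, and offset range, and
// carry the candidate formulae generated for them.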
  DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;

  using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;

  MemAccessTy AccessTy;

  Immediate MinOffset = Immediate::getFixedMax();
  Immediate MaxOffset = Immediate::getFixedMin();

  bool AllFixupsOutsideLoop = true;

  bool AllFixupsUnconditional = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  SmallPtrSet<const SCEV *, 4> Regs;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (Immediate::isKnownGT(f.Offset, MaxOffset))
      MaxOffset = f.Offset;
    if (Immediate::isKnownLT(f.Offset, MinOffset))
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

  void print(raw_ostream &OS) const;
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, Immediate BaseOffset,
                                 bool HasBaseReg, int64_t Scale,
                                 Instruction *Fixup = nullptr);

                     [&](unsigned i, const SCEV *Reg) {
                       return i + getSetupCost(Reg, Depth - 1);
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
                        SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
                        bool HardwareLoopProfitable) {
    if (AR->getLoop() != L) {
      if (!AR->getLoop()->contains(L)) {

    unsigned LoopCost = 1;
                        F.BaseOffset.isFixed() &&
                        *Step == F.BaseOffset.getFixedValue();
      if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
      if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
          HardwareLoopProfitable)
    C.AddRecCost += LoopCost;

      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);

void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                               SmallPtrSetImpl<const SCEV *> &Regs,
                               const LSRUse &LU, bool HardwareLoopProfitable,
                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  if (LoserRegs && LoserRegs->count(Reg)) {
  RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);
  if (LoserRegs && isLoser())
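
// Rate a whole formula for one use: rate its scaled and base registers, then
// charge for extra base adds, unfolded offsets, immediates at each fixup, and
// registers beyond the number the target can cheaply sustain.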
void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
                       const DenseSet<const SCEV *> &VisitedRegs,
                       const LSRUse &LU, bool HardwareLoopProfitable,
                       SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(F, ScaledReg, Regs, LU, HardwareLoopProfitable,
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(F, BaseReg, Regs, LU, HardwareLoopProfitable,

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)
  C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());

  for (const LSRFixup &Fixup : LU.Fixups) {
    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
      else if (Offset.isNonZero())
            APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();
    if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&

  if (C.NumRegs > TTIRegNum) {
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
      C.Insns += (C.NumRegs - TTIRegNum);

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
    C.Insns += (C.AddRecCost - PrevAddRecCost);

  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
bool Cost::isLess(const Cost &Other) const {
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))
  return !L->contains(UserInst);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::print(raw_ostream &OS) const {
    Store->getOperand(0)->printAsOperand(OS, false);
  OS << ", OperandValToReplace=";
  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);
    OS << ", Offset=" << Offset;
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();

bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");
  if (!Formulae.empty() && RigidFormula)
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");
  Formulae.push_back(F);

void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (AllFixupsUnconditional)
    OS << ", all-fixups-unconditional";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
  case LSRUse::Address: {
    int64_t FixedOffset =
        BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
    int64_t ScalableOffset =
        BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
    return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup, ScalableOffset);

  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
    if (Scale != 0 && Scale != -1)
    if (BaseOffset.isNonZero()) {
      if (BaseOffset.isScalable())
      BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
      return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
    return !BaseGV && Scale == 0 && BaseOffset.isZero();

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();

                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  if (BaseOffset.isNonZero() &&
      (BaseOffset.isScalable() != MinOffset.isScalable() ||
       BaseOffset.isScalable() != MaxOffset.isScalable()))
  int64_t Base = BaseOffset.getKnownMinValue();
  int64_t Min = MinOffset.getKnownMinValue();
  int64_t Max = MaxOffset.getKnownMinValue();
  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());
                              HasBaseReg, Scale) &&

                      Immediate MinOffset, Immediate MaxOffset,
                      LSRUse::KindType Kind, MemAccessTy AccessTy,
                      const Formula &F, const Loop &L) {
  assert((F.isCanonical(L) || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                       Immediate MaxOffset, LSRUse::KindType Kind,
                       Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
                              BaseOffset, HasBaseReg, Scale) ||
                               BaseGV, BaseOffset, true, 0));

                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);

    return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());
  return TTI.isLegalAddImmediate(Offset.getFixedValue());
                                 const LSRUse &LU, const Formula &F) {
  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
    for (const LSRFixup &Fixup : LU.Fixups)
                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                                F.Scale, Fixup.UserInst))
                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F,
    return F.Scale != 1;

  case LSRUse::Address: {
    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
    if (F.BaseOffset.isScalable()) {
      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
  case LSRUse::ICmpZero:
  case LSRUse::Special:

                             LSRUse::KindType Kind, MemAccessTy AccessTy,
  if (BaseOffset.isZero() && !BaseGV)
  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
  if (!HasBaseReg && Scale == 1) {
  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&

                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
  if (S->isZero()) return true;
  if (!S->isZero()) return false;
  if (BaseOffset.isZero() && !BaseGV)
  if (BaseOffset.isScalable())
  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
                              BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

  IVInc(Instruction *U, Value *O, const SCEV *E)
      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  const_iterator begin() const {
    return std::next(Incs.begin());
  const_iterator end() const {

  bool hasIncs() const { return Incs.size() >= 2; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,
  SmallPtrSet<Instruction*, 4> FarUsers;
  SmallPtrSet<Instruction*, 4> NearUsers;

  ScalarEvolution &SE;
  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  MemorySSAUpdater *MSSAU;

  bool HardwareLoopProfitable = false;

  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

  SmallSetVector<Type *, 4> Types;

  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  SmallPtrSet<Use*, MaxChains> IVIncSet;

  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

  SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
  void OptimizeShadowIV();
  bool FindIVUserForCond(Instruction *Cond, IVStrideUse *&CondUse);
  void OptimizeLoopTermCond();

  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                        SmallVectorImpl<ChainUsers> &ChainUsersVec);
  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,
                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
  bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   const SmallVectorImpl<Immediate> &Worklist,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                    SmallVectorImpl<const Formula *> &Workspace,
                    const Cost &CurCost,
                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
                    DenseSet<const SCEV *> &VisitedRegs) const;
  void Solve(SmallVectorImpl<const Formula *> &Solution) const;

                    const SmallVectorImpl<Instruction *> &Inputs) const;
                                  const LSRUse &LU) const;
  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
               SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

  LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
              LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
              TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);

  bool getChanged() const { return Changed; }
  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
    return ScalarEvolutionIVs;

  void print_factors_and_types(raw_ostream &OS) const;
  void print_fixups(raw_ostream &OS) const;
  void print_uses(raw_ostream &OS) const;
  void print(raw_ostream &OS) const;
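
// OptimizeShadowIV: when the integer IV is only converted to floating point
// inside the loop, build a parallel floating-point IV (an FAdd/FSub with a
// ConstantFP initial value) so the int-to-float casts can go away.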
void LSRInstance::OptimizeShadowIV() {
    Type *DestTy = nullptr;
    bool IsSigned = false;
      DestTy = UCast->getDestTy();
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;
    if (Mantissa == -1) continue;

    unsigned Entry, Latch;
    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?

    BinaryOperator *Incr =
    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)

    ConstantInt *C = nullptr;
    if (!C->getValue().isStrictlyPositive())

    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,
bool LSRInstance::FindIVUserForCond(Instruction *Cond, IVStrideUse *&CondUse) {
  for (IVStrideUse &U : IU)
    if (U.getUser() == Cond) {

Instruction *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse *&CondUse) {
  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  const SCEVNAryExpr *Max = nullptr;
    Pred = ICmpInst::ICMP_SLE;
    Pred = ICmpInst::ICMP_SLT;
    Pred = ICmpInst::ICMP_ULT;

  if (Max->getNumOperands() != 2)
  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);
      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
    NewRHS = SU->getValue();

  ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
                                   Cond->getOperand(0), NewRHS, "scmp");

  Cond->replaceAllUsesWith(NewCond);
  Cond->eraseFromParent();
  if (Cmp->use_empty()) {
    Cmp->eraseFromParent();
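
// Rewrite the loop's terminating condition to use the post-incremented IV
// where that is legal and profitable, declining when another IV user needs
// the pre-incremented value or the scaled address would not fold.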
LSRInstance::OptimizeLoopTermCond() {
  SmallPtrSet<Instruction *, 4> PostIncs;

  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    IVStrideUse *CondUse = nullptr;
    if (!FindIVUserForCond(Cond, CondUse))
      Cond = OptimizeMax(Cmp, CondUse);
    if (!DT.dominates(ExitingBlock, LatchBlock))
    if (LatchBlock != ExitingBlock)
      for (const IVStrideUse &UI : IU)
        if (&UI != CondUse &&
          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(UI, L);
          if (!A || !B) continue;
          if (const SCEVConstant *D =
            const ConstantInt *C = D->getValue();
            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;
            if (C->getValue().getSignificantBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;
            MemAccessTy AccessTy =
            int64_t Scale = C->getSExtValue();
                                          AccessTy.AddrSpace))
              goto decline_post_inc;
                                          AccessTy.AddrSpace))
              goto decline_post_inc;

    LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
    if (Cond->hasOneUse()) {
      Cond->setName(L->getHeader()->getName() + ".termcond");

    IVIncInsertPos = L->getLoopLatch()->getTerminator();
  for (Instruction *Inst : PostIncs)
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)
  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);

  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
                          LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
                          NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;

std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
  const SCEV *Copy = Expr;
    Offset = Immediate::getFixed(0);

  std::pair<UseMapTy::iterator, bool> P =
      UseMap.try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];
  return std::make_pair(LUIdx, Offset);
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())
  RegUses.swapAndDropUse(LUIdx, Uses.size());

LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (LSRUse &LU : Uses) {
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())
2909 SmallSetVector<const SCEV *, 4> Strides;
2913 for (
const IVStrideUse &U : IU) {
2914 const SCEV *Expr = IU.getExpr(U);
2932 }
while (!Worklist.
empty());
2936 for (SmallSetVector<const SCEV *, 4>::const_iterator
2938 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2939 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2940 const SCEV *OldStride = *
I;
2941 const SCEV *NewStride = *NewStrideIter;
2951 if (
const SCEVConstant *Factor =
2954 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2955 Factors.insert(Factor->getAPInt().getSExtValue());
2956 }
else if (
const SCEVConstant *Factor =
2960 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2961 Factors.insert(Factor->getAPInt().getSExtValue());
2967 if (Types.size() == 1)
2979 for(; OI != OE; ++OI) {
2998 return Trunc->getOperand(0);
3031 if (SubExpr->getSCEVType() ==
scAddExpr)
3034 if (SubExpr->getSCEVType() !=
scMulExpr)
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
                                    ScalarEvolution &SE) {

  SmallPtrSet<const SCEV*, 8> Processed;

  if (!Chain.hasIncs())
  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               : Users) { dbgs() << " " << *Inst << "\n"; });
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))

  for (const IVInc &Inc : Chain) {
    if (TTI.isProfitableLSRChainElement(Inc.UserInst))
    if (Inc.IncExpr->isZero())
      ++NumConstIncrements;
    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;
    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)
  cost += NumVarIncrements;
  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                                   SmallVectorImpl<ChainUsers> &ChainUsersVec) {
  const SCEV *const OperExpr = SE.getSCEV(NextIV);
  const SCEV *const OperExprBase = getExprBase(OperExpr);

  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];
    const SCEV *PrevExpr = SE.getSCEV(PrevIV);
    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    LastIncExpr = OperExpr;
    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);

  for (User *U : IVOper->users()) {
    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for (; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)
        && IU.isIVUserOrOperand(OtherUse)) {
    NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
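
// Walk the blocks on the dominator path from the loop header down to the
// latch and feed each instruction's IV operands to ChainInstruction, then
// keep only the chains that pass the profitability check.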
void LSRInstance::CollectChains() {
  SmallVector<BasicBlock *, 8> LatchPath;
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

  for (BasicBlock *BB : reverse(LatchPath)) {
    for (Instruction &I : *BB) {
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      SmallPtrSet<Instruction*, 4> UniqueOperands;
      while (IVOpIter != IVOpEnd) {
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  for (PHINode &PN : L->getHeader()->phis()) {
      ChainInstruction(&PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                             ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
  IVChainVec.resize(ChainIdx);

void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  Immediate IncOffset = Immediate::getZero();
        C->getSignificantBits() > 64)
    IncOffset = Immediate::getScalable(C->getSExtValue());
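
// Materialize an IV chain: locate the chain's source IV operand, then walk
// the increments, expanding each accumulated left-over increment expression
// with the SCEV expander and rewriting the user's operand (and any matching
// loop-header phis) to use the chained value.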
void LSRInstance::GenerateIVChain(const IVChain &Chain,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
  assert(IVSrc && "Failed to find IV chain source");

  const SCEV *LeftOverExpr = nullptr;
  const SCEV *Accum = SE.getZero(IntTy);
  for (const IVInc &Inc : Chain) {
      InsertPt = L->getLoopLatch()->getTerminator();
    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

      bool FoundBase = false;
      for (auto [MapScev, MapIVOper] : reverse(Bases)) {
        const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);
          if (!Remainder->isZero()) {
            Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
            const SCEV *IVOperExpr =
            IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
      if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
        Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
        IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

    assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
      LeftOverExpr = nullptr;

    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);

  for (PHINode &Phi : L->getHeader()->phis()) {
        Phi.getIncomingValueForBlock(L->getLoopLatch()));
    Value *IVOper = IVSrc;
    if (IVTy != PostIncTy) {
      IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
      Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
      IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
    Phi.replaceUsesOfWith(PostIncV, IVOper);
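
// For every IV user, create (or reuse) an LSRUse of the right kind, record a
// fixup for the operand being replaced, and seed the use with an initial
// formula matched directly from its SCEV expression.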
void LSRInstance::CollectFixupsAndInitialFormulae() {
  BranchInst *ExitBranch = nullptr;
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

  SmallPtrSet<const SCEV *, 16> Regs;
  DenseSet<const SCEV *> VisitedRegs;
  DenseSet<size_t> VisitedLSRUse;

  for (const IVStrideUse &U : IU) {
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
      Kind = LSRUse::Address;

    const SCEV *S = IU.getExpr(U);
      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
            (!NV->getType()->isPointerTy() ||
          Kind = LSRUse::ICmpZero;
        } else if (L->isLoopInvariant(NV) &&
                   !NV->getType()->isPointerTy()) {
          Kind = LSRUse::ICmpZero;

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
    LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);

    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,
                               HardwareLoopProfitable);
      VisitedLSRUse.insert(LUIdx);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
    LU.RigidFormula = true;
  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
         "Formula is illegal");
  if (!LU.InsertFormula(F, *L))
  CountRegisters(F, LUIdx);

bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  SmallPtrSet<const SCEV *, 32> Visited;

  while (!Worklist.empty()) {
    if (!Visited.insert(S).second)

      const Value *V = US->getValue();
        if (L->contains(Inst)) continue;
      for (const Use &U : V->uses()) {
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())

          bool HasIncompatibleEHPTerminatedBlock = false;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
          if (HasIncompatibleEHPTerminatedBlock) {

          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = ICI->getOperand(OtherIdx);

        std::pair<size_t, Immediate> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                                   unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {
  const SCEV *Start, *Step;
    if (Start->isZero())
      Remainder = nullptr;
    if (Remainder != Start) {

                              LSRUse &LU, const SCEV *S, const Loop *L,
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())
  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, S->getType()) ||
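
// Split a base (or scaled) register that is an add expression into its
// sub-expressions and emit new formulae that move foldable pieces into the
// unfolded offset or into separate base registers.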
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
  if (AddOps.size() == 1)
                                            LU.AccessTy, *J,
                                            Base.getNumRegs() > 1))
    InnerAddOps.append(std::next(J), std::as_const(AddOps).end());
    if (InnerAddOps.size() == 1 &&
                                   LU.AccessTy, InnerAddOps[0],
                                   Base.getNumRegs() > 1))
    const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
      if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
        F.ScaledReg = nullptr;
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
      F.BaseRegs[Idx] = InnerSum;
            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
        F.BaseRegs.push_back(*J);
    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,

void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
      (Base.UnfoldedOffset.isNonZero()) <=

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
      if (!CombinedIntegerType)
      Ops.push_back(BaseReg);
      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
        NewBase.UnfoldedOffset.getFixedValue(), true));
    NewBase.UnfoldedOffset = Immediate::getFixed(0);
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  if (G->isZero() || !GV)
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {
  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);
    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
        F.ScaledReg = nullptr;
        F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.BaseRegs[Idx] = NewG;
      (void)InsertFormula(LU, LUIdx, F);

  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
    const APInt *StepInt;
    for (Immediate Offset : Worklist) {
        Offset = Immediate::getFixed(Offset.getFixedValue() - Step);

  for (Immediate Offset : Worklist)

  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))
  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
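
// For ICmpZero uses only: multiply the whole formula (offsets and registers)
// by each interesting factor, giving up on signed overflow or pointer-typed
// registers.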
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;
  if (LU.MinOffset != LU.MaxOffset) return;
  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)
    if (BaseReg->getType()->isPointerTy())
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset.isMin() && Factor == -1)
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())

    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())

    F.BaseOffset = NewBaseOffset;
    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

    const SCEV *FactorS = SE.getConstant(IntTy, Factor);
    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())
              IntTy, F.UnfoldedOffset.getFixedValue()))

    (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.Scale != 0 && !Base.unscale())
  assert(Base.Scale == 0 && "unscale did not do its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;
    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        const SCEV *FactorS = SE.getConstant(IntTy, Factor);
        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
          if (!Quotient->isZero()) {
            F.ScaledReg = Quotient;
            F.deleteBaseReg(F.BaseRegs[i]);
            if (F.Scale == 1 && (F.BaseRegs.empty() ||
                (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
            if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
            (void)InsertFormula(LU, LUIdx, F);
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    if (!New || (Result && New != Result))
  assert(Result && "failed to create expression");

void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
      [](const SCEV *S) { return S->getType()->isPointerTy(); }))

  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
      const SCEV *NewScaledReg =
      if (!NewScaledReg || NewScaledReg->isZero())
      F.ScaledReg = NewScaledReg;

      bool HasZeroBaseReg = false;
      for (const SCEV *&BaseReg : F.BaseRegs) {
        const SCEV *NewBaseReg =
        if (!NewBaseReg || NewBaseReg->isZero()) {
          HasZeroBaseReg = true;

      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
      (void)InsertFormula(LU, LUIdx, F);
namespace {

/// Work item for GenerateCrossUseConstantOffsets: apply offset Imm to the
/// formulae of use LUIdx that reference OrigReg.
struct WorkItem {
  size_t LUIdx;
  Immediate Imm;
  const SCEV *OrigReg;

  WorkItem(size_t LI, Immediate I, const SCEV *R)
      : LUIdx(LI), Imm(I), OrigReg(R) {}

  void print(raw_ostream &OS) const;
};

} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::print(raw_ostream &OS) const {
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
}
#endif
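// Note added for this listing: the pass below looks for registers that are
// used by several uses and differ only by a constant immediate; each such
// (use, offset, register) triple becomes a WorkItem so the offset can be
// folded into the formulae and one register shared across uses.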
4524void LSRInstance::GenerateCrossUseConstantOffsets() {
4526 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4528 DenseMap<const SCEV *, ImmMapTy>
Map;
4529 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4531 for (
const SCEV *Use : RegUses) {
4534 auto Pair =
Map.try_emplace(
Reg);
4537 Pair.first->second.insert(std::make_pair(Imm, Use));
4538 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(Use);
4545 SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
4547 for (
const SCEV *
Reg : Sequence) {
4548 const ImmMapTy &Imms =
Map.find(
Reg)->second;
4551 if (Imms.size() == 1)
4555 for (
const auto &Entry
4557 <<
' ' <<
Entry.first;
4561 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4563 const SCEV *OrigReg = J->second;
4565 Immediate JImm = J->first;
4566 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4569 UsedByIndicesMap[
Reg].
count() == 1) {
      // Examine the smallest and largest offsets seen for this register, plus
      // one near their midpoint.
      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {
        // (debug output elided in this listing)
        continue;
      }
      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      // Compute (FI + LI) / 2 without risking signed overflow.
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      // The shift rounds toward negative infinity; adjust odd negative sums so
      // the average rounds toward zero instead.
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
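        // Note added for this listing: only three candidate offsets are probed
        // per register (the smallest, the largest, and one near their
        // overflow-safe midpoint), which avoids comparing every pair of
        // immediates seen for the register.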
4598 for (
const auto &M : OtherImms) {
4599 if (M == J || M == JE)
continue;
4600 if (!JImm.isCompatibleImmediate(
M->first))
4604 Immediate
Imm = JImm.subUnsigned(
M->first);
4605 for (
unsigned LUIdx : UsedByIndices.
set_bits())
4607 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4608 WorkItems.
push_back(WorkItem(LUIdx, Imm, OrigReg));
4615 UsedByIndicesMap.
clear();
4616 UniqueItems.
clear();
4619 for (
const WorkItem &WI : WorkItems) {
4620 size_t LUIdx = WI.LUIdx;
4621 LSRUse &LU =
Uses[LUIdx];
4622 Immediate
Imm = WI.Imm;
4623 const SCEV *OrigReg = WI.OrigReg;
4626 const SCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4630 for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4631 Formula
F = LU.Formulae[
L];
4638 if (
F.ScaledReg == OrigReg) {
4639 if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4641 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4643 const SCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4644 if (
F.referencesReg(S))
4647 NewF.BaseOffset =
Offset;
4648 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4651 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4660 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4662 if (
C->getValue()->isNegative() !=
4663 (NewF.BaseOffset.isLessThanZero()) &&
4664 (
C->getAPInt().abs() * APInt(
BitWidth,
F.Scale))
4665 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4670 NewF.canonicalize(*this->L);
4671 (void)InsertFormula(LU, LUIdx, NewF);
4674 for (
size_t N = 0, NE =
F.BaseRegs.size();
N != NE; ++
N) {
4676 if (BaseReg != OrigReg)
4679 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4680 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4681 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4683 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4685 LU.Kind, LU.AccessTy, NewF)) {
4689 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4693 NewF.UnfoldedOffset = NewUnfoldedOffset;
4695 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4700 for (
const SCEV *NewReg : NewF.BaseRegs)
4702 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4704 if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4706 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4707 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4710 NewF.BaseOffset.getFixedValue()))
4715 NewF.canonicalize(*this->L);
4716 (void)InsertFormula(LU, LUIdx, NewF);
void LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
  }

  GenerateCrossUseConstantOffsets();

  LLVM_DEBUG(dbgs() << "After generating reuse formulae:\n";
             print_uses(dbgs()));
}
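// Note added for this listing: the filter below rates every formula of a use
// against the same register context and, among formulae that expose the same
// set of registers to other uses, keeps only the cheapest one.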
4763void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4764 DenseSet<const SCEV *> VisitedRegs;
4765 SmallPtrSet<const SCEV *, 16> Regs;
4766 SmallPtrSet<const SCEV *, 16> LoserRegs;
4768 bool ChangedFormulae =
false;
4773 using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>,
size_t>;
4775 BestFormulaeTy BestFormulae;
4777 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4778 LSRUse &LU =
Uses[LUIdx];
4783 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4784 FIdx != NumForms; ++FIdx) {
4785 Formula &
F = LU.Formulae[FIdx];
4796 CostF.RateFormula(
F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4798 if (CostF.isLoser()) {
4810 for (
const SCEV *
Reg :
F.BaseRegs) {
4811 if (RegUses.isRegUsedByUsesOtherThan(
Reg, LUIdx))
4815 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4816 Key.push_back(
F.ScaledReg);
4821 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4822 BestFormulae.insert(std::make_pair(
Key, FIdx));
4826 Formula &Best = LU.Formulae[
P.first->second];
4828 Cost CostBest(L, SE,
TTI, AMK);
4830 CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
4831 HardwareLoopProfitable);
4832 if (CostF.isLess(CostBest))
4836 " in favor of formula ";
4837 Best.print(
dbgs());
dbgs() <<
'\n');
4840 ChangedFormulae =
true;
4842 LU.DeleteFormula(
F);
4850 LU.RecomputeRegs(LUIdx, RegUses);
4853 BestFormulae.clear();
4858 "After filtering out undesirable candidates:\n";
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  size_t Power = 1;
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
    if (FSize >= ComplexityLimit) {
      Power = ComplexityLimit;
      break;
    }
    Power *= FSize;
    if (Power >= ComplexityLimit)
      break;
  }
  return Power;
}
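// Note added for this listing: the narrowing heuristics that follow only do
// real work while EstimateSearchSpaceComplexity() stays above ComplexityLimit;
// each one deliberately trades precision for a tractable formula search.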
4884void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4888 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4889 "which use a superset of registers used by other "
4892 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4893 LSRUse &LU =
Uses[LUIdx];
4895 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4896 Formula &
F = LU.Formulae[i];
4897 if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4903 I =
F.BaseRegs.begin(),
E =
F.BaseRegs.end();
I !=
E; ++
I) {
4909 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4910 (uint64_t)
C->getValue()->getSExtValue());
4911 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4912 (
I -
F.BaseRegs.begin()));
4913 if (LU.HasFormulaWithSameRegs(NewF)) {
4916 LU.DeleteFormula(
F);
4927 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4928 (
I -
F.BaseRegs.begin()));
4929 if (LU.HasFormulaWithSameRegs(NewF)) {
4932 LU.DeleteFormula(
F);
4943 LU.RecomputeRegs(LUIdx, RegUses);
4952void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4957 dbgs() <<
"The search space is too complex.\n"
4958 "Narrowing the search space by assuming that uses separated "
4959 "by a constant offset will use the same registers.\n");
4963 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4964 LSRUse &LU =
Uses[LUIdx];
4965 for (
const Formula &
F : LU.Formulae) {
4966 if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
4969 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4973 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4974 LU.Kind, LU.AccessTy))
4979 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4980 LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
4983 for (LSRFixup &
Fixup : LU.Fixups) {
4984 Fixup.Offset +=
F.BaseOffset;
4985 LUThatHas->pushFixup(
Fixup);
4991 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4992 Formula &
F = LUThatHas->Formulae[i];
4993 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4994 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4996 LUThatHas->DeleteFormula(
F);
5004 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
5007 DeleteUse(LU, LUIdx);
5020void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5024 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
5025 "undesirable dedicated registers.\n");
5027 FilterOutUndesirableDedicatedRegisters();
5042void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5047 dbgs() <<
"The search space is too complex.\n"
5048 "Narrowing the search space by choosing the best Formula "
5049 "from the Formulae with the same Scale and ScaledReg.\n");
5052 using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>,
size_t>;
5054 BestFormulaeTy BestFormulae;
5056 bool ChangedFormulae =
false;
5058 DenseSet<const SCEV *> VisitedRegs;
5059 SmallPtrSet<const SCEV *, 16> Regs;
5061 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5062 LSRUse &LU =
Uses[LUIdx];
5067 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5072 size_t FARegNum = 0;
5073 for (
const SCEV *
Reg : FA.BaseRegs) {
5074 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5075 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5077 size_t FBRegNum = 0;
5078 for (
const SCEV *
Reg : FB.BaseRegs) {
5079 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5080 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5082 if (FARegNum != FBRegNum)
5083 return FARegNum < FBRegNum;
5090 CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5092 CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5093 return CostFA.isLess(CostFB);
5097 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5099 Formula &
F = LU.Formulae[FIdx];
5102 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5106 Formula &Best = LU.Formulae[
P.first->second];
5107 if (IsBetterThan(
F, Best))
5111 " in favor of formula ";
5112 Best.print(
dbgs());
dbgs() <<
'\n');
5114 ChangedFormulae =
true;
5116 LU.DeleteFormula(
F);
5122 LU.RecomputeRegs(LUIdx, RegUses);
5125 BestFormulae.clear();
5130 "After filtering out undesirable candidates:\n";
5137void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5144 "Narrowing the search space by choosing the lowest "
5145 "register Formula for PostInc Uses.\n");
5147 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5148 LSRUse &LU =
Uses[LUIdx];
5150 if (LU.Kind != LSRUse::Address)
5156 size_t MinRegs = std::numeric_limits<size_t>::max();
5157 for (
const Formula &
F : LU.Formulae)
5158 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5161 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5163 Formula &
F = LU.Formulae[FIdx];
5164 if (
F.getNumRegs() > MinRegs) {
5167 LU.DeleteFormula(
F);
5174 LU.RecomputeRegs(LUIdx, RegUses);
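// Note added for this listing: the heuristic below estimates, for every
// register, the probability that it will not be selected by the uses that
// reference it, and then keeps only the formula with the lowest expected
// register count per use (enabled by -lsr-exp-narrow).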
5225void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5234 SmallPtrSet<const SCEV *, 4> UniqRegs;
5238 DenseMap <const SCEV *, float> RegNumMap;
5239 for (
const SCEV *
Reg : RegUses) {
5243 for (
const LSRUse &LU :
Uses) {
5244 if (!LU.Regs.count(
Reg))
5246 float P = LU.getNotSelectedProbability(
Reg);
5252 RegNumMap.
insert(std::make_pair(
Reg, PNotSel));
5256 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5259 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5260 LSRUse &LU =
Uses[LUIdx];
5262 if (LU.Formulae.size() < 2)
5267 float FMinRegNum = LU.Formulae[0].getNumRegs();
5268 float FMinARegNum = LU.Formulae[0].getNumRegs();
5270 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5271 Formula &
F = LU.Formulae[i];
5274 for (
const SCEV *BaseReg :
F.BaseRegs) {
5275 if (UniqRegs.
count(BaseReg))
5277 FRegNum += RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5280 RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5282 if (
const SCEV *ScaledReg =
F.ScaledReg) {
5283 if (!UniqRegs.
count(ScaledReg)) {
5285 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5288 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5291 if (FMinRegNum > FRegNum ||
5292 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5293 FMinRegNum = FRegNum;
5294 FMinARegNum = FARegNum;
5299 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5301 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5302 while (LU.Formulae.size() != 1) {
5305 LU.Formulae.pop_back();
5307 LU.RecomputeRegs(LUIdx, RegUses);
5308 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5309 Formula &
F = LU.Formulae[0];
5325 MemAccessTy AccessType) {
5335 return TTI.isLegalAddressingMode(
5336 AccessType.MemTy,
nullptr,
5337 Diff->getSExtValue(),
5338 true, 0, AccessType.AddrSpace) &&
5339 !
TTI.isLegalAddressingMode(
5340 AccessType.MemTy,
nullptr,
5341 -Diff->getSExtValue(),
5342 true, 0, AccessType.AddrSpace);
5348void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5351 SmallPtrSet<const SCEV *, 4> Taken;
5359 const SCEV *Best =
nullptr;
5360 unsigned BestNum = 0;
5361 for (
const SCEV *
Reg : RegUses) {
5366 BestNum = RegUses.getUsedByIndices(
Reg).count();
5368 unsigned Count = RegUses.getUsedByIndices(
Reg).count();
5369 if (
Count > BestNum) {
5377 if (
Count == BestNum) {
5378 int LUIdx = RegUses.getUsedByIndices(
Reg).find_first();
5379 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5381 Uses[LUIdx].AccessTy)) {
5388 assert(Best &&
"Failed to find best LSRUse candidate");
5390 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5391 <<
" will yield profitable reuse.\n");
5396 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5397 LSRUse &LU =
Uses[LUIdx];
5398 if (!LU.Regs.count(Best))
continue;
5401 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5402 Formula &
F = LU.Formulae[i];
5403 if (!
F.referencesReg(Best)) {
5405 LU.DeleteFormula(
F);
5409 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5415 LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  if (FilterSameScaledReg)
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
  if (LSRExpNarrow)
    NarrowSearchSpaceByDeletingCostlyFormulas();
  else
    NarrowSearchSpaceByPickingWinnerRegs();
}
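// Note added for this listing: SolveRecurse is a branch-and-bound search that
// picks one formula per use in order, preferring formulae that reuse registers
// already required by earlier choices and pruning any branch whose running
// cost is no better than the best complete solution found so far.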
5440void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5442 SmallVectorImpl<const Formula *> &Workspace,
5443 const Cost &CurCost,
5444 const SmallPtrSet<const SCEV *, 16> &CurRegs,
5445 DenseSet<const SCEV *> &VisitedRegs)
const {
5456 const LSRUse &LU =
Uses[Workspace.
size()];
5462 SmallSetVector<const SCEV *, 4> ReqRegs;
5463 for (
const SCEV *S : CurRegs)
5464 if (LU.Regs.count(S))
5467 SmallPtrSet<const SCEV *, 16> NewRegs;
5468 Cost NewCost(L, SE,
TTI, AMK);
5469 for (
const Formula &
F : LU.Formulae) {
5477 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5478 for (
const SCEV *
Reg : ReqRegs) {
5479 if ((
F.ScaledReg &&
F.ScaledReg ==
Reg) ||
5482 if (NumReqRegsToFind == 0)
5486 if (NumReqRegsToFind != 0) {
5497 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5498 if (NewCost.isLess(SolutionCost)) {
5500 if (Workspace.
size() !=
Uses.size()) {
5501 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5502 NewRegs, VisitedRegs);
5503 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5504 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5507 dbgs() <<
".\nRegs:\n";
5508 for (
const SCEV *S : NewRegs)
dbgs()
5509 <<
"- " << *S <<
"\n";
5512 SolutionCost = NewCost;
5513 Solution = Workspace;
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  Workspace.reserve(Uses.size());

  // SolveRecurse does all the work.
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost, CurRegs,
               VisitedRegs);
  if (Solution.empty())
    return;

  // Ok, we have made all our decisions; report them.
  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               Uses[i].print(dbgs());
               Solution[i]->print(dbgs());
               dbgs() << '\n';
             });
5554 const bool EnableDropUnprofitableSolution = [&] {
5566 if (BaselineCost.isLess(SolutionCost)) {
5567 if (!EnableDropUnprofitableSolution)
5569 dbgs() <<
"Baseline is more profitable than chosen solution, "
5570 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5573 "solution, dropping LSR solution.\n";);
5584 const SmallVectorImpl<Instruction *> &Inputs)
5588 bool AllDominate =
true;
5595 for (Instruction *Inst : Inputs) {
5596 if (Inst == Tentative || !DT.
dominates(Inst, Tentative)) {
5597 AllDominate =
false;
5602 if (Tentative->
getParent() == Inst->getParent() &&
5603 (!BetterPos || !DT.
dominates(Inst, BetterPos)))
5613 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5614 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5618 if (!Rung)
return IP;
5619 Rung = Rung->getIDom();
5620 if (!Rung)
return IP;
5621 IDom = Rung->getBlock();
5624 const Loop *IDomLoop = LI.getLoopFor(IDom);
5625 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5626 if (IDomDepth <= IPLoopDepth &&
5627 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5644 SmallVector<Instruction *, 4> Inputs;
5647 if (LU.Kind == LSRUse::ICmpZero)
5648 if (Instruction *
I =
5651 if (LF.PostIncLoops.
count(L)) {
5652 if (LF.isUseFullyOutsideLoop(L))
5653 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5659 for (
const Loop *PIL : LF.PostIncLoops) {
5660 if (PIL == L)
continue;
5665 if (!ExitingBlocks.
empty()) {
5667 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5674 "Insertion point must be a normal instruction");
5684 while (IP->isEHPad()) ++IP;
5689 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5697Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5699 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const {
5700 if (LU.RigidFormula)
5701 return LF.OperandValToReplace;
5705 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5710 Rewriter.setPostInc(LF.PostIncLoops);
5715 Type *Ty =
F.getType();
5729 for (
const SCEV *
Reg :
F.BaseRegs) {
5730 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5738 Value *ICmpScaledV =
nullptr;
5740 const SCEV *ScaledS =
F.ScaledReg;
5746 if (LU.Kind == LSRUse::ICmpZero) {
5756 "The only scale supported by ICmpZero uses is -1!");
5757 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5765 if (!
Ops.empty() && LU.Kind == LSRUse::Address &&
5775 Ops.push_back(ScaledS);
5801 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5802 "Expanding mismatched offsets\n");
5804 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5805 if (
Offset.isNonZero()) {
5806 if (LU.Kind == LSRUse::ICmpZero) {
5813 IntTy, -(uint64_t)
Offset.getFixedValue(),
true);
5822 Ops.push_back(
Offset.getUnknownSCEV(SE, IntTy));
5827 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5828 if (UnfoldedOffset.isNonZero()) {
5830 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5834 const SCEV *FullS =
Ops.empty() ?
5845 if (LU.Kind == LSRUse::ICmpZero) {
5849 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5850 "a scale at the same time!");
5851 if (
F.Scale == -1) {
5852 if (ICmpScaledV->
getType() != OpTy) {
5862 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5863 "ICmp does not support folding a global value and "
5864 "a scale at the same time!");
5868 -(uint64_t)
Offset.getFixedValue(),
5870 if (
C->getType() != OpTy) {
5874 assert(
C &&
"Cast of ConstantInt should have folded");
5887void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRUse &LU,
5888 const LSRFixup &LF,
const Formula &
F,
5889 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5890 DenseMap<BasicBlock *, Value *>
Inserted;
5894 bool needUpdateFixups =
false;
5905 Loop *PNLoop = LI.getLoopFor(Parent);
5906 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5912 CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
5913 .setMergeIdenticalEdges()
5914 .setKeepOneInputPHIs());
5917 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5928 if (
L->contains(BB) && !
L->contains(PN))
5936 needUpdateFixups =
true;
5941 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5954 LF.OperandValToReplace->
getType(),
"tmp",
5961 if (
L->contains(
I) && !
L->contains(BB))
5962 InsertedNonLCSSAInsts.insert(
I);
5965 Pair.first->second = FullV;
5972 if (needUpdateFixups) {
5973 for (LSRUse &LU :
Uses)
5974 for (LSRFixup &
Fixup : LU.Fixups)
5978 if (
Fixup.UserInst == PN) {
5981 bool foundInOriginalPHI =
false;
5983 if (val ==
Fixup.OperandValToReplace) {
5984 foundInOriginalPHI =
true;
5989 if (foundInOriginalPHI)
6000 if (val ==
Fixup.OperandValToReplace)
6001 Fixup.UserInst = NewPN;
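// Note added for this listing: Rewrite expands the chosen formula for one
// fixup and replaces the fixup's operand with the expanded value, delegating
// to RewriteForPHI when the user instruction is a PHI node.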
6011void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
6013 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6017 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6023 if (FullV->
getType() != OpTy) {
6035 if (LU.Kind == LSRUse::ICmpZero)
6051 const LSRFixup &
Fixup,
const LSRUse &LU,
6055 if (LU.Kind != LSRUse::Address)
6056 return IVIncInsertPos;
6060 Type *Ty =
I->getType();
6063 return IVIncInsertPos;
6070 return IVIncInsertPos;
6077void LSRInstance::ImplementSolution(
6078 const SmallVectorImpl<const Formula *> &Solution) {
6084 for (
const IVChain &Chain : IVChainVec) {
6090 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6091 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6094 Rewriter.setIVIncInsertPos(L, InsertPos);
6095 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6099 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6102 for (
const IVChain &Chain : IVChainVec) {
6103 GenerateIVChain(Chain, DeadInsts);
6107 for (
const WeakVH &
IV :
Rewriter.getInsertedIVs())
6125 for (PHINode &PN :
L->getHeader()->phis()) {
6126 BinaryOperator *BO =
nullptr;
6132 case Instruction::Sub:
6137 case Instruction::Add:
6154 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6163LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6164 DominatorTree &DT, LoopInfo &LI,
6165 const TargetTransformInfo &
TTI, AssumptionCache &AC,
6166 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6167 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6170 :
TTI.getPreferredAddressingMode(
L, &SE)),
6173 if (!
L->isLoopSimplifyForm())
6181 unsigned NumUsers = 0;
6185 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6193 auto FirstNonPHI = PN->
getParent()->getFirstNonPHIIt();
6203 L->getHeader()->printAsOperand(
dbgs(),
false);
6209 HardwareLoopProfitable =
6210 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6214#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6217 Rewriter.disableCanonicalMode();
6218 Rewriter.enableLSRMode();
6222 OptimizeLoopTermCond();
6225 if (IU.empty())
return;
6228 if (!
L->isInnermost()) {
6241 CollectInterestingTypesAndFactors();
6242 CollectFixupsAndInitialFormulae();
6243 CollectLoopInvariantFixupsAndFormulae();
6249 print_uses(
dbgs()));
6251 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6255 GenerateAllReuseFormulae();
6257 FilterOutUndesirableDedicatedRegisters();
6258 NarrowSearchSpaceUsingHeuristics();
6268 if (Solution.
empty())
6273 for (
const LSRUse &LU :
Uses) {
6274 for (
const Formula &
F : LU.Formulae)
6276 F) &&
"Illegal formula generated!");
6281 ImplementSolution(Solution);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty())
    return;

  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << *Ty << ')';
  }
  OS << '\n';
}
void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      OS << "  ";
      LF.print(OS);
      OS << '\n';
    }
}
void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    OS << "  ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << "    ";
      F.print(OS);
      OS << '\n';
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}
namespace {

class LoopStrengthReduce : public LoopPass {
public:
  static char ID; // Pass identification, replacement for typeid

  LoopStrengthReduce();

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // (analysis requirements elided in this listing)
}
6386ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6387 llvm::DIExpression::expr_op_iterator Begin =
6388 llvm::DIExpression::expr_op_iterator(Expr.
begin());
6389 llvm::DIExpression::expr_op_iterator End =
6390 llvm::DIExpression::expr_op_iterator(Expr.
end());
6391 return {Begin, End};
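// Note added for this listing: SCEVDbgValueBuilder translates a SCEV into a
// DIExpression (plus its location operands) written in terms of the surviving
// induction variable, so that dbg.value records whose operands were rewritten
// by LSR can be salvaged rather than dropped.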
6394struct SCEVDbgValueBuilder {
6395 SCEVDbgValueBuilder() =
default;
6396 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6398 void clone(
const SCEVDbgValueBuilder &
Base) {
6399 LocationOps =
Base.LocationOps;
6404 LocationOps.
clear();
6411 SmallVector<Value *, 2> LocationOps;
6414 void pushUInt(uint64_t Operand) { Expr.
push_back(Operand); }
6421 unsigned ArgIndex = 0;
6422 if (It != LocationOps.
end()) {
6423 ArgIndex = std::distance(LocationOps.
begin(), It);
6425 ArgIndex = LocationOps.
size();
6431 void pushValue(
const SCEVUnknown *U) {
6436 bool pushConst(
const SCEVConstant *
C) {
6437 if (
C->getAPInt().getSignificantBits() > 64)
6439 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6440 Expr.
push_back(
C->getAPInt().getSExtValue());
6447 return ToDwarfOpIter(Expr);
6452 bool pushArithmeticExpr(
const llvm::SCEVCommutativeExpr *CommExpr,
6455 "Expected arithmetic SCEV type");
6457 unsigned EmitOperator = 0;
6458 for (
const auto &
Op : CommExpr->
operands()) {
6461 if (EmitOperator >= 1)
6462 pushOperator(DwarfOp);
6469 bool pushCast(
const llvm::SCEVCastExpr *
C,
bool IsSigned) {
6470 const llvm::SCEV *Inner =
C->getOperand(0);
6471 const llvm::Type *
Type =
C->getType();
6472 uint64_t ToWidth =
Type->getIntegerBitWidth();
6473 bool Success = pushSCEV(Inner);
6475 IsSigned ? llvm::dwarf::DW_ATE_signed
6476 : llvm::dwarf::DW_ATE_unsigned};
6477 for (
const auto &
Op : CastOps)
6483 bool pushSCEV(
const llvm::SCEV *S) {
6486 Success &= pushConst(StartInt);
6491 pushLocation(
U->getValue());
6494 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6497 Success &= pushSCEV(UDiv->getLHS());
6498 Success &= pushSCEV(UDiv->getRHS());
6499 pushOperator(llvm::dwarf::DW_OP_div);
6505 "Unexpected cast type in SCEV.");
6509 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6524 bool isIdentityFunction(uint64_t
Op,
const SCEV *S) {
6526 if (
C->getAPInt().getSignificantBits() > 64)
6528 int64_t
I =
C->getAPInt().getSExtValue();
6530 case llvm::dwarf::DW_OP_plus:
6531 case llvm::dwarf::DW_OP_minus:
6533 case llvm::dwarf::DW_OP_mul:
6534 case llvm::dwarf::DW_OP_div:
6547 bool SCEVToValueExpr(
const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6553 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6554 if (!pushSCEV(Stride))
6556 pushOperator(llvm::dwarf::DW_OP_mul);
6558 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6559 if (!pushSCEV(Start))
6561 pushOperator(llvm::dwarf::DW_OP_plus);
6567 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6568 pushLocation(OffsetValue);
6571 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6572 << std::to_string(
Offset) <<
"\n");
6578 bool createIterCountExpr(
const SCEV *S,
6579 const SCEVDbgValueBuilder &IterationCount,
6580 ScalarEvolution &SE) {
6589 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6593 if (!Rec->isAffine())
6601 clone(IterationCount);
6602 if (!SCEVToValueExpr(*Rec, SE))
6613 bool SCEVToIterCountExpr(
const llvm::SCEVAddRecExpr &SAR,
6614 ScalarEvolution &SE) {
6620 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6621 if (!pushSCEV(Start))
6623 pushOperator(llvm::dwarf::DW_OP_minus);
6625 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6626 if (!pushSCEV(Stride))
6628 pushOperator(llvm::dwarf::DW_OP_div);
6636 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6637 SmallVectorImpl<Value *> &DestLocations) {
6639 "Expected the locations vector to contain the IV");
6644 "Expected the location ops to contain the IV.");
6648 for (
const auto &
Op : LocationOps) {
6649 auto It =
find(DestLocations,
Op);
6650 if (It != DestLocations.
end()) {
6652 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6660 for (
const auto &
Op : expr_ops()) {
6662 Op.appendToVector(DestExpr);
6669 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6677struct DVIRecoveryRec {
6678 DVIRecoveryRec(DbgVariableRecord *DVR)
6679 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6681 DbgVariableRecord *DbgRef;
6683 bool HadLocationArgList;
6689 for (
auto &RE : RecoveryExprs)
6691 RecoveryExprs.clear();
6694 ~DVIRecoveryRec() { clear(); }
6702 auto expr_ops = ToDwarfOpIter(Expr);
6704 for (
auto Op : expr_ops)
6713template <
typename T>
6717 "contain any DW_OP_llvm_arg operands.");
6724template <
typename T>
6729 "Expected expression that references DIArglist locations using "
6730 "DW_OP_llvm_arg operands.");
6732 for (
Value *V : Locations)
6749 if (NumLLVMArgs == 0) {
6756 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6786 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6787 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6788 assert(DVIRec.Expr &&
"Expected an expression");
6793 if (!DVIRec.HadLocationArgList) {
6794 assert(DVIRec.LocationOps.size() == 1 &&
6795 "Unexpected number of location ops.");
6799 Value *CachedValue =
6804 for (
WeakVH VH : DVIRec.LocationOps) {
6812 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6817 const SCEV *SCEVInductionVar,
6818 SCEVDbgValueBuilder IterCountExpr) {
6832 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6834 NewLocationOps.
push_back(LSRInductionVar);
6836 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6837 WeakVH VH = DVIRec.LocationOps[i];
6843 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6845 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6853 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6854 <<
" refers to a location that is now undef or erased. "
6855 "Salvage abandoned.\n");
6859 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6860 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6862 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6863 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6867 if (std::optional<APInt>
Offset =
6869 if (
Offset->getSignificantBits() <= 64)
6870 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6873 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6882 assert(DVIRec.RecoveryExprs.size() == 1 &&
6883 "Expected only a single recovery expression for an empty "
6885 assert(DVIRec.RecoveryExprs[0] &&
6886 "Expected a SCEVDbgSalvageBuilder for location 0");
6887 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6888 B->appendToVectors(
NewExpr, NewLocationOps);
6890 for (
const auto &
Op : DVIRec.Expr->
expr_ops()) {
6898 SCEVDbgValueBuilder *DbgBuilder =
6899 DVIRec.RecoveryExprs[LocationArgIndex].get();
6905 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6906 "Expected a positive index for the location-op position.");
6907 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6911 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6915 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Updated DVI: " << *DVIRec.DbgRef <<
"\n");
6923 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6924 if (DVIToUpdate.empty())
6928 assert(SCEVInductionVar &&
6929 "Anticipated a SCEV for the post-LSR induction variable");
6933 if (!IVAddRec->isAffine())
6941 SCEVDbgValueBuilder IterCountExpr;
6942 IterCountExpr.pushLocation(LSRInductionVar);
6943 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6946 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6949 for (
auto &DVIRec : DVIToUpdate) {
6950 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6961 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
6962 for (
const auto &
B : L->getBlocks()) {
6963 for (
auto &
I : *
B) {
6965 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
6970 if (DbgVal.isKillLocation())
6975 const auto &HasTranslatableLocationOps =
6977 for (
const auto LocOp : DbgValToTranslate.location_ops()) {
6991 if (!HasTranslatableLocationOps(DbgVal))
6994 std::unique_ptr<DVIRecoveryRec> NewRec =
6995 std::make_unique<DVIRecoveryRec>(&DbgVal);
6999 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
7000 for (
const auto LocOp : DbgVal.location_ops()) {
7001 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
7002 NewRec->LocationOps.push_back(LocOp);
7003 NewRec->HadLocationArgList = DbgVal.hasArgList();
7005 SalvageableDVISCEVs.push_back(std::move(NewRec));
7015 const LSRInstance &LSR) {
7017 auto IsSuitableIV = [&](
PHINode *
P) {
7028 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7035 if (IsSuitableIV(
P))
7039 for (
PHINode &
P : L.getHeader()->phis()) {
7040 if (IsSuitableIV(&
P))
7058 std::unique_ptr<MemorySSAUpdater> MSSAU;
7060 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7063 const LSRInstance &Reducer =
7064 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7065 Changed |= Reducer.getChanged();
7072#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7075 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7089 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7103 if (SalvageableDVIRecords.
empty())
7109 for (
const auto &L : LI) {
7113 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7114 "could not be identified.\n");
7118 for (
auto &Rec : SalvageableDVIRecords)
7120 SalvageableDVIRecords.
clear();
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  MemorySSA *MSSA = nullptr;
  if (MSSAAnalysis)
    MSSA = &MSSAAnalysis->getMSSA();
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
}
char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static Instruction * getFixupInsertPos(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, DominatorTree &DT)
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
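A minimal usage sketch, added here for illustration only: it assumes the
standard new-pass-manager wrapper (LoopStrengthReducePass) and the usual
"loop-reduce" pipeline name; neither is stated in this listing.

  #include "llvm/Transforms/Scalar/LoopPassManager.h"
  #include "llvm/Transforms/Scalar/LoopStrengthReduce.h"

  // Schedule LSR through the new pass manager:
  llvm::LoopPassManager LPM;
  LPM.addPass(llvm::LoopStrengthReducePass());

  // Or from the command line (assumed pipeline spelling):
  //   opt -passes='loop(loop-reduce)' input.ll -S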
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
bool isUnconditional() const
Value * getCondition() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
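A minimal sketch, assuming a simple two-predecessor merge block, of the PHINode API listed above; the function name is made up.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static PHINode *createTwoWayPhiSketch(Type *Ty, Value *VLeft, BasicBlock *BBLeft,
                                      Value *VRight, BasicBlock *BBRight,
                                      BasicBlock *MergeBB) {
  // Reserve room for two incoming edges and insert at the top of MergeBB.
  PHINode *PN = PHINode::Create(Ty, /*NumReservedValues=*/2, "merge",
                                MergeBB->begin());
  PN->addIncoming(VLeft, BBLeft);
  PN->addIncoming(VRight, BBRight);
  return PN;
}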
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
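A short sketch, not from this file, showing the usual way the SCEVAddRecExpr accessors above are used to split an affine recurrence into start and step; the helper name is hypothetical.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

static bool splitAffineAddRecSketch(ScalarEvolution &SE, const SCEV *S,
                                    const SCEV *&Start, const SCEV *&Step) {
  const auto *AR = dyn_cast<SCEVAddRecExpr>(S);
  if (!AR || !AR->isAffine())
    return false; // Only handle {Start,+,Step}<L> with a loop-invariant step.
  Start = AR->getStart();
  Step = AR->getStepRecurrence(SE);
  return true;
}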
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if the elements that make up the given SCEV properly dominate the specified basic block.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
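A hedged sketch combining a few of the ScalarEvolution entry points above; the helper name is invented for illustration.

#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

static const SCEV *loopInvariantDiffSketch(ScalarEvolution &SE, Value *A,
                                           Value *B, const Loop *L) {
  if (A->getType() != B->getType() || !SE.isSCEVable(A->getType()))
    return nullptr;
  const SCEV *Diff = SE.getMinusSCEV(SE.getSCEV(A), SE.getSCEV(B));
  // Only report the difference when it does not change while L executes.
  return SE.isLoopInvariant(Diff, L) ? Diff : nullptr;
}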
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
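A tiny sketch of walking the set bits of a SmallBitVector with the find_first/find_next pair listed above; purely illustrative.

#include "llvm/ADT/SmallBitVector.h"
#include <cassert>

using namespace llvm;

static unsigned countSetBitsSketch(const SmallBitVector &BV) {
  unsigned N = 0;
  // Visit each set bit index in increasing order.
  for (int I = BV.find_first(); I != -1; I = BV.find_next(I))
    ++N;
  assert(N == BV.count() && "manual walk should match count()");
  return N;
}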
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
void setOperand(unsigned i, Value *Val)
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
A nullable Value handle.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
class_match< const SCEVVScale > m_SCEVVScale()
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bind_ty< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
bool match(const SCEV *S, const Pattern &P)
class_match< const Loop > m_Loop()
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
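A hedged sketch of the SCEV pattern matchers listed above, assuming they are the helpers declared in llvm/Analysis/ScalarEvolutionPatternMatch.h under namespace SCEVPatternMatch; the helper name is hypothetical.

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

using namespace llvm;
using namespace llvm::SCEVPatternMatch;

static bool hasConstantStrideSketch(const SCEV *S, const APInt *&Stride) {
  // Match {Start,+,Stride} where Stride is a constant integer; the start
  // is left unconstrained via m_SCEV().
  return match(S, m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(Stride)));
}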
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a BasicBlock.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool isa_and_nonnull(const Y &Val)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
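A two-line illustration of Log2_32, which computes a floor log base 2 (for example Log2_32(8) == 3 and Log2_32(9) == 3); the wrapper name is made up.

#include "llvm/Support/MathExtras.h"

using namespace llvm;

static unsigned scaleToShiftSketch(uint32_t Scale) {
  // Callers are expected to pass a non-zero scale.
  return Log2_32(Scale);
}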
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
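A brief sketch, not from this pass, of pairing normalizeForPostIncUse with denormalizeForPostIncUse; names other than the two API calls are illustrative.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"

using namespace llvm;

static const SCEV *roundTripPostIncSketch(const SCEV *S,
                                          const PostIncLoopSet &Loops,
                                          ScalarEvolution &SE) {
  // Normalizing and then denormalizing over the same loop set should yield
  // an equivalent expression whenever the normalization was invertible.
  const SCEV *Norm = normalizeForPostIncUse(S, Loops, SE);
  return Norm ? denormalizeForPostIncUse(Norm, Loops, SE) : nullptr;
}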
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
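A minimal sketch of the dead-instruction cleanup helper above; only the call itself is taken from the listed API, the wrapper is hypothetical.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

static void deleteCollectedDeadSketch(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // Entries that are not instructions, or not trivially dead, are skipped
  // rather than asserted on (hence "Permissive").
  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
}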
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.