132#define DEBUG_TYPE "loop-reduce"
149 cl::desc(
"Enable LSR phi elimination"));
154 cl::desc(
"Add instruction count to a LSR cost model"));
159 cl::desc(
"Narrow LSR complex solution using"
160 " expectation of registers number"));
166 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
167 " with the same ScaledReg and Scale"));
171 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
175 "Prefer pre-indexed addressing mode"),
177 "Prefer post-indexed addressing mode"),
182 cl::init(std::numeric_limits<uint16_t>::max()),
183 cl::desc(
"LSR search space complexity limit"));
187 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
191 cl::desc(
"Attempt to drop solution if it is less profitable"));
195 cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
199 cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
205 cl::desc(
"Stress test LSR IV chains"));
215 std::numeric_limits<unsigned>::max();
217 Type *MemTy =
nullptr;
220 MemAccessTy() =
default;
221 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
224 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
229 static MemAccessTy getUnknown(LLVMContext &Ctx,
230 unsigned AS = UnknownAddressSpace) {
231 return MemAccessTy(Type::getVoidTy(Ctx), AS);
242 SmallBitVector UsedByIndices;
244 void print(raw_ostream &OS)
const;
251 constexpr Immediate(ScalarTy MinVal,
bool Scalable)
252 : FixedOrScalableQuantity(MinVal, Scalable) {}
254 constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
255 : FixedOrScalableQuantity(
V) {}
258 constexpr Immediate() =
delete;
260 static constexpr Immediate getFixed(ScalarTy MinVal) {
261 return {MinVal,
false};
263 static constexpr Immediate getScalable(ScalarTy MinVal) {
264 return {MinVal,
true};
266 static constexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
267 return {MinVal, Scalable};
269 static constexpr Immediate getZero() {
return {0,
false}; }
270 static constexpr Immediate getFixedMin() {
271 return {std::numeric_limits<int64_t>::min(),
false};
273 static constexpr Immediate getFixedMax() {
274 return {std::numeric_limits<int64_t>::max(),
false};
276 static constexpr Immediate getScalableMin() {
277 return {std::numeric_limits<int64_t>::min(),
true};
279 static constexpr Immediate getScalableMax() {
280 return {std::numeric_limits<int64_t>::max(),
true};
283 constexpr bool isLessThanZero()
const {
return Quantity < 0; }
285 constexpr bool isGreaterThanZero()
const {
return Quantity > 0; }
287 constexpr bool isCompatibleImmediate(
const Immediate &Imm)
const {
288 return isZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
291 constexpr bool isMin()
const {
292 return Quantity == std::numeric_limits<ScalarTy>::min();
295 constexpr bool isMax()
const {
296 return Quantity == std::numeric_limits<ScalarTy>::max();
300 constexpr Immediate addUnsigned(
const Immediate &
RHS)
const {
301 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
302 ScalarTy
Value = (uint64_t)Quantity +
RHS.getKnownMinValue();
303 return {
Value, Scalable ||
RHS.isScalable()};
306 constexpr Immediate subUnsigned(
const Immediate &
RHS)
const {
307 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
308 ScalarTy
Value = (uint64_t)Quantity -
RHS.getKnownMinValue();
309 return {
Value, Scalable ||
RHS.isScalable()};
313 constexpr Immediate mulUnsigned(
const ScalarTy
RHS)
const {
314 ScalarTy
Value = (uint64_t)Quantity *
RHS;
315 return {
Value, Scalable};
319 const SCEV *getSCEV(ScalarEvolution &SE,
Type *Ty)
const {
326 const SCEV *getNegativeSCEV(ScalarEvolution &SE,
Type *Ty)
const {
327 const SCEV *NegS = SE.
getConstant(Ty, -(uint64_t)Quantity);
333 const SCEV *getUnknownSCEV(ScalarEvolution &SE,
Type *Ty)
const {
348struct KeyOrderTargetImmediate {
349 bool operator()(
const Immediate &
LHS,
const Immediate &
RHS)
const {
350 if (
LHS.isScalable() && !
RHS.isScalable())
352 if (!
LHS.isScalable() &&
RHS.isScalable())
354 return LHS.getKnownMinValue() <
RHS.getKnownMinValue();
361struct KeyOrderSizeTAndImmediate {
362 bool operator()(
const std::pair<size_t, Immediate> &
LHS,
363 const std::pair<size_t, Immediate> &
RHS)
const {
364 size_t LSize =
LHS.first;
365 size_t RSize =
RHS.first;
367 return LSize < RSize;
368 return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
375 OS <<
"[NumUses=" << UsedByIndices.
count() <<
']';
387 using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
389 RegUsesTy RegUsesMap;
393 void countRegister(
const SCEV *
Reg,
size_t LUIdx);
394 void dropRegister(
const SCEV *
Reg,
size_t LUIdx);
395 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
397 bool isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const;
399 const SmallBitVector &getUsedByIndices(
const SCEV *
Reg)
const;
415RegUseTracker::countRegister(
const SCEV *
Reg,
size_t LUIdx) {
416 std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(
Reg);
417 RegSortData &RSD = Pair.first->second;
420 RSD.UsedByIndices.
resize(std::max(RSD.UsedByIndices.
size(), LUIdx + 1));
421 RSD.UsedByIndices.
set(LUIdx);
425RegUseTracker::dropRegister(
const SCEV *
Reg,
size_t LUIdx) {
426 RegUsesTy::iterator It = RegUsesMap.find(
Reg);
427 assert(It != RegUsesMap.end());
428 RegSortData &RSD = It->second;
430 RSD.UsedByIndices.
reset(LUIdx);
434RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
435 assert(LUIdx <= LastLUIdx);
439 for (
auto &Pair : RegUsesMap) {
440 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
441 if (LUIdx < UsedByIndices.
size())
442 UsedByIndices[LUIdx] =
443 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
444 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
449RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const {
450 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
451 if (
I == RegUsesMap.end())
453 const SmallBitVector &UsedByIndices =
I->second.UsedByIndices;
455 if (i == -1)
return false;
456 if ((
size_t)i != LUIdx)
return true;
460const SmallBitVector &RegUseTracker::getUsedByIndices(
const SCEV *
Reg)
const {
461 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
462 assert(
I != RegUsesMap.end() &&
"Unknown register!");
463 return I->second.UsedByIndices;
466void RegUseTracker::clear() {
477 GlobalValue *BaseGV =
nullptr;
480 Immediate BaseOffset = Immediate::getZero();
483 bool HasBaseReg =
false;
506 const SCEV *ScaledReg =
nullptr;
511 Immediate UnfoldedOffset = Immediate::getZero();
515 void initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE);
519 void canonicalize(
const Loop &L);
523 bool hasZeroEnd()
const;
525 bool countsDownToZero()
const;
527 size_t getNumRegs()
const;
530 void deleteBaseReg(
const SCEV *&S);
532 bool referencesReg(
const SCEV *S)
const;
533 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
534 const RegUseTracker &RegUses)
const;
536 void print(raw_ostream &OS)
const;
554 for (
const SCEV *S :
Add->operands())
560 const SCEV *Start, *Step;
575 if (
Mul->getOperand(0)->isAllOnesValue()) {
584 for (
const SCEV *S : MyGood)
586 for (
const SCEV *S : MyBad)
598void Formula::initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE) {
605 BaseRegs.push_back(Sum);
611 BaseRegs.push_back(Sum);
626bool Formula::isCanonical(
const Loop &L)
const {
627 assert((Scale == 0 || ScaledReg) &&
628 "ScaledReg must be non-null if Scale is non-zero");
631 return BaseRegs.size() <= 1;
636 if (Scale == 1 && BaseRegs.empty())
645 return none_of(BaseRegs, [&L](
const SCEV *S) {
656void Formula::canonicalize(
const Loop &L) {
660 if (BaseRegs.empty()) {
662 assert(ScaledReg &&
"Expected 1*reg => reg");
663 assert(Scale == 1 &&
"Expected 1*reg => reg");
664 BaseRegs.push_back(ScaledReg);
672 ScaledReg = BaseRegs.pop_back_val();
680 auto I =
find_if(BaseRegs, [&L](
const SCEV *S) {
683 if (
I != BaseRegs.end())
693bool Formula::unscale() {
697 BaseRegs.push_back(ScaledReg);
702bool Formula::hasZeroEnd()
const {
703 if (UnfoldedOffset || BaseOffset)
705 if (BaseRegs.size() != 1 || ScaledReg)
710bool Formula::countsDownToZero()
const {
713 assert(BaseRegs.size() == 1 &&
"hasZeroEnd should mean one BaseReg");
714 const APInt *StepInt;
722size_t Formula::getNumRegs()
const {
723 return !!ScaledReg + BaseRegs.size();
728Type *Formula::getType()
const {
729 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
730 ScaledReg ? ScaledReg->
getType() :
736void Formula::deleteBaseReg(
const SCEV *&S) {
737 if (&S != &BaseRegs.back())
743bool Formula::referencesReg(
const SCEV *S)
const {
749bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
750 const RegUseTracker &RegUses)
const {
752 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
754 for (
const SCEV *BaseReg : BaseRegs)
755 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
761void Formula::print(raw_ostream &OS)
const {
762 ListSeparator
Plus(
" + ");
767 if (BaseOffset.isNonZero())
768 OS <<
Plus << BaseOffset;
770 for (
const SCEV *BaseReg : BaseRegs)
773 if (HasBaseReg && BaseRegs.empty())
774 OS <<
Plus <<
"**error: HasBaseReg**";
775 else if (!HasBaseReg && !BaseRegs.empty())
776 OS <<
Plus <<
"**error: !HasBaseReg**";
779 OS <<
Plus << Scale <<
"*reg(";
786 if (UnfoldedOffset.isNonZero())
787 OS <<
Plus <<
"imm(" << UnfoldedOffset <<
')';
827 bool IgnoreSignificantBits =
false) {
838 if (
RA.isAllOnes()) {
839 if (
LHS->getType()->isPointerTy())
852 const APInt &LA =
C->getAPInt();
861 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
863 IgnoreSignificantBits);
864 if (!Step)
return nullptr;
866 IgnoreSignificantBits);
867 if (!Start)
return nullptr;
880 for (
const SCEV *S :
Add->operands()) {
882 if (!
Op)
return nullptr;
910 for (
const SCEV *S :
Mul->operands()) {
913 IgnoreSignificantBits)) {
933 bool PreferScalable) {
936 Immediate Result = Immediate::getZero();
945 C->getSignificantBits() <= 64) {
947 Result = Immediate::getFixed(
C->getSExtValue());
959 Result = Immediate::getScalable(
C->getSExtValue());
965 if (Result.isNonZero()) {
976 bool PreferScalable =
false) {
982 if (Result.isNonZero())
988 if (Result.isNonZero())
1029 if (
SI->getPointerOperand() == OperandVal)
1034 switch (
II->getIntrinsicID()) {
1035 case Intrinsic::memset:
1036 case Intrinsic::prefetch:
1037 case Intrinsic::masked_load:
1038 if (
II->getArgOperand(0) == OperandVal)
1041 case Intrinsic::masked_store:
1042 if (
II->getArgOperand(1) == OperandVal)
1045 case Intrinsic::memmove:
1046 case Intrinsic::memcpy:
1047 if (
II->getArgOperand(0) == OperandVal ||
1048 II->getArgOperand(1) == OperandVal)
1053 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo)) {
1054 if (IntrInfo.
PtrVal == OperandVal)
1060 if (RMW->getPointerOperand() == OperandVal)
1063 if (CmpX->getPointerOperand() == OperandVal)
1072 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1076 AccessTy.MemTy = Ty;
1080 AccessTy.AddrSpace =
SI->getPointerAddressSpace();
1082 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1084 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1086 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1088 switch (
II->getIntrinsicID()) {
1089 case Intrinsic::prefetch:
1090 case Intrinsic::memset:
1091 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1092 AccessTy.MemTy = OperandVal->
getType();
1094 case Intrinsic::memmove:
1095 case Intrinsic::memcpy:
1097 AccessTy.MemTy = OperandVal->
getType();
1099 case Intrinsic::masked_load:
1100 AccessTy.AddrSpace =
1101 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1103 case Intrinsic::masked_store:
1104 AccessTy.AddrSpace =
1105 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1109 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo) && IntrInfo.
PtrVal) {
1165 if (!Processed.
insert(S).second)
1169 for (
const SCEV *S :
Add->operands()) {
1176 const SCEV *Op0, *Op1;
1185 Value *UVal = U->getValue();
1189 if (UI && UI->
getOpcode() == Instruction::Mul &&
1222 const LSRUse &LU,
const Formula &
F);
1226 const LSRUse &LU,
const Formula &
F,
1233 const Loop *
L =
nullptr;
1234 ScalarEvolution *SE =
nullptr;
1235 const TargetTransformInfo *
TTI =
nullptr;
1236 TargetTransformInfo::LSRCost
C;
1241 Cost(
const Loop *L, ScalarEvolution &SE,
const TargetTransformInfo &
TTI,
1243 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1261 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1262 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1263 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1264 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0
u);
1270 return C.NumRegs == ~0
u;
1273 void RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1274 const DenseSet<const SCEV *> &VisitedRegs,
const LSRUse &LU,
1275 bool HardwareLoopProfitable,
1276 SmallPtrSetImpl<const SCEV *> *LoserRegs =
nullptr);
1278 void print(raw_ostream &OS)
const;
1282 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1283 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1284 bool HardwareLoopProfitable);
1285 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1286 SmallPtrSetImpl<const SCEV *> &Regs,
1287 const LSRUse &LU,
bool HardwareLoopProfitable,
1288 SmallPtrSetImpl<const SCEV *> *LoserRegs);
1299 Value *OperandValToReplace =
nullptr;
1309 Immediate
Offset = Immediate::getZero();
1311 LSRFixup() =
default;
1313 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1315 void print(raw_ostream &OS)
const;
1325 DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;
1338 using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1341 MemAccessTy AccessTy;
1347 Immediate MinOffset = Immediate::getFixedMax();
1348 Immediate MaxOffset = Immediate::getFixedMin();
1352 bool AllFixupsOutsideLoop =
true;
1357 bool AllFixupsUnconditional =
true;
1364 bool RigidFormula =
false;
1372 SmallPtrSet<const SCEV *, 4> Regs;
1374 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1376 LSRFixup &getNewFixup() {
1377 Fixups.push_back(LSRFixup());
1381 void pushFixup(LSRFixup &f) {
1383 if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1384 MaxOffset =
f.Offset;
1385 if (Immediate::isKnownLT(
f.Offset, MinOffset))
1386 MinOffset =
f.Offset;
1389 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1390 float getNotSelectedProbability(
const SCEV *
Reg)
const;
1391 bool InsertFormula(
const Formula &
F,
const Loop &L);
1392 void DeleteFormula(Formula &
F);
1393 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1395 void print(raw_ostream &OS)
const;
1402 LSRUse::KindType Kind, MemAccessTy AccessTy,
1403 GlobalValue *BaseGV, Immediate BaseOffset,
1404 bool HasBaseReg, int64_t Scale,
1405 Instruction *
Fixup =
nullptr);
1412 if (
TTI.getIntImmCost(
C->getAPInt(),
C->getType(),
1426 [&](
unsigned i,
const SCEV *
Reg) {
1427 return i + getSetupCost(Reg, Depth - 1, TTI);
1436void Cost::RateRegister(
const Formula &
F,
const SCEV *
Reg,
1437 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1438 bool HardwareLoopProfitable) {
1443 if (AR->getLoop() != L) {
1450 if (!AR->getLoop()->contains(L)) {
1460 unsigned LoopCost = 1;
1469 F.BaseOffset.isFixed() &&
1470 *Step ==
F.BaseOffset.getFixedValue();
1475 if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
1482 if (LU.Kind == LSRUse::ICmpZero &&
F.countsDownToZero() &&
1483 HardwareLoopProfitable)
1485 C.AddRecCost += LoopCost;
1490 if (!Regs.
count(AR->getOperand(1))) {
1491 RateRegister(
F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);
1503 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1512void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1513 SmallPtrSetImpl<const SCEV *> &Regs,
1514 const LSRUse &LU,
bool HardwareLoopProfitable,
1515 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1516 if (LoserRegs && LoserRegs->
count(
Reg)) {
1521 RateRegister(
F,
Reg, Regs, LU, HardwareLoopProfitable);
1522 if (LoserRegs && isLoser())
1527void Cost::RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1528 const DenseSet<const SCEV *> &VisitedRegs,
1529 const LSRUse &LU,
bool HardwareLoopProfitable,
1530 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1533 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1535 unsigned PrevAddRecCost =
C.AddRecCost;
1536 unsigned PrevNumRegs =
C.NumRegs;
1537 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1538 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1539 if (VisitedRegs.
count(ScaledReg)) {
1543 RatePrimaryRegister(
F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1548 for (
const SCEV *BaseReg :
F.BaseRegs) {
1549 if (VisitedRegs.
count(BaseReg)) {
1553 RatePrimaryRegister(
F, BaseReg, Regs, LU, HardwareLoopProfitable,
1560 size_t NumBaseParts =
F.getNumRegs();
1561 if (NumBaseParts > 1)
1566 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1572 for (
const LSRFixup &
Fixup : LU.Fixups) {
1573 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1574 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1578 else if (
Offset.isNonZero())
1580 APInt(64,
Offset.getKnownMinValue(),
true).getSignificantBits();
1584 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1605 if (
C.NumRegs > TTIRegNum) {
1608 if (PrevNumRegs > TTIRegNum)
1609 C.Insns += (
C.NumRegs - PrevNumRegs);
1611 C.Insns += (
C.NumRegs - TTIRegNum);
1624 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1628 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1631 if (LU.Kind != LSRUse::ICmpZero)
1632 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1638 C.Insns = std::numeric_limits<unsigned>::max();
1639 C.NumRegs = std::numeric_limits<unsigned>::max();
1640 C.AddRecCost = std::numeric_limits<unsigned>::max();
1641 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1642 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1643 C.ImmCost = std::numeric_limits<unsigned>::max();
1644 C.SetupCost = std::numeric_limits<unsigned>::max();
1645 C.ScaleCost = std::numeric_limits<unsigned>::max();
1649bool Cost::isLess(
const Cost &
Other)
const {
1651 C.Insns !=
Other.C.Insns)
1652 return C.Insns <
Other.C.Insns;
1656#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1659 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1660 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1661 if (
C.AddRecCost != 0)
1662 OS <<
", with addrec cost " <<
C.AddRecCost;
1663 if (
C.NumIVMuls != 0)
1664 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1665 << (
C.NumIVMuls == 1 ?
"" :
"s");
1666 if (
C.NumBaseAdds != 0)
1667 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1668 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1669 if (
C.ScaleCost != 0)
1670 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1672 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1673 if (
C.SetupCost != 0)
1674 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1683bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1686 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1687 if (PN->getIncomingValue(i) == OperandValToReplace &&
1688 L->contains(PN->getIncomingBlock(i)))
1693 return !
L->contains(UserInst);
1696#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1697void LSRFixup::print(raw_ostream &OS)
const {
1702 Store->getOperand(0)->printAsOperand(OS,
false);
1708 OS <<
", OperandValToReplace=";
1711 for (
const Loop *PIL : PostIncLoops) {
1712 OS <<
", PostIncLoop=";
1713 PIL->getHeader()->printAsOperand(OS,
false);
1717 OS <<
", Offset=" <<
Offset;
1727bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1729 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1736float LSRUse::getNotSelectedProbability(
const SCEV *
Reg)
const {
1738 for (
const Formula &
F : Formulae)
1739 if (
F.referencesReg(
Reg))
1741 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1746bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1747 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1749 if (!Formulae.empty() && RigidFormula)
1753 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1761 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1762 "Zero allocated in a scaled register!");
1764 for (
const SCEV *BaseReg :
F.BaseRegs)
1765 assert(!
BaseReg->isZero() &&
"Zero allocated in a base register!");
1769 Formulae.push_back(
F);
1780void LSRUse::DeleteFormula(Formula &
F) {
1781 if (&
F != &Formulae.back())
1783 Formulae.pop_back();
1787void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1789 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1791 for (
const Formula &
F : Formulae) {
1792 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1797 for (
const SCEV *S : OldRegs)
1799 RegUses.dropRegister(S, LUIdx);
1802#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1803void LSRUse::print(raw_ostream &OS)
const {
1804 OS <<
"LSR Use: Kind=";
1806 case Basic: OS <<
"Basic";
break;
1807 case Special: OS <<
"Special";
break;
1808 case ICmpZero: OS <<
"ICmpZero";
break;
1810 OS <<
"Address of ";
1814 OS << *AccessTy.MemTy;
1817 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1820 OS <<
", Offsets={";
1821 bool NeedComma =
false;
1822 for (
const LSRFixup &
Fixup : Fixups) {
1823 if (NeedComma) OS <<
',';
1829 if (AllFixupsOutsideLoop)
1830 OS <<
", all-fixups-outside-loop";
1832 if (AllFixupsUnconditional)
1833 OS <<
", all-fixups-unconditional";
1842 LSRUse::KindType Kind, MemAccessTy AccessTy,
1844 bool HasBaseReg, int64_t Scale,
1847 case LSRUse::Address: {
1848 int64_t FixedOffset =
1849 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1850 int64_t ScalableOffset =
1851 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1852 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
1853 HasBaseReg, Scale, AccessTy.AddrSpace,
1854 Fixup, ScalableOffset);
1856 case LSRUse::ICmpZero:
1863 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1868 if (Scale != 0 && Scale != -1)
1873 if (BaseOffset.isNonZero()) {
1876 if (BaseOffset.isScalable())
1886 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1887 return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
1895 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1897 case LSRUse::Special:
1899 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1906 Immediate MinOffset, Immediate MaxOffset,
1907 LSRUse::KindType Kind, MemAccessTy AccessTy,
1909 bool HasBaseReg, int64_t Scale) {
1910 if (BaseOffset.isNonZero() &&
1911 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1912 BaseOffset.isScalable() != MaxOffset.isScalable()))
1915 int64_t
Base = BaseOffset.getKnownMinValue();
1916 int64_t Min = MinOffset.getKnownMinValue();
1917 int64_t Max = MaxOffset.getKnownMinValue();
1920 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1923 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1926 HasBaseReg, Scale) &&
1932 Immediate MinOffset, Immediate MaxOffset,
1933 LSRUse::KindType Kind, MemAccessTy AccessTy,
1934 const Formula &
F,
const Loop &L) {
1942 assert((
F.isCanonical(L) ||
F.Scale != 0));
1944 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1949 Immediate MaxOffset, LSRUse::KindType Kind,
1951 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1954 BaseOffset, HasBaseReg, Scale) ||
1959 BaseGV, BaseOffset,
true, 0));
1963 Immediate MaxOffset, LSRUse::KindType Kind,
1964 MemAccessTy AccessTy,
const Formula &
F) {
1965 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1966 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1972 return TTI.isLegalAddScalableImmediate(
Offset.getKnownMinValue());
1974 return TTI.isLegalAddImmediate(
Offset.getFixedValue());
1978 const LSRUse &LU,
const Formula &
F) {
1980 if (LU.Kind == LSRUse::Address &&
TTI.LSRWithInstrQueries()) {
1981 for (
const LSRFixup &
Fixup : LU.Fixups)
1983 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1984 F.Scale,
Fixup.UserInst))
1990 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1995 const LSRUse &LU,
const Formula &
F,
2004 return F.Scale != 1;
2007 case LSRUse::Address: {
2009 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
2010 if (
F.BaseOffset.isScalable()) {
2011 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
2012 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
2014 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
2015 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
2019 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
2022 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
2025 "Legal addressing mode has an illegal cost!");
2026 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
2028 case LSRUse::ICmpZero:
2030 case LSRUse::Special:
2040 LSRUse::KindType Kind, MemAccessTy AccessTy,
2044 if (BaseOffset.isZero() && !BaseGV)
2049 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2053 if (!HasBaseReg && Scale == 1) {
2063 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2073 Immediate MaxOffset, LSRUse::KindType Kind,
2074 MemAccessTy AccessTy,
const SCEV *S,
2077 if (S->
isZero())
return true;
2090 if (BaseOffset.isZero() && !BaseGV)
2093 if (BaseOffset.isScalable())
2098 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2101 BaseOffset, HasBaseReg, Scale);
2118 const SCEV *IncExpr;
2120 IVInc(Instruction *U,
Value *O,
const SCEV *
E)
2121 : UserInst(
U), IVOperand(
O), IncExpr(
E) {}
2128 const SCEV *ExprBase =
nullptr;
2130 IVChain() =
default;
2131 IVChain(
const IVInc &Head,
const SCEV *
Base)
2132 : Incs(1, Head), ExprBase(
Base) {}
2137 const_iterator
begin()
const {
2139 return std::next(Incs.
begin());
2141 const_iterator
end()
const {
2146 bool hasIncs()
const {
return Incs.
size() >= 2; }
2155 bool isProfitableIncrement(
const SCEV *OperExpr,
2156 const SCEV *IncExpr,
2164 SmallPtrSet<Instruction*, 4> FarUsers;
2165 SmallPtrSet<Instruction*, 4> NearUsers;
2171 ScalarEvolution &SE;
2174 AssumptionCache &AC;
2175 TargetLibraryInfo &TLI;
2176 const TargetTransformInfo &
TTI;
2178 MemorySSAUpdater *MSSAU;
2182 bool HardwareLoopProfitable =
false;
2183 bool ShouldPreserveLCSSA =
false;
2197 SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
2204 SmallSetVector<Type *, 4> Types;
2210 RegUseTracker RegUses;
2215 static const unsigned MaxChains = 8;
2221 SmallPtrSet<Use*, MaxChains> IVIncSet;
2224 SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
2230 SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2232 void OptimizeShadowIV();
2233 bool FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse);
2235 void OptimizeLoopTermCond();
2237 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2238 SmallVectorImpl<ChainUsers> &ChainUsersVec);
2239 void FinalizeChain(IVChain &Chain);
2240 void CollectChains();
2241 void GenerateIVChain(
const IVChain &Chain,
2242 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2244 void CollectInterestingTypesAndFactors();
2245 void CollectFixupsAndInitialFormulae();
2248 using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2251 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2252 LSRUse::KindType Kind, MemAccessTy AccessTy);
2254 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2255 MemAccessTy AccessTy);
2257 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2259 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2261 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2262 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2263 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2264 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2265 bool IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const;
2267 void CollectLoopInvariantFixupsAndFormulae();
2269 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2270 unsigned Depth = 0);
2272 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2274 size_t Idx,
bool IsScaledReg =
false);
2275 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2276 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2277 const Formula &
Base,
size_t Idx,
2278 bool IsScaledReg =
false);
2279 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2280 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2281 const Formula &
Base,
2282 const SmallVectorImpl<Immediate> &Worklist,
2283 size_t Idx,
bool IsScaledReg =
false);
2284 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2285 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2286 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2287 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2288 void GenerateCrossUseConstantOffsets();
2289 void GenerateAllReuseFormulae();
2291 void FilterOutUndesirableDedicatedRegisters();
2293 size_t EstimateSearchSpaceComplexity()
const;
2294 void NarrowSearchSpaceByDetectingSupersets();
2295 void NarrowSearchSpaceByCollapsingUnrolledCode();
2296 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2297 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2298 void NarrowSearchSpaceByFilterPostInc();
2299 void NarrowSearchSpaceByMergingUsesOutsideLoop();
2300 void NarrowSearchSpaceByDeletingCostlyFormulas();
2301 void NarrowSearchSpaceByPickingWinnerRegs();
2302 void NarrowSearchSpaceUsingHeuristics();
2304 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2306 SmallVectorImpl<const Formula *> &Workspace,
2307 const Cost &CurCost,
2308 const SmallPtrSet<const SCEV *, 16> &CurRegs,
2309 DenseSet<const SCEV *> &VisitedRegs)
const;
2310 void Solve(SmallVectorImpl<const Formula *> &Solution)
const;
2314 const SmallVectorImpl<Instruction *> &Inputs)
const;
2317 const LSRUse &LU)
const;
2319 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2321 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const;
2322 void RewriteForPHI(PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2324 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2325 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2326 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2327 void ImplementSolution(
const SmallVectorImpl<const Formula *> &Solution);
2334 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2335 LoopInfo &LI,
const TargetTransformInfo &
TTI, AssumptionCache &AC,
2336 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU,
2337 bool PreserveLCSSA);
2339 bool getChanged()
const {
return Changed; }
2340 const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs()
const {
2341 return ScalarEvolutionIVs;
2344 void print_factors_and_types(raw_ostream &OS)
const;
2345 void print_fixups(raw_ostream &OS)
const;
2346 void print_uses(raw_ostream &OS)
const;
2347 void print(raw_ostream &OS)
const;
2355void LSRInstance::OptimizeShadowIV() {
2365 Type *DestTy =
nullptr;
2366 bool IsSigned =
false;
2382 DestTy = UCast->getDestTy();
2386 DestTy = SCast->getDestTy();
2388 if (!DestTy)
continue;
2408 if (Mantissa == -1)
continue;
2412 unsigned Entry, Latch;
2422 if (!Init)
continue;
2423 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2427 BinaryOperator *Incr =
2429 if (!Incr)
continue;
2430 if (Incr->
getOpcode() != Instruction::Add
2431 && Incr->
getOpcode() != Instruction::Sub)
2435 ConstantInt *
C =
nullptr;
2447 if (!
C->getValue().isStrictlyPositive())
2455 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2457 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2458 : Instruction::FSub,
2475bool LSRInstance::FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse) {
2476 for (IVStrideUse &U : IU)
2477 if (
U.getUser() ==
Cond) {
2535Instruction *LSRInstance::OptimizeMax(ICmpInst *
Cond, IVStrideUse *&CondUse) {
2550 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2551 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2557 const SCEVNAryExpr *
Max =
nullptr;
2559 Pred = ICmpInst::ICMP_SLE;
2562 Pred = ICmpInst::ICMP_SLT;
2565 Pred = ICmpInst::ICMP_ULT;
2574 if (
Max->getNumOperands() != 2)
2577 const SCEV *MaxLHS =
Max->getOperand(0);
2578 const SCEV *MaxRHS =
Max->getOperand(1);
2583 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2594 "Loop condition operand is an addrec in a different loop!");
2598 Value *NewRHS =
nullptr;
2599 if (ICmpInst::isTrueWhenEqual(Pred)) {
2603 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2604 NewRHS = BO->getOperand(0);
2607 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2608 NewRHS = BO->getOperand(0);
2616 NewRHS = SU->getValue();
2628 ICmpInst *NewCond =
new ICmpInst(
Cond->getIterator(), Pred,
2629 Cond->getOperand(0), NewRHS,
"scmp");
2633 Cond->replaceAllUsesWith(NewCond);
2636 Cond->eraseFromParent();
2638 if (
Cmp->use_empty()) {
2640 Cmp->eraseFromParent();
2647LSRInstance::OptimizeLoopTermCond() {
2648 SmallPtrSet<Instruction *, 4> PostIncs;
2663 SmallVector<BasicBlock*, 8> ExitingBlocks;
2664 L->getExitingBlocks(ExitingBlocks);
2672 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2694 IVStrideUse *CondUse =
nullptr;
2695 if (!FindIVUserForCond(
Cond, CondUse))
2705 Cond = OptimizeMax(Cmp, CondUse);
2710 if (!DT.
dominates(ExitingBlock, LatchBlock))
2715 if (LatchBlock != ExitingBlock)
2716 for (
const IVStrideUse &UI : IU)
2719 if (&UI != CondUse &&
2723 const SCEV *
A = IU.getStride(*CondUse, L);
2724 const SCEV *
B = IU.getStride(UI, L);
2725 if (!
A || !
B)
continue;
2734 if (
const SCEVConstant *
D =
2736 const ConstantInt *
C =
D->getValue();
2738 if (
C->isOne() ||
C->isMinusOne())
2739 goto decline_post_inc;
2741 if (
C->getValue().getSignificantBits() >= 64 ||
2742 C->getValue().isMinSignedValue())
2743 goto decline_post_inc;
2746 MemAccessTy AccessTy =
2748 int64_t Scale =
C->getSExtValue();
2752 AccessTy.AddrSpace))
2753 goto decline_post_inc;
2758 AccessTy.AddrSpace))
2759 goto decline_post_inc;
2764 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2772 if (
Cond->hasOneUse()) {
2773 Cond->moveBefore(TermBr->getIterator());
2778 Cond->setName(
L->getHeader()->getName() +
".termcond");
2779 Cond->insertInto(ExitingBlock, TermBr->getIterator());
2783 TermBr->replaceUsesOfWith(OldCond,
Cond);
2800 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2801 for (Instruction *Inst : PostIncs)
2807bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2808 bool HasBaseReg, LSRUse::KindType Kind,
2809 MemAccessTy AccessTy) {
2810 Immediate NewMinOffset = LU.MinOffset;
2811 Immediate NewMaxOffset = LU.MaxOffset;
2812 MemAccessTy NewAccessTy = AccessTy;
2817 if (LU.Kind != Kind)
2823 if (Kind == LSRUse::Address) {
2824 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2825 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->
getContext(),
2826 AccessTy.AddrSpace);
2831 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2833 LU.MaxOffset - NewOffset, HasBaseReg))
2835 NewMinOffset = NewOffset;
2836 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2838 NewOffset - LU.MinOffset, HasBaseReg))
2840 NewMaxOffset = NewOffset;
2846 if (NewAccessTy.MemTy && NewAccessTy.MemTy->
isVoidTy() &&
2847 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2851 LU.MinOffset = NewMinOffset;
2852 LU.MaxOffset = NewMaxOffset;
2853 LU.AccessTy = NewAccessTy;
2860std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2861 LSRUse::KindType Kind,
2862 MemAccessTy AccessTy) {
2863 const SCEV *
Copy = Expr;
2866 ExprUse, SE, AccessTy.MemTy && AccessTy.MemTy->
isScalableTy());
2873 Offset = Immediate::getFixed(0);
2876 std::pair<UseMapTy::iterator, bool>
P =
2877 UseMap.
try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
2880 size_t LUIdx =
P.first->second;
2881 LSRUse &LU =
Uses[LUIdx];
2882 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2884 return std::make_pair(LUIdx,
Offset);
2888 size_t LUIdx =
Uses.size();
2889 P.first->second = LUIdx;
2890 Uses.push_back(LSRUse(Kind, AccessTy));
2891 LSRUse &LU =
Uses[LUIdx];
2895 return std::make_pair(LUIdx,
Offset);
2899void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2900 if (&LU != &
Uses.back())
2905 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2911LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2912 const LSRUse &OrigLU) {
2914 for (LSRUse &LU :
Uses) {
2920 if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero &&
2921 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2922 LU.HasFormulaWithSameRegs(OrigF)) {
2924 for (
const Formula &
F : LU.Formulae) {
2927 if (
F.BaseRegs == OrigF.BaseRegs &&
2928 F.ScaledReg == OrigF.ScaledReg &&
2929 F.BaseGV == OrigF.BaseGV &&
2930 F.Scale == OrigF.Scale &&
2931 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2932 if (
F.BaseOffset.isZero())
2947void LSRInstance::CollectInterestingTypesAndFactors() {
2948 SmallSetVector<const SCEV *, 4> Strides;
2952 for (
const IVStrideUse &U : IU) {
2953 const SCEV *Expr = IU.getExpr(U);
2971 }
while (!Worklist.
empty());
2975 for (SmallSetVector<const SCEV *, 4>::const_iterator
2977 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2978 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2979 const SCEV *OldStride = *
I;
2980 const SCEV *NewStride = *NewStrideIter;
2990 if (
const SCEVConstant *Factor =
2993 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2994 Factors.insert(Factor->getAPInt().getSExtValue());
2995 }
else if (
const SCEVConstant *Factor =
2999 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
3000 Factors.insert(Factor->getAPInt().getSExtValue());
3006 if (Types.size() == 1)
3018 for(; OI != OE; ++OI) {
3037 return Trunc->getOperand(0);
3070 if (SubExpr->getSCEVType() ==
scAddExpr)
3073 if (SubExpr->getSCEVType() !=
scMulExpr)
3089bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3090 const SCEV *IncExpr,
3091 ScalarEvolution &SE) {
3104 SmallPtrSet<const SCEV*, 8> Processed;
3125 if (!Chain.hasIncs())
3128 if (!
Users.empty()) {
3129 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3131 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3134 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3143 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3146 const SCEV *LastIncExpr =
nullptr;
3147 unsigned NumConstIncrements = 0;
3148 unsigned NumVarIncrements = 0;
3149 unsigned NumReusedIncrements = 0;
3151 if (
TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
3154 for (
const IVInc &Inc : Chain) {
3155 if (
TTI.isProfitableLSRChainElement(Inc.UserInst))
3157 if (Inc.IncExpr->isZero())
3163 ++NumConstIncrements;
3167 if (Inc.IncExpr == LastIncExpr)
3168 ++NumReusedIncrements;
3172 LastIncExpr = Inc.IncExpr;
3177 if (NumConstIncrements > 1)
3184 cost += NumVarIncrements;
3188 cost -= NumReusedIncrements;
3190 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3197void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
3198 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
3202 const SCEV *
const OperExpr = SE.
getSCEV(NextIV);
3203 const SCEV *
const OperExprBase =
getExprBase(OperExpr);
3207 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3208 const SCEV *LastIncExpr =
nullptr;
3209 for (; ChainIdx < NChains; ++ChainIdx) {
3210 IVChain &Chain = IVChainVec[ChainIdx];
3228 const SCEV *PrevExpr = SE.
getSCEV(PrevIV);
3229 const SCEV *IncExpr = SE.
getMinusSCEV(OperExpr, PrevExpr);
3233 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3234 LastIncExpr = IncExpr;
3240 if (ChainIdx == NChains) {
3247 LastIncExpr = OperExpr;
3254 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3256 ChainUsersVec.
resize(NChains);
3257 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3258 <<
") IV=" << *LastIncExpr <<
"\n");
3260 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3261 <<
") IV+" << *LastIncExpr <<
"\n");
3263 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3265 IVChain &Chain = IVChainVec[ChainIdx];
3267 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
3269 if (!LastIncExpr->
isZero()) {
3270 ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);
3279 for (User *U : IVOper->
users()) {
3285 IVChain::const_iterator IncIter = Chain.Incs.begin();
3286 IVChain::const_iterator IncEnd = Chain.Incs.end();
3287 for( ; IncIter != IncEnd; ++IncIter) {
3288 if (IncIter->UserInst == OtherUse)
3291 if (IncIter != IncEnd)
3296 && IU.isIVUserOrOperand(OtherUse)) {
3299 NearUsers.
insert(OtherUse);
3304 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3329void LSRInstance::CollectChains() {
3333 SmallVector<BasicBlock *,8> LatchPath;
3336 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3342 for (BasicBlock *BB :
reverse(LatchPath)) {
3343 for (Instruction &
I : *BB) {
3349 if (IU.isEphemeral(&
I))
3359 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3360 ChainIdx < NChains; ++ChainIdx) {
3361 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3364 SmallPtrSet<Instruction*, 4> UniqueOperands;
3367 while (IVOpIter != IVOpEnd) {
3369 if (UniqueOperands.
insert(IVOpInst).second)
3370 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3371 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3376 for (PHINode &PN :
L->getHeader()->phis()) {
3383 ChainInstruction(&PN, IncV, ChainUsersVec);
3386 unsigned ChainIdx = 0;
3387 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3388 UsersIdx < NChains; ++UsersIdx) {
3390 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3393 if (ChainIdx != UsersIdx)
3394 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3395 FinalizeChain(IVChainVec[ChainIdx]);
3398 IVChainVec.resize(ChainIdx);
3401void LSRInstance::FinalizeChain(IVChain &Chain) {
3402 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3403 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3405 for (
const IVInc &Inc : Chain) {
3407 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3408 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3409 IVIncSet.insert(UseI);
3417 Immediate IncOffset = Immediate::getZero();
3426 C->getSignificantBits() > 64)
3428 IncOffset = Immediate::getScalable(
C->getSExtValue());
3444void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3445 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3448 const IVInc &Head = Chain.Incs[0];
3453 Value *IVSrc =
nullptr;
3454 while (IVOpIter != IVOpEnd) {
3465 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3466 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3469 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3471 if (IVOpIter == IVOpEnd) {
3473 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3476 assert(IVSrc &&
"Failed to find IV chain source");
3481 const SCEV *LeftOverExpr =
nullptr;
3482 const SCEV *Accum = SE.
getZero(IntTy);
3486 for (
const IVInc &Inc : Chain) {
3489 InsertPt =
L->getLoopLatch()->getTerminator();
3493 Value *IVOper = IVSrc;
3494 if (!Inc.IncExpr->isZero()) {
3499 LeftOverExpr = LeftOverExpr ?
3500 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3504 bool FoundBase =
false;
3505 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3506 const SCEV *Remainder = SE.
getMinusSCEV(Accum, MapScev);
3508 if (!Remainder->
isZero()) {
3510 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3511 const SCEV *IVOperExpr =
3513 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3522 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3525 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3528 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3532 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3535 LeftOverExpr =
nullptr;
3539 if (IVTy != OperTy) {
3541 "cannot extend a chained IV");
3543 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3545 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3552 for (PHINode &Phi :
L->getHeader()->phis()) {
3556 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3559 Value *IVOper = IVSrc;
3561 if (IVTy != PostIncTy) {
3563 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3564 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3565 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3567 Phi.replaceUsesOfWith(PostIncV, IVOper);
3573void LSRInstance::CollectFixupsAndInitialFormulae() {
3574 CondBrInst *ExitBranch =
nullptr;
3575 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3578 SmallPtrSet<const SCEV *, 16> Regs;
3579 DenseSet<const SCEV *> VisitedRegs;
3580 DenseSet<size_t> VisitedLSRUse;
3582 for (
const IVStrideUse &U : IU) {
3587 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3588 if (IVIncSet.count(UseI)) {
3589 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3593 LSRUse::KindType
Kind = LSRUse::Basic;
3594 MemAccessTy AccessTy;
3596 Kind = LSRUse::Address;
3600 const SCEV *S = IU.getExpr(U);
3616 if (CI->isEquality()) {
3619 Value *
NV = CI->getOperand(1);
3620 if (NV ==
U.getOperandValToReplace()) {
3621 CI->setOperand(1, CI->getOperand(0));
3622 CI->setOperand(0, NV);
3623 NV = CI->getOperand(1);
3630 (!
NV->getType()->isPointerTy() ||
3637 Kind = LSRUse::ICmpZero;
3639 }
else if (
L->isLoopInvariant(NV) &&
3642 !
NV->getType()->isPointerTy()) {
3653 Kind = LSRUse::ICmpZero;
3660 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3661 if (Factors[i] != -1)
3662 Factors.insert(-(uint64_t)Factors[i]);
3668 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3669 size_t LUIdx =
P.first;
3671 LSRUse &LU =
Uses[LUIdx];
3674 LSRFixup &LF = LU.getNewFixup();
3675 LF.UserInst = UserInst;
3676 LF.OperandValToReplace =
U.getOperandValToReplace();
3677 LF.PostIncLoops = TmpPostIncLoops;
3679 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3680 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3683 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3685 F.initialMatch(S, L, SE);
3686 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU,
3687 HardwareLoopProfitable);
3688 VisitedLSRUse.
insert(LUIdx);
3692 if (LU.Formulae.empty()) {
3693 InsertInitialFormula(S, LU, LUIdx);
3694 CountRegisters(LU.Formulae.back(), LUIdx);
3703void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3707 LU.RigidFormula =
true;
3710 F.initialMatch(S, L, SE);
3711 bool Inserted = InsertFormula(LU, LUIdx,
F);
3712 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3718LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3719 LSRUse &LU,
size_t LUIdx) {
3721 F.BaseRegs.push_back(S);
3722 F.HasBaseReg =
true;
3723 bool Inserted = InsertFormula(LU, LUIdx,
F);
3724 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3728void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3730 RegUses.countRegister(
F.ScaledReg, LUIdx);
3731 for (
const SCEV *BaseReg :
F.BaseRegs)
3732 RegUses.countRegister(BaseReg, LUIdx);
3737bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3740 "Formula is illegal");
3742 if (!LU.InsertFormula(
F, *L))
3745 CountRegisters(
F, LUIdx);
3751bool LSRInstance::IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const {
3763LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3765 SmallPtrSet<const SCEV *, 32> Visited;
3772 while (!Worklist.
empty()) {
3776 if (!Visited.
insert(S).second)
3787 const Value *
V = US->getValue();
3790 if (
L->contains(Inst))
continue;
3794 for (
const Use &U :
V->uses()) {
3804 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3826 bool HasIncompatibleEHPTerminatedBlock =
false;
3828 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3829 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3830 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3831 HasIncompatibleEHPTerminatedBlock =
true;
3836 if (HasIncompatibleEHPTerminatedBlock) {
3859 unsigned OtherIdx = !
U.getOperandNo();
3860 Value *OtherOp = ICI->getOperand(OtherIdx);
3870 std::pair<size_t, Immediate>
P =
3871 getUse(S, LSRUse::Basic, MemAccessTy());
3872 size_t LUIdx =
P.first;
3874 LSRUse &LU =
Uses[LUIdx];
3875 LSRFixup &LF = LU.getNewFixup();
3876 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3877 LF.OperandValToReplace =
U;
3879 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3880 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3881 InsertSupplementalFormula(US, LU, LUIdx);
3882 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3898 unsigned Depth = 0) {
3905 for (
const SCEV *S :
Add->operands()) {
3912 const SCEV *Start, *Step;
3917 if (Start->isZero())
3926 Remainder =
nullptr;
3928 if (Remainder != Start) {
3950 LSRUse &LU,
const SCEV *S,
const Loop *L,
3952 if (LU.Kind != LSRUse::Address ||
3953 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3959 if (
TTI.isIndexedLoadLegal(
TTI.MIM_PostInc, S->
getType()) ||
3968void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3969 const Formula &
Base,
3970 unsigned Depth,
size_t Idx,
3972 const SCEV *
BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
3980 const SCEV *Remainder =
CollectSubexprs(BaseReg,
nullptr, AddOps, L, SE);
3984 if (AddOps.
size() == 1)
3998 LU.AccessTy, *J,
Base.getNumRegs() > 1))
4003 InnerAddOps.append(std::next(J), std::as_const(AddOps).
end());
4007 if (InnerAddOps.size() == 1 &&
4009 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
4012 const SCEV *InnerSum = SE.
getAddExpr(InnerAddOps);
4017 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
4026 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
4029 F.ScaledReg =
nullptr;
4032 F.BaseRegs.erase(
F.BaseRegs.begin() + Idx);
4033 }
else if (IsScaledReg)
4034 F.ScaledReg = InnerSum;
4036 F.BaseRegs[Idx] = InnerSum;
4044 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
4047 F.BaseRegs.push_back(*J);
4052 if (InsertFormula(LU, LUIdx,
F))
4059 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4065void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4067 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4072 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4073 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4075 if (
Base.Scale == 1)
4076 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4082void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4085 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4086 (
Base.UnfoldedOffset.isNonZero()) <=
4094 Formula NewBase =
Base;
4095 NewBase.BaseRegs.clear();
4096 Type *CombinedIntegerType =
nullptr;
4097 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4100 if (!CombinedIntegerType)
4102 Ops.push_back(BaseReg);
4105 NewBase.BaseRegs.push_back(BaseReg);
4109 if (
Ops.size() == 0)
4114 auto GenerateFormula = [&](
const SCEV *Sum) {
4115 Formula
F = NewBase;
4123 F.BaseRegs.push_back(Sum);
4125 (void)InsertFormula(LU, LUIdx,
F);
4129 if (
Ops.size() > 1) {
4136 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4137 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4139 NewBase.UnfoldedOffset.getFixedValue(),
true));
4140 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4146void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4147 const Formula &
Base,
size_t Idx,
4151 if (
G->isZero() || !GV)
4155 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4160 F.BaseRegs[Idx] =
G;
4161 (void)InsertFormula(LU, LUIdx,
F);
4165void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4168 if (
Base.BaseGV)
return;
4170 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4171 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4172 if (
Base.Scale == 1)
4173 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4178void LSRInstance::GenerateConstantOffsetsImpl(
4179 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4180 const SmallVectorImpl<Immediate> &Worklist,
size_t Idx,
bool IsScaledReg) {
4182 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4184 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4186 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4188 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4190 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4196 F.ScaledReg =
nullptr;
4198 F.deleteBaseReg(
F.BaseRegs[Idx]);
4200 }
else if (IsScaledReg)
4203 F.BaseRegs[Idx] = NewG;
4205 (void)InsertFormula(LU, LUIdx,
F);
4220 const APInt *StepInt;
4225 for (Immediate
Offset : Worklist) {
4227 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4233 for (Immediate
Offset : Worklist)
4240 if (
G->isZero() ||
Imm.isZero() ||
4241 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4244 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4245 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4250 F.BaseRegs[Idx] =
G;
4255 (void)InsertFormula(LU, LUIdx,
F);
4259void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4265 if (LU.MaxOffset != LU.MinOffset)
4268 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4269 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4270 if (
Base.Scale == 1)
4271 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4277void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4279 if (LU.Kind != LSRUse::ICmpZero)
return;
4287 if (LU.MinOffset != LU.MaxOffset)
return;
4290 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4292 for (
const SCEV *BaseReg :
Base.BaseRegs)
4293 if (
BaseReg->getType()->isPointerTy())
4295 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4298 for (int64_t Factor : Factors) {
4303 if (
Base.BaseOffset.isMin() && Factor == -1)
4306 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4308 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4309 assert(Factor != 0 &&
"Zero factor not expected!");
4310 if (NewBaseOffset.getFixedValue() / Factor !=
4311 Base.BaseOffset.getFixedValue())
4319 Immediate
Offset = LU.MinOffset;
4320 if (
Offset.isMin() && Factor == -1)
4323 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4331 F.BaseOffset = NewBaseOffset;
4338 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4340 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4343 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4357 if (
F.UnfoldedOffset.isNonZero()) {
4358 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4360 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4361 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4362 Base.UnfoldedOffset.getFixedValue())
4366 IntTy,
F.UnfoldedOffset.getFixedValue()))
4371 (void)InsertFormula(LU, LUIdx,
F);
4378void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4385 if (
Base.Scale != 0 && !
Base.unscale())
4388 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4391 for (int64_t Factor : Factors) {
4392 Base.Scale = Factor;
4393 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4395 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4399 if (LU.Kind == LSRUse::Basic &&
4400 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4401 LU.AccessTy,
Base) &&
4402 LU.AllFixupsOutsideLoop)
4403 LU.Kind = LSRUse::Special;
4409 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4410 Base.BaseOffset.isZero() && !
Base.BaseGV)
4413 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4415 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4416 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4421 if (
const SCEV *Quotient =
getExactSDiv(AR, FactorS, SE,
true))
4422 if (!Quotient->isZero()) {
4425 F.ScaledReg = Quotient;
4426 F.deleteBaseReg(
F.BaseRegs[i]);
4430 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4431 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4435 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4437 (void)InsertFormula(LU, LUIdx,
F);
4453 const SCEV *Result =
nullptr;
4454 for (
auto &L :
Loops) {
4458 if (!New || (Result && New != Result))
4463 assert(Result &&
"failed to create expression");
4468void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4470 if (
Base.BaseGV)
return;
4480 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4483 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4487 for (
auto &LF : LU.Fixups)
4488 Loops.push_back(LF.PostIncLoops);
4490 for (
Type *SrcTy : Types) {
4499 const SCEV *NewScaledReg =
4501 if (!NewScaledReg || NewScaledReg->
isZero())
4503 F.ScaledReg = NewScaledReg;
4505 bool HasZeroBaseReg =
false;
4506 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4507 const SCEV *NewBaseReg =
4509 if (!NewBaseReg || NewBaseReg->
isZero()) {
4510 HasZeroBaseReg =
true;
4520 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4524 (void)InsertFormula(LU, LUIdx,
F);
4537 const SCEV *OrigReg;
4539 WorkItem(
size_t LI, Immediate
I,
const SCEV *R)
4540 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4542 void print(raw_ostream &OS)
const;
4548#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4549void WorkItem::print(raw_ostream &OS)
const {
4550 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4551 <<
" , add offset " <<
Imm;
4561void LSRInstance::GenerateCrossUseConstantOffsets() {
4563 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4565 DenseMap<const SCEV *, ImmMapTy>
Map;
4566 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4568 for (
const SCEV *Use : RegUses) {
4572 auto Pair =
Map.try_emplace(
Reg);
4575 Pair.first->second.insert(std::make_pair(Imm, Use));
4576 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(Use);
4583 SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
4585 for (
const SCEV *
Reg : Sequence) {
4586 const ImmMapTy &Imms =
Map.find(
Reg)->second;
4589 if (Imms.size() == 1)
4593 for (
const auto &Entry
4595 <<
' ' <<
Entry.first;
4599 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4601 const SCEV *OrigReg = J->second;
4603 Immediate JImm = J->first;
4604 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4607 UsedByIndicesMap[
Reg].
count() == 1) {
4615 Immediate
First = Imms.begin()->first;
4616 Immediate
Last = std::prev(Imms.end())->first;
4617 if (!
First.isCompatibleImmediate(
Last)) {
4624 bool Scalable =
First.isScalable() ||
Last.isScalable();
4625 int64_t FI =
First.getKnownMinValue();
4626 int64_t LI =
Last.getKnownMinValue();
4629 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4632 Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
4633 ImmMapTy::const_iterator OtherImms[] = {
4634 Imms.begin(), std::prev(Imms.end()),
4635 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4636 for (
const auto &M : OtherImms) {
4637 if (M == J || M == JE)
continue;
4638 if (!JImm.isCompatibleImmediate(
M->first))
4642 Immediate
Imm = JImm.subUnsigned(
M->first);
4643 for (
unsigned LUIdx : UsedByIndices.
set_bits())
4645 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4646 WorkItems.
push_back(WorkItem(LUIdx, Imm, OrigReg));
4653 UsedByIndicesMap.
clear();
4654 UniqueItems.
clear();
4657 for (
const WorkItem &WI : WorkItems) {
4658 size_t LUIdx = WI.LUIdx;
4659 LSRUse &LU =
Uses[LUIdx];
4660 Immediate
Imm = WI.Imm;
4661 const SCEV *OrigReg = WI.OrigReg;
4664 const SCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4668 for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4669 Formula
F = LU.Formulae[
L];
4676 if (
F.ScaledReg == OrigReg) {
4677 if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4679 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4681 const SCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4682 if (
F.referencesReg(S))
4685 NewF.BaseOffset =
Offset;
4686 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4689 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4698 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4700 if (
C->getValue()->isNegative() !=
4701 (NewF.BaseOffset.isLessThanZero()) &&
4702 (
C->getAPInt().abs() * APInt(
BitWidth,
F.Scale))
4703 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4708 NewF.canonicalize(*this->L);
4709 (void)InsertFormula(LU, LUIdx, NewF);
4712 for (
size_t N = 0, NE =
F.BaseRegs.size();
N != NE; ++
N) {
4714 if (BaseReg != OrigReg)
4717 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4718 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4719 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4721 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4723 LU.Kind, LU.AccessTy, NewF)) {
4727 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4731 NewF.UnfoldedOffset = NewUnfoldedOffset;
4733 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4738 for (
const SCEV *NewReg : NewF.BaseRegs)
4740 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4742 if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4744 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4745 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4748 NewF.BaseOffset.getFixedValue()))
4753 NewF.canonicalize(*this->L);
4754 (void)InsertFormula(LU, LUIdx, NewF);
4765LSRInstance::GenerateAllReuseFormulae() {
4768 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4769 LSRUse &LU =
Uses[LUIdx];
4770 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4771 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4772 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4773 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4775 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4776 LSRUse &LU =
Uses[LUIdx];
4777 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4778 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4779 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4780 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4781 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4782 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4783 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4784 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4786 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4787 LSRUse &LU =
Uses[LUIdx];
4788 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4789 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4792 GenerateCrossUseConstantOffsets();
4795 "After generating reuse formulae:\n";
4796 print_uses(
dbgs()));
4801void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4802 DenseSet<const SCEV *> VisitedRegs;
4803 SmallPtrSet<const SCEV *, 16> Regs;
4804 SmallPtrSet<const SCEV *, 16> LoserRegs;
4806 bool ChangedFormulae =
false;
4811 using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>,
size_t>;
4813 BestFormulaeTy BestFormulae;
4815 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4816 LSRUse &LU =
Uses[LUIdx];
4821 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4822 FIdx != NumForms; ++FIdx) {
4823 Formula &
F = LU.Formulae[FIdx];
4834 CostF.RateFormula(
F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4836 if (CostF.isLoser()) {
4848 for (
const SCEV *
Reg :
F.BaseRegs) {
4849 if (RegUses.isRegUsedByUsesOtherThan(
Reg, LUIdx))
4853 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4854 Key.push_back(
F.ScaledReg);
4859 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4860 BestFormulae.insert(std::make_pair(
Key, FIdx));
4864 Formula &Best = LU.Formulae[
P.first->second];
4866 Cost CostBest(L, SE,
TTI, AMK);
4868 CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
4869 HardwareLoopProfitable);
4870 if (CostF.isLess(CostBest))
4874 " in favor of formula ";
4875 Best.print(
dbgs());
dbgs() <<
'\n');
4878 ChangedFormulae =
true;
4880 LU.DeleteFormula(
F);
4888 LU.RecomputeRegs(LUIdx, RegUses);
4891 BestFormulae.clear();
4896 "After filtering out undesirable candidates:\n";
4904size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4906 for (
const LSRUse &LU :
Uses) {
4907 size_t FSize = LU.Formulae.size();
4922void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4926 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4927 "which use a superset of registers used by other "
4930 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4931 LSRUse &LU =
Uses[LUIdx];
4933 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4934 Formula &
F = LU.Formulae[i];
4935 if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4941 I =
F.BaseRegs.begin(),
E =
F.BaseRegs.end();
I !=
E; ++
I) {
4947 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4948 (uint64_t)
C->getValue()->getSExtValue());
4949 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4950 (
I -
F.BaseRegs.begin()));
4951 if (LU.HasFormulaWithSameRegs(NewF)) {
4954 LU.DeleteFormula(
F);
4965 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4966 (
I -
F.BaseRegs.begin()));
4967 if (LU.HasFormulaWithSameRegs(NewF)) {
4970 LU.DeleteFormula(
F);
4981 LU.RecomputeRegs(LUIdx, RegUses);
4990void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4995 dbgs() <<
"The search space is too complex.\n"
4996 "Narrowing the search space by assuming that uses separated "
4997 "by a constant offset will use the same registers.\n");
5001 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5002 LSRUse &LU =
Uses[LUIdx];
5003 for (
const Formula &
F : LU.Formulae) {
5004 if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
5006 assert((LU.Kind == LSRUse::Address || LU.Kind == LSRUse::ICmpZero) &&
5007 "Only address and cmp uses expected to have nonzero BaseOffset");
5009 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
5013 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
5014 LU.Kind, LU.AccessTy))
5019 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
5020 LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
5023 for (LSRFixup &
Fixup : LU.Fixups) {
5024 Fixup.Offset +=
F.BaseOffset;
5025 LUThatHas->pushFixup(
Fixup);
5030 Type *FixupType = LUThatHas->Fixups[0].OperandValToReplace->getType();
5031 for (LSRFixup &
Fixup : LUThatHas->Fixups)
5032 assert(
Fixup.OperandValToReplace->getType() == FixupType &&
5033 "Expected all fixups to have the same type");
5038 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
5039 Formula &
F = LUThatHas->Formulae[i];
5040 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
5041 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
5043 LUThatHas->DeleteFormula(
F);
5051 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
5054 DeleteUse(LU, LUIdx);
5067void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5071 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
5072 "undesirable dedicated registers.\n");
5074 FilterOutUndesirableDedicatedRegisters();
5089void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5094 dbgs() <<
"The search space is too complex.\n"
5095 "Narrowing the search space by choosing the best Formula "
5096 "from the Formulae with the same Scale and ScaledReg.\n");
5099 using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>,
size_t>;
5101 BestFormulaeTy BestFormulae;
5103 bool ChangedFormulae =
false;
5105 DenseSet<const SCEV *> VisitedRegs;
5106 SmallPtrSet<const SCEV *, 16> Regs;
5108 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5109 LSRUse &LU =
Uses[LUIdx];
5114 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5119 size_t FARegNum = 0;
5120 for (
const SCEV *
Reg : FA.BaseRegs) {
5121 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5122 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5124 size_t FBRegNum = 0;
5125 for (
const SCEV *
Reg : FB.BaseRegs) {
5126 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5127 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5129 if (FARegNum != FBRegNum)
5130 return FARegNum < FBRegNum;
5137 CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5139 CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5140 return CostFA.isLess(CostFB);
5144 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5146 Formula &
F = LU.Formulae[FIdx];
5149 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5153 Formula &Best = LU.Formulae[
P.first->second];
5154 if (IsBetterThan(
F, Best))
5158 " in favor of formula ";
5159 Best.print(
dbgs());
dbgs() <<
'\n');
5161 ChangedFormulae =
true;
5163 LU.DeleteFormula(
F);
5169 LU.RecomputeRegs(LUIdx, RegUses);
5172 BestFormulae.clear();
5177 "After filtering out undesirable candidates:\n";
5184void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5191 "Narrowing the search space by choosing the lowest "
5192 "register Formula for PostInc Uses.\n");
5194 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5195 LSRUse &LU =
Uses[LUIdx];
5197 if (LU.Kind != LSRUse::Address)
5203 size_t MinRegs = std::numeric_limits<size_t>::max();
5204 for (
const Formula &
F : LU.Formulae)
5205 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5208 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5210 Formula &
F = LU.Formulae[FIdx];
5211 if (
F.getNumRegs() > MinRegs) {
5214 LU.DeleteFormula(
F);
5221 LU.RecomputeRegs(LUIdx, RegUses);
5230void LSRInstance::NarrowSearchSpaceByMergingUsesOutsideLoop() {
5235 dbgs() <<
"The search space is too complex.\n"
5236 "Narrowing the search space by merging uses with fixups "
5237 "entirely outside the loop with uses inside the loop.\n");
5239 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5240 LSRUse &LU =
Uses[LUIdx];
5243 if (!LU.AllFixupsOutsideLoop || LU.Formulae.empty() ||
5244 LU.Kind == LSRUse::ICmpZero)
5251 LSRUse *LUToMergeWith =
nullptr;
5252 const Formula &ThisF = LU.Formulae[0];
5253 for (LSRUse &OtherLU :
Uses) {
5255 if (OtherLU.AllFixupsOutsideLoop)
5259 if (OtherLU.Kind == LSRUse::ICmpZero)
5262 if (OtherLU.Formulae.empty())
5265 if (
any_of(OtherLU.Formulae, [&](
const Formula &
F) {
5266 return !isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, OtherLU.Kind,
5267 OtherLU.AccessTy, F);
5274 const Formula &OtherF = OtherLU.Formulae[0];
5275 if (ThisF.BaseRegs == OtherF.BaseRegs &&
5276 ThisF.ScaledReg == OtherF.ScaledReg &&
5277 ThisF.BaseGV == OtherF.BaseGV && ThisF.Scale == OtherF.Scale &&
5278 ThisF.UnfoldedOffset == OtherF.UnfoldedOffset &&
5279 ThisF.BaseOffset == OtherF.BaseOffset) {
5280 LUToMergeWith = &OtherLU;
5291 for (LSRFixup &
Fixup : LU.Fixups) {
5292 LUToMergeWith->pushFixup(
Fixup);
5296 DeleteUse(LU, LUIdx);
5346void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5355 SmallPtrSet<const SCEV *, 4> UniqRegs;
5359 DenseMap <const SCEV *, float> RegNumMap;
5360 for (
const SCEV *
Reg : RegUses) {
5364 for (
const LSRUse &LU :
Uses) {
5365 if (!LU.Regs.count(
Reg))
5367 float P = LU.getNotSelectedProbability(
Reg);
5373 RegNumMap.
insert(std::make_pair(
Reg, PNotSel));
5377 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5380 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5381 LSRUse &LU =
Uses[LUIdx];
5383 if (LU.Formulae.size() < 2)
5388 float FMinRegNum = LU.Formulae[0].getNumRegs();
5389 float FMinARegNum = LU.Formulae[0].getNumRegs();
5391 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5392 Formula &
F = LU.Formulae[i];
5395 for (
const SCEV *BaseReg :
F.BaseRegs) {
5396 if (UniqRegs.
count(BaseReg))
5398 FRegNum += RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5401 RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5403 if (
const SCEV *ScaledReg =
F.ScaledReg) {
5404 if (!UniqRegs.
count(ScaledReg)) {
5406 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5409 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5412 if (FMinRegNum > FRegNum ||
5413 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5414 FMinRegNum = FRegNum;
5415 FMinARegNum = FARegNum;
5420 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5422 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5423 while (LU.Formulae.size() != 1) {
5426 LU.Formulae.pop_back();
5428 LU.RecomputeRegs(LUIdx, RegUses);
5429 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5430 Formula &
F = LU.Formulae[0];
5446 MemAccessTy AccessType) {
5456 return TTI.isLegalAddressingMode(
5457 AccessType.MemTy,
nullptr,
5458 Diff->getSExtValue(),
5459 true, 0, AccessType.AddrSpace) &&
5460 !
TTI.isLegalAddressingMode(
5461 AccessType.MemTy,
nullptr,
5462 -Diff->getSExtValue(),
5463 true, 0, AccessType.AddrSpace);
5469void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5472 SmallPtrSet<const SCEV *, 4> Taken;
5480 const SCEV *Best =
nullptr;
5481 unsigned BestNum = 0;
5482 for (
const SCEV *
Reg : RegUses) {
5487 BestNum = RegUses.getUsedByIndices(
Reg).count();
5489 unsigned Count = RegUses.getUsedByIndices(
Reg).count();
5490 if (
Count > BestNum) {
5498 if (
Count == BestNum) {
5499 int LUIdx = RegUses.getUsedByIndices(
Reg).find_first();
5500 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5502 Uses[LUIdx].AccessTy)) {
5509 assert(Best &&
"Failed to find best LSRUse candidate");
5511 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5512 <<
" will yield profitable reuse.\n");
5517 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5518 LSRUse &LU =
Uses[LUIdx];
5519 if (!LU.Regs.count(Best))
continue;
5522 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5523 Formula &
F = LU.Formulae[i];
5524 if (!
F.referencesReg(Best)) {
5526 LU.DeleteFormula(
F);
5530 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5536 LU.RecomputeRegs(LUIdx, RegUses);
5547void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5548 NarrowSearchSpaceByDetectingSupersets();
5549 NarrowSearchSpaceByCollapsingUnrolledCode();
5550 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5552 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5553 NarrowSearchSpaceByFilterPostInc();
5554 NarrowSearchSpaceByMergingUsesOutsideLoop();
5556 NarrowSearchSpaceByDeletingCostlyFormulas();
5558 NarrowSearchSpaceByPickingWinnerRegs();
5562void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5564 SmallVectorImpl<const Formula *> &Workspace,
5565 const Cost &CurCost,
5566 const SmallPtrSet<const SCEV *, 16> &CurRegs,
5567 DenseSet<const SCEV *> &VisitedRegs)
const {
5578 const LSRUse &LU =
Uses[Workspace.
size()];
5584 SmallSetVector<const SCEV *, 4> ReqRegs;
5585 for (
const SCEV *S : CurRegs)
5586 if (LU.Regs.count(S))
5589 SmallPtrSet<const SCEV *, 16> NewRegs;
5590 Cost NewCost(L, SE,
TTI, AMK);
5591 for (
const Formula &
F : LU.Formulae) {
5599 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5600 for (
const SCEV *
Reg : ReqRegs) {
5601 if ((
F.ScaledReg &&
F.ScaledReg ==
Reg) ||
5604 if (NumReqRegsToFind == 0)
5608 if (NumReqRegsToFind != 0) {
5619 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5620 if (NewCost.isLess(SolutionCost)) {
5622 if (Workspace.
size() !=
Uses.size()) {
5623 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5624 NewRegs, VisitedRegs);
5625 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5626 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5629 dbgs() <<
".\nRegs:\n";
5630 for (
const SCEV *S : NewRegs)
dbgs()
5631 <<
"- " << *S <<
"\n";
5634 SolutionCost = NewCost;
5635 Solution = Workspace;
5644void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution)
const {
5646 Cost SolutionCost(L, SE,
TTI, AMK);
5647 SolutionCost.Lose();
5648 Cost CurCost(L, SE,
TTI, AMK);
5649 SmallPtrSet<const SCEV *, 16> CurRegs;
5650 DenseSet<const SCEV *> VisitedRegs;
5654 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5655 CurRegs, VisitedRegs);
5656 if (Solution.
empty()) {
5663 "The chosen solution requires ";
5664 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5665 for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5670 Solution[i]->print(
dbgs());
5676 const bool EnableDropUnprofitableSolution = [&] {
5678 case cl::boolOrDefault::BOU_TRUE:
5680 case cl::boolOrDefault::BOU_FALSE:
5682 case cl::boolOrDefault::BOU_UNSET:
5688 if (BaselineCost.isLess(SolutionCost)) {
5689 if (!EnableDropUnprofitableSolution)
5691 dbgs() <<
"Baseline is more profitable than chosen solution, "
5692 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5695 "solution, dropping LSR solution.\n";);
5706 const SmallVectorImpl<Instruction *> &Inputs)
5710 bool AllDominate =
true;
5717 for (Instruction *Inst : Inputs) {
5718 if (Inst == Tentative || !DT.
dominates(Inst, Tentative)) {
5719 AllDominate =
false;
5724 if (Tentative->
getParent() == Inst->getParent() &&
5725 (!BetterPos || !DT.
dominates(Inst, BetterPos)))
5735 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5736 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5740 if (!Rung)
return IP;
5741 Rung = Rung->getIDom();
5742 if (!Rung)
return IP;
5743 IDom = Rung->getBlock();
5746 const Loop *IDomLoop = LI.getLoopFor(IDom);
5747 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5748 if (IDomDepth <= IPLoopDepth &&
5749 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5766 SmallVector<Instruction *, 4> Inputs;
5769 if (LU.Kind == LSRUse::ICmpZero)
5770 if (Instruction *
I =
5773 if (LF.PostIncLoops.
count(L)) {
5774 if (LF.isUseFullyOutsideLoop(L))
5775 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5781 for (
const Loop *PIL : LF.PostIncLoops) {
5782 if (PIL == L)
continue;
5787 if (!ExitingBlocks.
empty()) {
5789 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5796 "Insertion point must be a normal instruction");
5806 while (IP->isEHPad()) ++IP;
5811 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5819Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5821 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const {
5822 if (LU.RigidFormula)
5823 return LF.OperandValToReplace;
5827 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5832 Rewriter.setPostInc(LF.PostIncLoops);
5837 Type *Ty =
F.getType();
5850 if (LU.Kind == LSRUse::ICmpZero && OpTy->
isPointerTy()) {
5859 for (
const SCEV *
Reg :
F.BaseRegs) {
5860 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5868 Value *ICmpScaledV =
nullptr;
5870 const SCEV *ScaledS =
F.ScaledReg;
5876 if (LU.Kind == LSRUse::ICmpZero) {
5886 "The only scale supported by ICmpZero uses is -1!");
5887 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5895 if (!
Ops.empty() && LU.Kind == LSRUse::Address &&
5905 Ops.push_back(ScaledS);
5931 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5932 "Expanding mismatched offsets\n");
5934 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5935 if (
Offset.isNonZero()) {
5936 if (LU.Kind == LSRUse::ICmpZero) {
5943 IntTy, -(uint64_t)
Offset.getFixedValue(),
true);
5952 Ops.push_back(
Offset.getUnknownSCEV(SE, IntTy));
5957 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5958 if (UnfoldedOffset.isNonZero()) {
5960 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5964 const SCEV *FullS =
Ops.empty() ?
5975 if (LU.Kind == LSRUse::ICmpZero) {
5979 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5980 "a scale at the same time!");
5981 if (
F.Scale == -1) {
5982 if (ICmpScaledV->
getType() != OpTy) {
5992 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5993 "ICmp does not support folding a global value and "
5994 "a scale at the same time!");
5998 -(uint64_t)
Offset.getFixedValue(),
6000 if (
C->getType() != OpTy) {
6004 assert(
C &&
"Cast of ConstantInt should have folded");
6017void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRUse &LU,
6018 const LSRFixup &LF,
const Formula &
F,
6019 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6020 DenseMap<BasicBlock *, Value *>
Inserted;
6024 bool needUpdateFixups =
false;
6035 Loop *PNLoop = LI.getLoopFor(Parent);
6036 if (!PNLoop || Parent != PNLoop->
getHeader()) {
6040 CriticalEdgeSplittingOptions SplitOptions(&DT, &LI, MSSAU);
6042 SplitOptions.setMergeIdenticalEdges().setKeepOneInputPHIs();
6043 if (ShouldPreserveLCSSA)
6044 SplitOptions = SplitOptions.setPreserveLCSSA();
6048 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
6059 if (
L->contains(BB) && !
L->contains(PN))
6067 needUpdateFixups =
true;
6072 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
6085 LF.OperandValToReplace->
getType(),
"tmp",
6092 if (
L->contains(
I) && !
L->contains(BB))
6093 InsertedNonLCSSAInsts.insert(
I);
6096 Pair.first->second = FullV;
6103 if (needUpdateFixups) {
6104 for (LSRUse &LU :
Uses)
6105 for (LSRFixup &
Fixup : LU.Fixups)
6109 if (
Fixup.UserInst == PN) {
6112 bool foundInOriginalPHI =
false;
6114 if (val ==
Fixup.OperandValToReplace) {
6115 foundInOriginalPHI =
true;
6120 if (foundInOriginalPHI)
6131 if (val ==
Fixup.OperandValToReplace)
6132 Fixup.UserInst = NewPN;
6142void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
6144 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6148 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6156 if (FullV->
getType() != OpTy &&
6157 !(LU.Kind == LSRUse::ICmpZero && OpTy->
isPointerTy())) {
6169 if (LU.Kind == LSRUse::ICmpZero)
6185 const LSRFixup &
Fixup,
const LSRUse &LU,
6189 if (LU.Kind != LSRUse::Address)
6190 return IVIncInsertPos;
6194 Type *Ty =
I->getType();
6197 return IVIncInsertPos;
6204 return IVIncInsertPos;
6211void LSRInstance::ImplementSolution(
6212 const SmallVectorImpl<const Formula *> &Solution) {
6218 for (
const IVChain &Chain : IVChainVec) {
6224 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6225 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6228 Rewriter.setIVIncInsertPos(L, InsertPos);
6229 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6233 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6236 for (
const IVChain &Chain : IVChainVec) {
6237 GenerateIVChain(Chain, DeadInsts);
6241 for (
const WeakVH &
IV :
Rewriter.getInsertedIVs())
6259 for (PHINode &PN :
L->getHeader()->phis()) {
6260 BinaryOperator *BO =
nullptr;
6266 case Instruction::Sub:
6271 case Instruction::Add:
6288 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6297LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6298 DominatorTree &DT, LoopInfo &LI,
6299 const TargetTransformInfo &
TTI, AssumptionCache &AC,
6300 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU,
6302 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6305 :
TTI.getPreferredAddressingMode(
L, &SE)),
6306 Rewriter(SE,
"lsr", PreserveLCSSA), ShouldPreserveLCSSA(PreserveLCSSA),
6307 BaselineCost(
L, SE,
TTI, AMK) {
6309 if (!
L->isLoopSimplifyForm())
6317 unsigned NumUsers = 0;
6321 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6329 auto FirstNonPHI = PN->
getParent()->getFirstNonPHIIt();
6339 L->getHeader()->printAsOperand(
dbgs(),
false);
6345 HardwareLoopProfitable =
6346 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6350#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6353 Rewriter.disableCanonicalMode();
6354 Rewriter.enableLSRMode();
6358 OptimizeLoopTermCond();
6361 if (IU.empty())
return;
6364 if (!
L->isInnermost()) {
6377 CollectInterestingTypesAndFactors();
6378 CollectFixupsAndInitialFormulae();
6379 CollectLoopInvariantFixupsAndFormulae();
6385 print_uses(
dbgs()));
6387 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6391 GenerateAllReuseFormulae();
6393 FilterOutUndesirableDedicatedRegisters();
6394 NarrowSearchSpaceUsingHeuristics();
6404 if (Solution.
empty())
6409 for (
const LSRUse &LU :
Uses) {
6410 for (
const Formula &
F : LU.Formulae)
6412 F) &&
"Illegal formula generated!");
6417 ImplementSolution(Solution);
6420#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6421void LSRInstance::print_factors_and_types(
raw_ostream &OS)
const {
6422 if (Factors.empty() &&
Types.empty())
return;
6424 OS <<
"LSR has identified the following interesting factors and types: ";
6427 for (int64_t Factor : Factors)
6428 OS <<
LS <<
'*' << Factor;
6430 for (
Type *Ty : Types)
6431 OS <<
LS <<
'(' << *Ty <<
')';
6435void LSRInstance::print_fixups(raw_ostream &OS)
const {
6436 OS <<
"LSR is examining the following fixup sites:\n";
6437 for (
const LSRUse &LU :
Uses)
6438 for (
const LSRFixup &LF : LU.Fixups) {
6445void LSRInstance::print_uses(raw_ostream &OS)
const {
6446 OS <<
"LSR is examining the following uses:\n";
6447 for (
const LSRUse &LU :
Uses) {
6451 for (
const Formula &
F : LU.Formulae) {
6459void LSRInstance::print(raw_ostream &OS)
const {
6460 print_factors_and_types(OS);
6472class LoopStrengthReduce :
public LoopPass {
6476 LoopStrengthReduce();
6479 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
6480 void getAnalysisUsage(AnalysisUsage &AU)
const override;
6485LoopStrengthReduce::LoopStrengthReduce() : LoopPass(
ID) {
6489void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6516ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6517 llvm::DIExpression::expr_op_iterator Begin =
6518 llvm::DIExpression::expr_op_iterator(Expr.
begin());
6519 llvm::DIExpression::expr_op_iterator End =
6520 llvm::DIExpression::expr_op_iterator(Expr.
end());
6521 return {Begin, End};
6524struct SCEVDbgValueBuilder {
6525 SCEVDbgValueBuilder() =
default;
6526 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6528 void clone(
const SCEVDbgValueBuilder &
Base) {
6529 LocationOps =
Base.LocationOps;
6534 LocationOps.
clear();
6541 SmallVector<Value *, 2> LocationOps;
6544 void pushUInt(uint64_t Operand) { Expr.
push_back(Operand); }
6551 unsigned ArgIndex = 0;
6552 if (It != LocationOps.
end()) {
6553 ArgIndex = std::distance(LocationOps.
begin(), It);
6555 ArgIndex = LocationOps.
size();
6561 void pushValue(
const SCEVUnknown *U) {
6566 bool pushConst(
const SCEVConstant *
C) {
6567 if (
C->getAPInt().getSignificantBits() > 64)
6569 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6570 Expr.
push_back(
C->getAPInt().getSExtValue());
6577 return ToDwarfOpIter(Expr);
6582 bool pushArithmeticExpr(
const llvm::SCEVCommutativeExpr *CommExpr,
6585 "Expected arithmetic SCEV type");
6587 unsigned EmitOperator = 0;
6588 for (
const auto &
Op : CommExpr->
operands()) {
6591 if (EmitOperator >= 1)
6592 pushOperator(DwarfOp);
6599 bool pushCast(
const llvm::SCEVCastExpr *
C,
bool IsSigned) {
6600 const llvm::SCEV *Inner =
C->getOperand(0);
6601 const llvm::Type *
Type =
C->getType();
6602 uint64_t ToWidth =
Type->getIntegerBitWidth();
6603 bool Success = pushSCEV(Inner);
6605 IsSigned ? llvm::dwarf::DW_ATE_signed
6606 : llvm::dwarf::DW_ATE_unsigned};
6607 for (
const auto &
Op : CastOps)
6613 bool pushSCEV(
const llvm::SCEV *S) {
6616 Success &= pushConst(StartInt);
6621 pushLocation(
U->getValue());
6624 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6627 Success &= pushSCEV(UDiv->getLHS());
6628 Success &= pushSCEV(UDiv->getRHS());
6629 pushOperator(llvm::dwarf::DW_OP_div);
6636 "Unexpected cast type in SCEV.");
6640 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6655 bool isIdentityFunction(uint64_t
Op,
const SCEV *S) {
6657 if (
C->getAPInt().getSignificantBits() > 64)
6659 int64_t
I =
C->getAPInt().getSExtValue();
6661 case llvm::dwarf::DW_OP_plus:
6662 case llvm::dwarf::DW_OP_minus:
6664 case llvm::dwarf::DW_OP_mul:
6665 case llvm::dwarf::DW_OP_div:
6678 bool SCEVToValueExpr(
const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6684 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6685 if (!pushSCEV(Stride))
6687 pushOperator(llvm::dwarf::DW_OP_mul);
6689 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6690 if (!pushSCEV(Start))
6692 pushOperator(llvm::dwarf::DW_OP_plus);
6698 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6699 pushLocation(OffsetValue);
6702 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6703 << std::to_string(
Offset) <<
"\n");
6709 bool createIterCountExpr(
const SCEV *S,
6710 const SCEVDbgValueBuilder &IterationCount,
6711 ScalarEvolution &SE) {
6720 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6724 if (!Rec->isAffine())
6732 clone(IterationCount);
6733 if (!SCEVToValueExpr(*Rec, SE))
6744 bool SCEVToIterCountExpr(
const llvm::SCEVAddRecExpr &SAR,
6745 ScalarEvolution &SE) {
6751 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6752 if (!pushSCEV(Start))
6754 pushOperator(llvm::dwarf::DW_OP_minus);
6756 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6757 if (!pushSCEV(Stride))
6759 pushOperator(llvm::dwarf::DW_OP_div);
6767 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6768 SmallVectorImpl<Value *> &DestLocations) {
6770 "Expected the locations vector to contain the IV");
6775 "Expected the location ops to contain the IV.");
6779 for (
const auto &
Op : LocationOps) {
6780 auto It =
find(DestLocations,
Op);
6781 if (It != DestLocations.
end()) {
6783 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6791 for (
const auto &
Op : expr_ops()) {
6793 Op.appendToVector(DestExpr);
6800 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6808struct DVIRecoveryRec {
6809 DVIRecoveryRec(DbgVariableRecord *DVR)
6810 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6812 DbgVariableRecord *DbgRef;
6814 bool HadLocationArgList;
6820 for (
auto &RE : RecoveryExprs)
6822 RecoveryExprs.clear();
6825 ~DVIRecoveryRec() { clear(); }
6833 auto expr_ops = ToDwarfOpIter(Expr);
6835 for (
auto Op : expr_ops)
6844template <
typename T>
6848 "contain any DW_OP_llvm_arg operands.");
6855template <
typename T>
6860 "Expected expression that references DIArglist locations using "
6861 "DW_OP_llvm_arg operands.");
6863 for (
Value *V : Locations)
6880 if (NumLLVMArgs == 0) {
6887 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6917 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6918 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6919 assert(DVIRec.Expr &&
"Expected an expression");
6924 if (!DVIRec.HadLocationArgList) {
6925 assert(DVIRec.LocationOps.size() == 1 &&
6926 "Unexpected number of location ops.");
6930 Value *CachedValue =
6935 for (
WeakVH VH : DVIRec.LocationOps) {
6943 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6948 const SCEV *SCEVInductionVar,
6949 SCEVDbgValueBuilder IterCountExpr) {
6963 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6965 NewLocationOps.
push_back(LSRInductionVar);
6967 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6968 WeakVH VH = DVIRec.LocationOps[i];
6974 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6976 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6984 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6985 <<
" refers to a location that is now undef or erased. "
6986 "Salvage abandoned.\n");
6990 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6991 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6993 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6994 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6998 if (std::optional<APInt>
Offset =
7000 if (
Offset->getSignificantBits() <= 64)
7001 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
7004 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
7013 assert(DVIRec.RecoveryExprs.size() == 1 &&
7014 "Expected only a single recovery expression for an empty "
7016 assert(DVIRec.RecoveryExprs[0] &&
7017 "Expected a SCEVDbgSalvageBuilder for location 0");
7018 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
7019 B->appendToVectors(
NewExpr, NewLocationOps);
7021 for (
const auto &
Op : DVIRec.Expr->
expr_ops()) {
7029 SCEVDbgValueBuilder *DbgBuilder =
7030 DVIRec.RecoveryExprs[LocationArgIndex].get();
7036 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
7037 "Expected a positive index for the location-op position.");
7038 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
7042 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
7046 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Updated DVI: " << *DVIRec.DbgRef <<
"\n");
7054 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
7055 if (DVIToUpdate.empty())
7059 assert(SCEVInductionVar &&
7060 "Anticipated a SCEV for the post-LSR induction variable");
7064 if (!IVAddRec->isAffine())
7072 SCEVDbgValueBuilder IterCountExpr;
7073 IterCountExpr.pushLocation(LSRInductionVar);
7074 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
7077 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
7080 for (
auto &DVIRec : DVIToUpdate) {
7081 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
7092 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
7093 for (
const auto &
B : L->getBlocks()) {
7094 for (
auto &
I : *
B) {
7096 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
7101 if (DbgVal.isKillLocation())
7106 const auto &HasTranslatableLocationOps =
7108 for (
const auto LocOp : DbgValToTranslate.location_ops()) {
7122 if (!HasTranslatableLocationOps(DbgVal))
7125 std::unique_ptr<DVIRecoveryRec> NewRec =
7126 std::make_unique<DVIRecoveryRec>(&DbgVal);
7130 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
7131 for (
const auto LocOp : DbgVal.location_ops()) {
7132 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
7133 NewRec->LocationOps.push_back(LocOp);
7134 NewRec->HadLocationArgList = DbgVal.hasArgList();
7136 SalvageableDVISCEVs.push_back(std::move(NewRec));
7146 const LSRInstance &LSR) {
7148 auto IsSuitableIV = [&](
PHINode *
P) {
7159 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7166 if (IsSuitableIV(
P))
7170 for (
PHINode &
P : L.getHeader()->phis()) {
7171 if (IsSuitableIV(&
P))
7189 std::unique_ptr<MemorySSAUpdater> MSSAU;
7191 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7194 const LSRInstance &Reducer =
7195 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get(), PreserveLCSSA);
7196 Changed |= Reducer.getChanged();
7203#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7206 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7220 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7234 if (SalvageableDVIRecords.
empty())
7240 for (
const auto &L : LI) {
7244 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7245 "could not be identified.\n");
7249 for (
auto &Rec : SalvageableDVIRecords)
7251 SalvageableDVIRecords.
clear();
7255bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & ) {
7259 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7260 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7261 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7262 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7263 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7264 *
L->getHeader()->getParent());
7265 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7266 *
L->getHeader()->getParent());
7267 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7268 *
L->getHeader()->getParent());
7269 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7272 MSSA = &MSSAAnalysis->getMSSA();
7291char LoopStrengthReduce::ID = 0;
7294 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
early cse Early CSE w MemorySSA
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< SCEVUse > &Good, SmallVectorImpl< SCEVUse > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA, bool PreserveLCSSA)
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static Instruction * getFixupInsertPos(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, DominatorTree &DT)
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static Immediate ExtractImmediate(SCEVUse &S, ScalarEvolution &SE, bool PreferScalable=false)
If S involves the addition of a constant integer value, return that integer value,...
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static Immediate ExtractImmediateOperand(MutableArrayRef< SCEVUse > Ops, ScalarEvolution &SE, bool PreferScalable)
Extracts an immediate operand from Ops and replaces the operand with zero.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static GlobalValue * ExtractSymbol(SCEVUse &S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth, const TargetTransformInfo &TTI)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Value * getCondition() const
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
LLVM_ABI PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
ArrayRef< SCEVUse > operands() const
bool hasNoSignedWrap() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
static constexpr auto FlagAnyWrap
LLVM_ABI ArrayRef< SCEVUse > operands() const
Return operands of this SCEV expression.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAddRecExpr(SCEVUse Start, SCEVUse Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
void setOperand(unsigned i, Value *Val)
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
iterator_range< use_iterator > uses()
A nullable Value handle that is nullable.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
match_bind< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool isa_and_nonnull(const Y &Val)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
SCEVUseT< const SCEV * > SCEVUse
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.