132#define DEBUG_TYPE "loop-reduce"
149 cl::desc(
"Enable LSR phi elimination"));
154 cl::desc(
"Add instruction count to a LSR cost model"));
159 cl::desc(
"Narrow LSR complex solution using"
160 " expectation of registers number"));
166 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
167 " with the same ScaledReg and Scale"));
171 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
175 "Prefer pre-indexed addressing mode"),
177 "Prefer post-indexed addressing mode"),
182 cl::init(std::numeric_limits<uint16_t>::max()),
183 cl::desc(
"LSR search space complexity limit"));
187 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
191 cl::desc(
"Attempt to drop solution if it is less profitable"));
195 cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
199 cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
205 cl::desc(
"Stress test LSR IV chains"));
215 std::numeric_limits<unsigned>::max();
217 Type *MemTy =
nullptr;
220 MemAccessTy() =
default;
221 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
224 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
229 static MemAccessTy getUnknown(LLVMContext &Ctx,
230 unsigned AS = UnknownAddressSpace) {
231 return MemAccessTy(Type::getVoidTy(Ctx), AS);
242 SmallBitVector UsedByIndices;
244 void print(raw_ostream &OS)
const;
251 constexpr Immediate(ScalarTy MinVal,
bool Scalable)
252 : FixedOrScalableQuantity(MinVal, Scalable) {}
254 constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
255 : FixedOrScalableQuantity(
V) {}
258 constexpr Immediate() =
delete;
260 static constexpr Immediate getFixed(ScalarTy MinVal) {
261 return {MinVal,
false};
263 static constexpr Immediate getScalable(ScalarTy MinVal) {
264 return {MinVal,
true};
266 static constexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
267 return {MinVal, Scalable};
269 static constexpr Immediate getZero() {
return {0,
false}; }
270 static constexpr Immediate getFixedMin() {
271 return {std::numeric_limits<int64_t>::min(),
false};
273 static constexpr Immediate getFixedMax() {
274 return {std::numeric_limits<int64_t>::max(),
false};
276 static constexpr Immediate getScalableMin() {
277 return {std::numeric_limits<int64_t>::min(),
true};
279 static constexpr Immediate getScalableMax() {
280 return {std::numeric_limits<int64_t>::max(),
true};
283 constexpr bool isLessThanZero()
const {
return Quantity < 0; }
285 constexpr bool isGreaterThanZero()
const {
return Quantity > 0; }
287 constexpr bool isCompatibleImmediate(
const Immediate &Imm)
const {
288 return isZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
291 constexpr bool isMin()
const {
292 return Quantity == std::numeric_limits<ScalarTy>::min();
295 constexpr bool isMax()
const {
296 return Quantity == std::numeric_limits<ScalarTy>::max();
300 constexpr Immediate addUnsigned(
const Immediate &
RHS)
const {
301 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
302 ScalarTy
Value = (uint64_t)Quantity +
RHS.getKnownMinValue();
303 return {
Value, Scalable ||
RHS.isScalable()};
306 constexpr Immediate subUnsigned(
const Immediate &
RHS)
const {
307 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
308 ScalarTy
Value = (uint64_t)Quantity -
RHS.getKnownMinValue();
309 return {
Value, Scalable ||
RHS.isScalable()};
313 constexpr Immediate mulUnsigned(
const ScalarTy
RHS)
const {
314 ScalarTy
Value = (uint64_t)Quantity *
RHS;
315 return {
Value, Scalable};
319 const SCEV *getSCEV(ScalarEvolution &SE,
Type *Ty)
const {
326 const SCEV *getNegativeSCEV(ScalarEvolution &SE,
Type *Ty)
const {
327 const SCEV *NegS = SE.
getConstant(Ty, -(uint64_t)Quantity);
333 const SCEV *getUnknownSCEV(ScalarEvolution &SE,
Type *Ty)
const {
348struct KeyOrderTargetImmediate {
349 bool operator()(
const Immediate &
LHS,
const Immediate &
RHS)
const {
350 if (
LHS.isScalable() && !
RHS.isScalable())
352 if (!
LHS.isScalable() &&
RHS.isScalable())
354 return LHS.getKnownMinValue() <
RHS.getKnownMinValue();
361struct KeyOrderSizeTAndImmediate {
362 bool operator()(
const std::pair<size_t, Immediate> &
LHS,
363 const std::pair<size_t, Immediate> &
RHS)
const {
364 size_t LSize =
LHS.first;
365 size_t RSize =
RHS.first;
367 return LSize < RSize;
368 return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
375 OS <<
"[NumUses=" << UsedByIndices.
count() <<
']';
387 using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
389 RegUsesTy RegUsesMap;
393 void countRegister(
const SCEV *
Reg,
size_t LUIdx);
394 void dropRegister(
const SCEV *
Reg,
size_t LUIdx);
395 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
397 bool isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const;
399 const SmallBitVector &getUsedByIndices(
const SCEV *
Reg)
const;
415RegUseTracker::countRegister(
const SCEV *
Reg,
size_t LUIdx) {
416 std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(
Reg);
417 RegSortData &RSD = Pair.first->second;
420 RSD.UsedByIndices.
resize(std::max(RSD.UsedByIndices.
size(), LUIdx + 1));
421 RSD.UsedByIndices.
set(LUIdx);
425RegUseTracker::dropRegister(
const SCEV *
Reg,
size_t LUIdx) {
426 RegUsesTy::iterator It = RegUsesMap.find(
Reg);
427 assert(It != RegUsesMap.end());
428 RegSortData &RSD = It->second;
430 RSD.UsedByIndices.
reset(LUIdx);
434RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
435 assert(LUIdx <= LastLUIdx);
439 for (
auto &Pair : RegUsesMap) {
440 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
441 if (LUIdx < UsedByIndices.
size())
442 UsedByIndices[LUIdx] =
443 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
444 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
449RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const {
450 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
451 if (
I == RegUsesMap.end())
453 const SmallBitVector &UsedByIndices =
I->second.UsedByIndices;
455 if (i == -1)
return false;
456 if ((
size_t)i != LUIdx)
return true;
460const SmallBitVector &RegUseTracker::getUsedByIndices(
const SCEV *
Reg)
const {
461 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
462 assert(
I != RegUsesMap.end() &&
"Unknown register!");
463 return I->second.UsedByIndices;
466void RegUseTracker::clear() {
477 GlobalValue *BaseGV =
nullptr;
480 Immediate BaseOffset = Immediate::getZero();
483 bool HasBaseReg =
false;
506 const SCEV *ScaledReg =
nullptr;
511 Immediate UnfoldedOffset = Immediate::getZero();
515 void initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE);
519 void canonicalize(
const Loop &L);
523 bool hasZeroEnd()
const;
525 bool countsDownToZero()
const;
527 size_t getNumRegs()
const;
530 void deleteBaseReg(
const SCEV *&S);
532 bool referencesReg(
const SCEV *S)
const;
533 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
534 const RegUseTracker &RegUses)
const;
536 void print(raw_ostream &OS)
const;
554 for (
const SCEV *S :
Add->operands())
560 const SCEV *Start, *Step;
575 if (
Mul->getOperand(0)->isAllOnesValue()) {
584 for (
const SCEV *S : MyGood)
586 for (
const SCEV *S : MyBad)
598void Formula::initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE) {
605 BaseRegs.push_back(Sum);
611 BaseRegs.push_back(Sum);
626bool Formula::isCanonical(
const Loop &L)
const {
627 assert((Scale == 0 || ScaledReg) &&
628 "ScaledReg must be non-null if Scale is non-zero");
631 return BaseRegs.size() <= 1;
636 if (Scale == 1 && BaseRegs.empty())
645 return none_of(BaseRegs, [&L](
const SCEV *S) {
656void Formula::canonicalize(
const Loop &L) {
660 if (BaseRegs.empty()) {
662 assert(ScaledReg &&
"Expected 1*reg => reg");
663 assert(Scale == 1 &&
"Expected 1*reg => reg");
664 BaseRegs.push_back(ScaledReg);
672 ScaledReg = BaseRegs.pop_back_val();
680 auto I =
find_if(BaseRegs, [&L](
const SCEV *S) {
683 if (
I != BaseRegs.end())
693bool Formula::unscale() {
697 BaseRegs.push_back(ScaledReg);
702bool Formula::hasZeroEnd()
const {
703 if (UnfoldedOffset || BaseOffset)
705 if (BaseRegs.size() != 1 || ScaledReg)
710bool Formula::countsDownToZero()
const {
713 assert(BaseRegs.size() == 1 &&
"hasZeroEnd should mean one BaseReg");
714 const APInt *StepInt;
722size_t Formula::getNumRegs()
const {
723 return !!ScaledReg + BaseRegs.size();
728Type *Formula::getType()
const {
729 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
730 ScaledReg ? ScaledReg->
getType() :
736void Formula::deleteBaseReg(
const SCEV *&S) {
737 if (&S != &BaseRegs.back())
743bool Formula::referencesReg(
const SCEV *S)
const {
749bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
750 const RegUseTracker &RegUses)
const {
752 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
754 for (
const SCEV *BaseReg : BaseRegs)
755 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
761void Formula::print(raw_ostream &OS)
const {
762 ListSeparator
Plus(
" + ");
767 if (BaseOffset.isNonZero())
768 OS <<
Plus << BaseOffset;
770 for (
const SCEV *BaseReg : BaseRegs)
773 if (HasBaseReg && BaseRegs.empty())
774 OS <<
Plus <<
"**error: HasBaseReg**";
775 else if (!HasBaseReg && !BaseRegs.empty())
776 OS <<
Plus <<
"**error: !HasBaseReg**";
779 OS <<
Plus << Scale <<
"*reg(";
786 if (UnfoldedOffset.isNonZero())
787 OS <<
Plus <<
"imm(" << UnfoldedOffset <<
')';
827 bool IgnoreSignificantBits =
false) {
838 if (
RA.isAllOnes()) {
839 if (
LHS->getType()->isPointerTy())
852 const APInt &LA =
C->getAPInt();
861 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
863 IgnoreSignificantBits);
864 if (!Step)
return nullptr;
866 IgnoreSignificantBits);
867 if (!Start)
return nullptr;
880 for (
const SCEV *S :
Add->operands()) {
882 if (!
Op)
return nullptr;
910 for (
const SCEV *S :
Mul->operands()) {
913 IgnoreSignificantBits)) {
933 bool PreferScalable) {
936 Immediate Result = Immediate::getZero();
945 C->getSignificantBits() <= 64) {
947 Result = Immediate::getFixed(
C->getSExtValue());
959 Result = Immediate::getScalable(
C->getSExtValue());
965 if (Result.isNonZero()) {
976 bool PreferScalable =
false) {
982 if (Result.isNonZero())
988 if (Result.isNonZero())
1029 if (
SI->getPointerOperand() == OperandVal)
1034 switch (
II->getIntrinsicID()) {
1035 case Intrinsic::memset:
1036 case Intrinsic::prefetch:
1037 case Intrinsic::masked_load:
1038 if (
II->getArgOperand(0) == OperandVal)
1041 case Intrinsic::masked_store:
1042 if (
II->getArgOperand(1) == OperandVal)
1045 case Intrinsic::memmove:
1046 case Intrinsic::memcpy:
1047 if (
II->getArgOperand(0) == OperandVal ||
1048 II->getArgOperand(1) == OperandVal)
1053 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo)) {
1054 if (IntrInfo.
PtrVal == OperandVal)
1060 if (RMW->getPointerOperand() == OperandVal)
1063 if (CmpX->getPointerOperand() == OperandVal)
1072 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1076 AccessTy.MemTy = Ty;
1080 AccessTy.AddrSpace =
SI->getPointerAddressSpace();
1082 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1084 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1086 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1088 switch (
II->getIntrinsicID()) {
1089 case Intrinsic::prefetch:
1090 case Intrinsic::memset:
1091 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1092 AccessTy.MemTy = OperandVal->
getType();
1094 case Intrinsic::memmove:
1095 case Intrinsic::memcpy:
1097 AccessTy.MemTy = OperandVal->
getType();
1099 case Intrinsic::masked_load:
1100 AccessTy.AddrSpace =
1101 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1103 case Intrinsic::masked_store:
1104 AccessTy.AddrSpace =
1105 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1109 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo) && IntrInfo.
PtrVal) {
1165 if (!Processed.
insert(S).second)
1169 for (
const SCEV *S :
Add->operands()) {
1176 const SCEV *Op0, *Op1;
1185 Value *UVal = U->getValue();
1189 if (UI && UI->
getOpcode() == Instruction::Mul &&
1222 const LSRUse &LU,
const Formula &
F);
1226 const LSRUse &LU,
const Formula &
F,
1233 const Loop *
L =
nullptr;
1234 ScalarEvolution *SE =
nullptr;
1235 const TargetTransformInfo *
TTI =
nullptr;
1236 TargetTransformInfo::LSRCost
C;
1241 Cost(
const Loop *L, ScalarEvolution &SE,
const TargetTransformInfo &
TTI,
1243 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1261 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1262 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1263 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1264 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0
u);
1270 return C.NumRegs == ~0
u;
1273 void RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1274 const DenseSet<const SCEV *> &VisitedRegs,
const LSRUse &LU,
1275 bool HardwareLoopProfitable,
1276 SmallPtrSetImpl<const SCEV *> *LoserRegs =
nullptr);
1278 void print(raw_ostream &OS)
const;
1282 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1283 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1284 bool HardwareLoopProfitable);
1285 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1286 SmallPtrSetImpl<const SCEV *> &Regs,
1287 const LSRUse &LU,
bool HardwareLoopProfitable,
1288 SmallPtrSetImpl<const SCEV *> *LoserRegs);
1299 Value *OperandValToReplace =
nullptr;
1309 Immediate
Offset = Immediate::getZero();
1311 LSRFixup() =
default;
1313 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1315 void print(raw_ostream &OS)
const;
1325 DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;
1338 using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1341 MemAccessTy AccessTy;
1347 Immediate MinOffset = Immediate::getFixedMax();
1348 Immediate MaxOffset = Immediate::getFixedMin();
1352 bool AllFixupsOutsideLoop =
true;
1357 bool AllFixupsUnconditional =
true;
1364 bool RigidFormula =
false;
1372 SmallPtrSet<const SCEV *, 4> Regs;
1374 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1376 LSRFixup &getNewFixup() {
1377 Fixups.push_back(LSRFixup());
1381 void pushFixup(LSRFixup &f) {
1383 if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1384 MaxOffset =
f.Offset;
1385 if (Immediate::isKnownLT(
f.Offset, MinOffset))
1386 MinOffset =
f.Offset;
1389 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1390 float getNotSelectedProbability(
const SCEV *
Reg)
const;
1391 bool InsertFormula(
const Formula &
F,
const Loop &L);
1392 void DeleteFormula(Formula &
F);
1393 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1395 void print(raw_ostream &OS)
const;
1402 LSRUse::KindType Kind, MemAccessTy AccessTy,
1403 GlobalValue *BaseGV, Immediate BaseOffset,
1404 bool HasBaseReg, int64_t Scale,
1405 Instruction *
Fixup =
nullptr);
1412 if (
TTI.getIntImmCost(
C->getAPInt(),
C->getType(),
1426 [&](
unsigned i,
const SCEV *
Reg) {
1427 return i + getSetupCost(Reg, Depth - 1, TTI);
1436void Cost::RateRegister(
const Formula &
F,
const SCEV *
Reg,
1437 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1438 bool HardwareLoopProfitable) {
1443 if (AR->getLoop() != L) {
1450 if (!AR->getLoop()->contains(L)) {
1460 unsigned LoopCost = 1;
1469 F.BaseOffset.isFixed() &&
1470 *Step ==
F.BaseOffset.getFixedValue();
1475 if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
1482 if (LU.Kind == LSRUse::ICmpZero &&
F.countsDownToZero() &&
1483 HardwareLoopProfitable)
1485 C.AddRecCost += LoopCost;
1490 if (!Regs.
count(AR->getOperand(1))) {
1491 RateRegister(
F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);
1503 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1512void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1513 SmallPtrSetImpl<const SCEV *> &Regs,
1514 const LSRUse &LU,
bool HardwareLoopProfitable,
1515 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1516 if (LoserRegs && LoserRegs->
count(
Reg)) {
1521 RateRegister(
F,
Reg, Regs, LU, HardwareLoopProfitable);
1522 if (LoserRegs && isLoser())
1527void Cost::RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1528 const DenseSet<const SCEV *> &VisitedRegs,
1529 const LSRUse &LU,
bool HardwareLoopProfitable,
1530 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1533 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1535 unsigned PrevAddRecCost =
C.AddRecCost;
1536 unsigned PrevNumRegs =
C.NumRegs;
1537 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1538 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1539 if (VisitedRegs.
count(ScaledReg)) {
1543 RatePrimaryRegister(
F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1548 for (
const SCEV *BaseReg :
F.BaseRegs) {
1549 if (VisitedRegs.
count(BaseReg)) {
1553 RatePrimaryRegister(
F, BaseReg, Regs, LU, HardwareLoopProfitable,
1560 size_t NumBaseParts =
F.getNumRegs();
1561 if (NumBaseParts > 1)
1566 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1572 for (
const LSRFixup &
Fixup : LU.Fixups) {
1573 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1574 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1578 else if (
Offset.isNonZero())
1580 APInt(64,
Offset.getKnownMinValue(),
true).getSignificantBits();
1584 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1605 if (
C.NumRegs > TTIRegNum) {
1608 if (PrevNumRegs > TTIRegNum)
1609 C.Insns += (
C.NumRegs - PrevNumRegs);
1611 C.Insns += (
C.NumRegs - TTIRegNum);
1624 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1628 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1631 if (LU.Kind != LSRUse::ICmpZero)
1632 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1638 C.Insns = std::numeric_limits<unsigned>::max();
1639 C.NumRegs = std::numeric_limits<unsigned>::max();
1640 C.AddRecCost = std::numeric_limits<unsigned>::max();
1641 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1642 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1643 C.ImmCost = std::numeric_limits<unsigned>::max();
1644 C.SetupCost = std::numeric_limits<unsigned>::max();
1645 C.ScaleCost = std::numeric_limits<unsigned>::max();
1649bool Cost::isLess(
const Cost &
Other)
const {
1651 C.Insns !=
Other.C.Insns)
1652 return C.Insns <
Other.C.Insns;
1656#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1659 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1660 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1661 if (
C.AddRecCost != 0)
1662 OS <<
", with addrec cost " <<
C.AddRecCost;
1663 if (
C.NumIVMuls != 0)
1664 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1665 << (
C.NumIVMuls == 1 ?
"" :
"s");
1666 if (
C.NumBaseAdds != 0)
1667 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1668 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1669 if (
C.ScaleCost != 0)
1670 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1672 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1673 if (
C.SetupCost != 0)
1674 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1683bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1686 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1687 if (PN->getIncomingValue(i) == OperandValToReplace &&
1688 L->contains(PN->getIncomingBlock(i)))
1693 return !
L->contains(UserInst);
1696#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1697void LSRFixup::print(raw_ostream &OS)
const {
1702 Store->getOperand(0)->printAsOperand(OS,
false);
1708 OS <<
", OperandValToReplace=";
1711 for (
const Loop *PIL : PostIncLoops) {
1712 OS <<
", PostIncLoop=";
1713 PIL->getHeader()->printAsOperand(OS,
false);
1717 OS <<
", Offset=" <<
Offset;
1727bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1729 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1736float LSRUse::getNotSelectedProbability(
const SCEV *
Reg)
const {
1738 for (
const Formula &
F : Formulae)
1739 if (
F.referencesReg(
Reg))
1741 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1746bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1747 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1749 if (!Formulae.empty() && RigidFormula)
1753 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1761 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1762 "Zero allocated in a scaled register!");
1764 for (
const SCEV *BaseReg :
F.BaseRegs)
1765 assert(!
BaseReg->isZero() &&
"Zero allocated in a base register!");
1769 Formulae.push_back(
F);
1780void LSRUse::DeleteFormula(Formula &
F) {
1781 if (&
F != &Formulae.back())
1783 Formulae.pop_back();
1787void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1789 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1791 for (
const Formula &
F : Formulae) {
1792 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1797 for (
const SCEV *S : OldRegs)
1799 RegUses.dropRegister(S, LUIdx);
1802#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1803void LSRUse::print(raw_ostream &OS)
const {
1804 OS <<
"LSR Use: Kind=";
1806 case Basic: OS <<
"Basic";
break;
1807 case Special: OS <<
"Special";
break;
1808 case ICmpZero: OS <<
"ICmpZero";
break;
1810 OS <<
"Address of ";
1814 OS << *AccessTy.MemTy;
1817 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1820 OS <<
", Offsets={";
1821 bool NeedComma =
false;
1822 for (
const LSRFixup &
Fixup : Fixups) {
1823 if (NeedComma) OS <<
',';
1829 if (AllFixupsOutsideLoop)
1830 OS <<
", all-fixups-outside-loop";
1832 if (AllFixupsUnconditional)
1833 OS <<
", all-fixups-unconditional";
1842 LSRUse::KindType Kind, MemAccessTy AccessTy,
1844 bool HasBaseReg, int64_t Scale,
1847 case LSRUse::Address: {
1848 int64_t FixedOffset =
1849 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1850 int64_t ScalableOffset =
1851 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1852 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
1853 HasBaseReg, Scale, AccessTy.AddrSpace,
1854 Fixup, ScalableOffset);
1856 case LSRUse::ICmpZero:
1863 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1868 if (Scale != 0 && Scale != -1)
1873 if (BaseOffset.isNonZero()) {
1876 if (BaseOffset.isScalable())
1886 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1887 return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
1895 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1897 case LSRUse::Special:
1899 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1906 Immediate MinOffset, Immediate MaxOffset,
1907 LSRUse::KindType Kind, MemAccessTy AccessTy,
1909 bool HasBaseReg, int64_t Scale) {
1910 if (BaseOffset.isNonZero() &&
1911 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1912 BaseOffset.isScalable() != MaxOffset.isScalable()))
1915 int64_t
Base = BaseOffset.getKnownMinValue();
1916 int64_t Min = MinOffset.getKnownMinValue();
1917 int64_t Max = MaxOffset.getKnownMinValue();
1920 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1923 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1926 HasBaseReg, Scale) &&
1932 Immediate MinOffset, Immediate MaxOffset,
1933 LSRUse::KindType Kind, MemAccessTy AccessTy,
1934 const Formula &
F,
const Loop &L) {
1942 assert((
F.isCanonical(L) ||
F.Scale != 0));
1944 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1949 Immediate MaxOffset, LSRUse::KindType Kind,
1951 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1954 BaseOffset, HasBaseReg, Scale) ||
1959 BaseGV, BaseOffset,
true, 0));
1963 Immediate MaxOffset, LSRUse::KindType Kind,
1964 MemAccessTy AccessTy,
const Formula &
F) {
1965 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1966 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1972 return TTI.isLegalAddScalableImmediate(
Offset.getKnownMinValue());
1974 return TTI.isLegalAddImmediate(
Offset.getFixedValue());
1978 const LSRUse &LU,
const Formula &
F) {
1980 if (LU.Kind == LSRUse::Address &&
TTI.LSRWithInstrQueries()) {
1981 for (
const LSRFixup &
Fixup : LU.Fixups)
1983 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1984 F.Scale,
Fixup.UserInst))
1990 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1995 const LSRUse &LU,
const Formula &
F,
2004 return F.Scale != 1;
2007 case LSRUse::Address: {
2009 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
2010 if (
F.BaseOffset.isScalable()) {
2011 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
2012 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
2014 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
2015 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
2019 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
2022 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
2025 "Legal addressing mode has an illegal cost!");
2026 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
2028 case LSRUse::ICmpZero:
2030 case LSRUse::Special:
2040 LSRUse::KindType Kind, MemAccessTy AccessTy,
2044 if (BaseOffset.isZero() && !BaseGV)
2049 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2053 if (!HasBaseReg && Scale == 1) {
2063 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2073 Immediate MaxOffset, LSRUse::KindType Kind,
2074 MemAccessTy AccessTy,
const SCEV *S,
2077 if (S->
isZero())
return true;
2090 if (BaseOffset.isZero() && !BaseGV)
2093 if (BaseOffset.isScalable())
2098 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2101 BaseOffset, HasBaseReg, Scale);
2118 const SCEV *IncExpr;
2120 IVInc(Instruction *U,
Value *O,
const SCEV *
E)
2121 : UserInst(
U), IVOperand(
O), IncExpr(
E) {}
2128 const SCEV *ExprBase =
nullptr;
2130 IVChain() =
default;
2131 IVChain(
const IVInc &Head,
const SCEV *
Base)
2132 : Incs(1, Head), ExprBase(
Base) {}
2137 const_iterator
begin()
const {
2139 return std::next(Incs.
begin());
2141 const_iterator
end()
const {
2146 bool hasIncs()
const {
return Incs.
size() >= 2; }
2155 bool isProfitableIncrement(
const SCEV *OperExpr,
2156 const SCEV *IncExpr,
2164 SmallPtrSet<Instruction*, 4> FarUsers;
2165 SmallPtrSet<Instruction*, 4> NearUsers;
2171 ScalarEvolution &SE;
2174 AssumptionCache &AC;
2175 TargetLibraryInfo &TLI;
2176 const TargetTransformInfo &
TTI;
2178 MemorySSAUpdater *MSSAU;
2182 bool HardwareLoopProfitable =
false;
2196 SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
2203 SmallSetVector<Type *, 4> Types;
2209 RegUseTracker RegUses;
2214 static const unsigned MaxChains = 8;
2220 SmallPtrSet<Use*, MaxChains> IVIncSet;
2223 SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
2229 SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2231 void OptimizeShadowIV();
2232 bool FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse);
2234 void OptimizeLoopTermCond();
2236 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2237 SmallVectorImpl<ChainUsers> &ChainUsersVec);
2238 void FinalizeChain(IVChain &Chain);
2239 void CollectChains();
2240 void GenerateIVChain(
const IVChain &Chain,
2241 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2243 void CollectInterestingTypesAndFactors();
2244 void CollectFixupsAndInitialFormulae();
2247 using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2250 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2251 LSRUse::KindType Kind, MemAccessTy AccessTy);
2253 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2254 MemAccessTy AccessTy);
2256 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2258 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2260 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2261 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2262 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2263 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2264 bool IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const;
2266 void CollectLoopInvariantFixupsAndFormulae();
2268 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2269 unsigned Depth = 0);
2271 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2273 size_t Idx,
bool IsScaledReg =
false);
2274 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2275 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2276 const Formula &
Base,
size_t Idx,
2277 bool IsScaledReg =
false);
2278 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2279 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2280 const Formula &
Base,
2281 const SmallVectorImpl<Immediate> &Worklist,
2282 size_t Idx,
bool IsScaledReg =
false);
2283 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2284 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2285 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2286 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2287 void GenerateCrossUseConstantOffsets();
2288 void GenerateAllReuseFormulae();
2290 void FilterOutUndesirableDedicatedRegisters();
2292 size_t EstimateSearchSpaceComplexity()
const;
2293 void NarrowSearchSpaceByDetectingSupersets();
2294 void NarrowSearchSpaceByCollapsingUnrolledCode();
2295 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2296 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2297 void NarrowSearchSpaceByFilterPostInc();
2298 void NarrowSearchSpaceByMergingUsesOutsideLoop();
2299 void NarrowSearchSpaceByDeletingCostlyFormulas();
2300 void NarrowSearchSpaceByPickingWinnerRegs();
2301 void NarrowSearchSpaceUsingHeuristics();
2303 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2305 SmallVectorImpl<const Formula *> &Workspace,
2306 const Cost &CurCost,
2307 const SmallPtrSet<const SCEV *, 16> &CurRegs,
2308 DenseSet<const SCEV *> &VisitedRegs)
const;
2309 void Solve(SmallVectorImpl<const Formula *> &Solution)
const;
2313 const SmallVectorImpl<Instruction *> &Inputs)
const;
2316 const LSRUse &LU)
const;
2318 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2320 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const;
2321 void RewriteForPHI(PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2323 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2324 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2325 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2326 void ImplementSolution(
const SmallVectorImpl<const Formula *> &Solution);
2329 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2330 LoopInfo &LI,
const TargetTransformInfo &
TTI, AssumptionCache &AC,
2331 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2333 bool getChanged()
const {
return Changed; }
2334 const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs()
const {
2335 return ScalarEvolutionIVs;
2338 void print_factors_and_types(raw_ostream &OS)
const;
2339 void print_fixups(raw_ostream &OS)
const;
2340 void print_uses(raw_ostream &OS)
const;
2341 void print(raw_ostream &OS)
const;
2349void LSRInstance::OptimizeShadowIV() {
2359 Type *DestTy =
nullptr;
2360 bool IsSigned =
false;
2376 DestTy = UCast->getDestTy();
2380 DestTy = SCast->getDestTy();
2382 if (!DestTy)
continue;
2402 if (Mantissa == -1)
continue;
2406 unsigned Entry, Latch;
2416 if (!Init)
continue;
2417 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2421 BinaryOperator *Incr =
2423 if (!Incr)
continue;
2424 if (Incr->
getOpcode() != Instruction::Add
2425 && Incr->
getOpcode() != Instruction::Sub)
2429 ConstantInt *
C =
nullptr;
2441 if (!
C->getValue().isStrictlyPositive())
2449 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2451 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2452 : Instruction::FSub,
2469bool LSRInstance::FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse) {
2470 for (IVStrideUse &U : IU)
2471 if (
U.getUser() ==
Cond) {
2529Instruction *LSRInstance::OptimizeMax(ICmpInst *
Cond, IVStrideUse *&CondUse) {
2544 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2545 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2551 const SCEVNAryExpr *
Max =
nullptr;
2553 Pred = ICmpInst::ICMP_SLE;
2556 Pred = ICmpInst::ICMP_SLT;
2559 Pred = ICmpInst::ICMP_ULT;
2568 if (
Max->getNumOperands() != 2)
2571 const SCEV *MaxLHS =
Max->getOperand(0);
2572 const SCEV *MaxRHS =
Max->getOperand(1);
2577 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2588 "Loop condition operand is an addrec in a different loop!");
2592 Value *NewRHS =
nullptr;
2593 if (ICmpInst::isTrueWhenEqual(Pred)) {
2597 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2598 NewRHS = BO->getOperand(0);
2601 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2602 NewRHS = BO->getOperand(0);
2610 NewRHS = SU->getValue();
2622 ICmpInst *NewCond =
new ICmpInst(
Cond->getIterator(), Pred,
2623 Cond->getOperand(0), NewRHS,
"scmp");
2627 Cond->replaceAllUsesWith(NewCond);
2630 Cond->eraseFromParent();
2632 if (
Cmp->use_empty()) {
2634 Cmp->eraseFromParent();
2641LSRInstance::OptimizeLoopTermCond() {
2642 SmallPtrSet<Instruction *, 4> PostIncs;
2657 SmallVector<BasicBlock*, 8> ExitingBlocks;
2658 L->getExitingBlocks(ExitingBlocks);
2666 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2688 IVStrideUse *CondUse =
nullptr;
2689 if (!FindIVUserForCond(
Cond, CondUse))
2699 Cond = OptimizeMax(Cmp, CondUse);
2704 if (!DT.
dominates(ExitingBlock, LatchBlock))
2709 if (LatchBlock != ExitingBlock)
2710 for (
const IVStrideUse &UI : IU)
2713 if (&UI != CondUse &&
2717 const SCEV *
A = IU.getStride(*CondUse, L);
2718 const SCEV *
B = IU.getStride(UI, L);
2719 if (!
A || !
B)
continue;
2728 if (
const SCEVConstant *
D =
2730 const ConstantInt *
C =
D->getValue();
2732 if (
C->isOne() ||
C->isMinusOne())
2733 goto decline_post_inc;
2735 if (
C->getValue().getSignificantBits() >= 64 ||
2736 C->getValue().isMinSignedValue())
2737 goto decline_post_inc;
2740 MemAccessTy AccessTy =
2742 int64_t Scale =
C->getSExtValue();
2746 AccessTy.AddrSpace))
2747 goto decline_post_inc;
2752 AccessTy.AddrSpace))
2753 goto decline_post_inc;
2758 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2766 if (
Cond->hasOneUse()) {
2767 Cond->moveBefore(TermBr->getIterator());
2772 Cond->setName(
L->getHeader()->getName() +
".termcond");
2773 Cond->insertInto(ExitingBlock, TermBr->getIterator());
2777 TermBr->replaceUsesOfWith(OldCond,
Cond);
2794 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2795 for (Instruction *Inst : PostIncs)
2801bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2802 bool HasBaseReg, LSRUse::KindType Kind,
2803 MemAccessTy AccessTy) {
2804 Immediate NewMinOffset = LU.MinOffset;
2805 Immediate NewMaxOffset = LU.MaxOffset;
2806 MemAccessTy NewAccessTy = AccessTy;
2811 if (LU.Kind != Kind)
2817 if (Kind == LSRUse::Address) {
2818 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2819 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->
getContext(),
2820 AccessTy.AddrSpace);
2825 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2827 LU.MaxOffset - NewOffset, HasBaseReg))
2829 NewMinOffset = NewOffset;
2830 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2832 NewOffset - LU.MinOffset, HasBaseReg))
2834 NewMaxOffset = NewOffset;
2840 if (NewAccessTy.MemTy && NewAccessTy.MemTy->
isVoidTy() &&
2841 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2845 LU.MinOffset = NewMinOffset;
2846 LU.MaxOffset = NewMaxOffset;
2847 LU.AccessTy = NewAccessTy;
2854std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2855 LSRUse::KindType Kind,
2856 MemAccessTy AccessTy) {
2857 const SCEV *
Copy = Expr;
2860 ExprUse, SE, AccessTy.MemTy && AccessTy.MemTy->
isScalableTy());
2867 Offset = Immediate::getFixed(0);
2870 std::pair<UseMapTy::iterator, bool>
P =
2871 UseMap.
try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
2874 size_t LUIdx =
P.first->second;
2875 LSRUse &LU =
Uses[LUIdx];
2876 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2878 return std::make_pair(LUIdx,
Offset);
2882 size_t LUIdx =
Uses.size();
2883 P.first->second = LUIdx;
2884 Uses.push_back(LSRUse(Kind, AccessTy));
2885 LSRUse &LU =
Uses[LUIdx];
2889 return std::make_pair(LUIdx,
Offset);
2893void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2894 if (&LU != &
Uses.back())
2899 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2905LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2906 const LSRUse &OrigLU) {
2908 for (LSRUse &LU :
Uses) {
2914 if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero &&
2915 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2916 LU.HasFormulaWithSameRegs(OrigF)) {
2918 for (
const Formula &
F : LU.Formulae) {
2921 if (
F.BaseRegs == OrigF.BaseRegs &&
2922 F.ScaledReg == OrigF.ScaledReg &&
2923 F.BaseGV == OrigF.BaseGV &&
2924 F.Scale == OrigF.Scale &&
2925 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2926 if (
F.BaseOffset.isZero())
2941void LSRInstance::CollectInterestingTypesAndFactors() {
2942 SmallSetVector<const SCEV *, 4> Strides;
2946 for (
const IVStrideUse &U : IU) {
2947 const SCEV *Expr = IU.getExpr(U);
2965 }
while (!Worklist.
empty());
2969 for (SmallSetVector<const SCEV *, 4>::const_iterator
2971 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2972 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2973 const SCEV *OldStride = *
I;
2974 const SCEV *NewStride = *NewStrideIter;
2984 if (
const SCEVConstant *Factor =
2987 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2988 Factors.insert(Factor->getAPInt().getSExtValue());
2989 }
else if (
const SCEVConstant *Factor =
2993 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2994 Factors.insert(Factor->getAPInt().getSExtValue());
3000 if (Types.size() == 1)
3012 for(; OI != OE; ++OI) {
3031 return Trunc->getOperand(0);
3064 if (SubExpr->getSCEVType() ==
scAddExpr)
3067 if (SubExpr->getSCEVType() !=
scMulExpr)
3083bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3084 const SCEV *IncExpr,
3085 ScalarEvolution &SE) {
3098 SmallPtrSet<const SCEV*, 8> Processed;
3119 if (!Chain.hasIncs())
3122 if (!
Users.empty()) {
3123 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3125 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3128 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3137 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3140 const SCEV *LastIncExpr =
nullptr;
3141 unsigned NumConstIncrements = 0;
3142 unsigned NumVarIncrements = 0;
3143 unsigned NumReusedIncrements = 0;
3145 if (
TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
3148 for (
const IVInc &Inc : Chain) {
3149 if (
TTI.isProfitableLSRChainElement(Inc.UserInst))
3151 if (Inc.IncExpr->isZero())
3157 ++NumConstIncrements;
3161 if (Inc.IncExpr == LastIncExpr)
3162 ++NumReusedIncrements;
3166 LastIncExpr = Inc.IncExpr;
3171 if (NumConstIncrements > 1)
3178 cost += NumVarIncrements;
3182 cost -= NumReusedIncrements;
3184 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3191void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
3192 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
3196 const SCEV *
const OperExpr = SE.
getSCEV(NextIV);
3197 const SCEV *
const OperExprBase =
getExprBase(OperExpr);
3201 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3202 const SCEV *LastIncExpr =
nullptr;
3203 for (; ChainIdx < NChains; ++ChainIdx) {
3204 IVChain &Chain = IVChainVec[ChainIdx];
3222 const SCEV *PrevExpr = SE.
getSCEV(PrevIV);
3223 const SCEV *IncExpr = SE.
getMinusSCEV(OperExpr, PrevExpr);
3227 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3228 LastIncExpr = IncExpr;
3234 if (ChainIdx == NChains) {
3241 LastIncExpr = OperExpr;
3248 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3250 ChainUsersVec.
resize(NChains);
3251 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3252 <<
") IV=" << *LastIncExpr <<
"\n");
3254 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3255 <<
") IV+" << *LastIncExpr <<
"\n");
3257 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3259 IVChain &Chain = IVChainVec[ChainIdx];
3261 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
3263 if (!LastIncExpr->
isZero()) {
3264 ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);
3273 for (User *U : IVOper->
users()) {
3279 IVChain::const_iterator IncIter = Chain.Incs.begin();
3280 IVChain::const_iterator IncEnd = Chain.Incs.end();
3281 for( ; IncIter != IncEnd; ++IncIter) {
3282 if (IncIter->UserInst == OtherUse)
3285 if (IncIter != IncEnd)
3290 && IU.isIVUserOrOperand(OtherUse)) {
3293 NearUsers.
insert(OtherUse);
3298 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3323void LSRInstance::CollectChains() {
3327 SmallVector<BasicBlock *,8> LatchPath;
3330 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3336 for (BasicBlock *BB :
reverse(LatchPath)) {
3337 for (Instruction &
I : *BB) {
3343 if (IU.isEphemeral(&
I))
3353 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3354 ChainIdx < NChains; ++ChainIdx) {
3355 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3358 SmallPtrSet<Instruction*, 4> UniqueOperands;
3361 while (IVOpIter != IVOpEnd) {
3363 if (UniqueOperands.
insert(IVOpInst).second)
3364 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3365 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3370 for (PHINode &PN :
L->getHeader()->phis()) {
3377 ChainInstruction(&PN, IncV, ChainUsersVec);
3380 unsigned ChainIdx = 0;
3381 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3382 UsersIdx < NChains; ++UsersIdx) {
3384 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3387 if (ChainIdx != UsersIdx)
3388 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3389 FinalizeChain(IVChainVec[ChainIdx]);
3392 IVChainVec.resize(ChainIdx);
3395void LSRInstance::FinalizeChain(IVChain &Chain) {
3396 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3397 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3399 for (
const IVInc &Inc : Chain) {
3401 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3402 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3403 IVIncSet.insert(UseI);
3411 Immediate IncOffset = Immediate::getZero();
3420 C->getSignificantBits() > 64)
3422 IncOffset = Immediate::getScalable(
C->getSExtValue());
3438void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3439 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3442 const IVInc &Head = Chain.Incs[0];
3447 Value *IVSrc =
nullptr;
3448 while (IVOpIter != IVOpEnd) {
3459 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3460 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3463 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3465 if (IVOpIter == IVOpEnd) {
3467 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3470 assert(IVSrc &&
"Failed to find IV chain source");
3475 const SCEV *LeftOverExpr =
nullptr;
3476 const SCEV *Accum = SE.
getZero(IntTy);
3480 for (
const IVInc &Inc : Chain) {
3483 InsertPt =
L->getLoopLatch()->getTerminator();
3487 Value *IVOper = IVSrc;
3488 if (!Inc.IncExpr->isZero()) {
3493 LeftOverExpr = LeftOverExpr ?
3494 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3498 bool FoundBase =
false;
3499 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3500 const SCEV *Remainder = SE.
getMinusSCEV(Accum, MapScev);
3502 if (!Remainder->
isZero()) {
3504 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3505 const SCEV *IVOperExpr =
3507 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3516 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3519 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3522 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3526 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3529 LeftOverExpr =
nullptr;
3533 if (IVTy != OperTy) {
3535 "cannot extend a chained IV");
3537 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3539 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3546 for (PHINode &Phi :
L->getHeader()->phis()) {
3550 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3553 Value *IVOper = IVSrc;
3555 if (IVTy != PostIncTy) {
3557 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3558 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3559 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3561 Phi.replaceUsesOfWith(PostIncV, IVOper);
3567void LSRInstance::CollectFixupsAndInitialFormulae() {
3568 CondBrInst *ExitBranch =
nullptr;
3569 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3572 SmallPtrSet<const SCEV *, 16> Regs;
3573 DenseSet<const SCEV *> VisitedRegs;
3574 DenseSet<size_t> VisitedLSRUse;
3576 for (
const IVStrideUse &U : IU) {
3581 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3582 if (IVIncSet.count(UseI)) {
3583 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3587 LSRUse::KindType
Kind = LSRUse::Basic;
3588 MemAccessTy AccessTy;
3590 Kind = LSRUse::Address;
3594 const SCEV *S = IU.getExpr(U);
3610 if (CI->isEquality()) {
3613 Value *
NV = CI->getOperand(1);
3614 if (NV ==
U.getOperandValToReplace()) {
3615 CI->setOperand(1, CI->getOperand(0));
3616 CI->setOperand(0, NV);
3617 NV = CI->getOperand(1);
3624 (!
NV->getType()->isPointerTy() ||
3631 Kind = LSRUse::ICmpZero;
3633 }
else if (
L->isLoopInvariant(NV) &&
3636 !
NV->getType()->isPointerTy()) {
3647 Kind = LSRUse::ICmpZero;
3654 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3655 if (Factors[i] != -1)
3656 Factors.insert(-(uint64_t)Factors[i]);
3662 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3663 size_t LUIdx =
P.first;
3665 LSRUse &LU =
Uses[LUIdx];
3668 LSRFixup &LF = LU.getNewFixup();
3669 LF.UserInst = UserInst;
3670 LF.OperandValToReplace =
U.getOperandValToReplace();
3671 LF.PostIncLoops = TmpPostIncLoops;
3673 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3674 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3677 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3679 F.initialMatch(S, L, SE);
3680 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU,
3681 HardwareLoopProfitable);
3682 VisitedLSRUse.
insert(LUIdx);
3686 if (LU.Formulae.empty()) {
3687 InsertInitialFormula(S, LU, LUIdx);
3688 CountRegisters(LU.Formulae.back(), LUIdx);
3697void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3701 LU.RigidFormula =
true;
3704 F.initialMatch(S, L, SE);
3705 bool Inserted = InsertFormula(LU, LUIdx,
F);
3706 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3712LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3713 LSRUse &LU,
size_t LUIdx) {
3715 F.BaseRegs.push_back(S);
3716 F.HasBaseReg =
true;
3717 bool Inserted = InsertFormula(LU, LUIdx,
F);
3718 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3722void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3724 RegUses.countRegister(
F.ScaledReg, LUIdx);
3725 for (
const SCEV *BaseReg :
F.BaseRegs)
3726 RegUses.countRegister(BaseReg, LUIdx);
3731bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3734 "Formula is illegal");
3736 if (!LU.InsertFormula(
F, *L))
3739 CountRegisters(
F, LUIdx);
3745bool LSRInstance::IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const {
3757LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3759 SmallPtrSet<const SCEV *, 32> Visited;
3766 while (!Worklist.
empty()) {
3770 if (!Visited.
insert(S).second)
3781 const Value *
V = US->getValue();
3784 if (
L->contains(Inst))
continue;
3788 for (
const Use &U :
V->uses()) {
3798 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3820 bool HasIncompatibleEHPTerminatedBlock =
false;
3822 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3823 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3824 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3825 HasIncompatibleEHPTerminatedBlock =
true;
3830 if (HasIncompatibleEHPTerminatedBlock) {
3853 unsigned OtherIdx = !
U.getOperandNo();
3854 Value *OtherOp = ICI->getOperand(OtherIdx);
3864 std::pair<size_t, Immediate>
P =
3865 getUse(S, LSRUse::Basic, MemAccessTy());
3866 size_t LUIdx =
P.first;
3868 LSRUse &LU =
Uses[LUIdx];
3869 LSRFixup &LF = LU.getNewFixup();
3870 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3871 LF.OperandValToReplace =
U;
3873 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3874 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3875 InsertSupplementalFormula(US, LU, LUIdx);
3876 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3892 unsigned Depth = 0) {
3899 for (
const SCEV *S :
Add->operands()) {
3906 const SCEV *Start, *Step;
3911 if (Start->isZero())
3920 Remainder =
nullptr;
3922 if (Remainder != Start) {
3944 LSRUse &LU,
const SCEV *S,
const Loop *L,
3946 if (LU.Kind != LSRUse::Address ||
3947 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3953 if (
TTI.isIndexedLoadLegal(
TTI.MIM_PostInc, S->
getType()) ||
3962void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3963 const Formula &
Base,
3964 unsigned Depth,
size_t Idx,
3966 const SCEV *
BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
3974 const SCEV *Remainder =
CollectSubexprs(BaseReg,
nullptr, AddOps, L, SE);
3978 if (AddOps.
size() == 1)
3992 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3997 InnerAddOps.append(std::next(J), std::as_const(AddOps).
end());
4001 if (InnerAddOps.size() == 1 &&
4003 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
4006 const SCEV *InnerSum = SE.
getAddExpr(InnerAddOps);
4011 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
4020 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
4023 F.ScaledReg =
nullptr;
4026 F.BaseRegs.erase(
F.BaseRegs.begin() + Idx);
4027 }
else if (IsScaledReg)
4028 F.ScaledReg = InnerSum;
4030 F.BaseRegs[Idx] = InnerSum;
4038 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
4041 F.BaseRegs.push_back(*J);
4046 if (InsertFormula(LU, LUIdx,
F))
4053 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4059void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4061 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4066 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4067 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4069 if (
Base.Scale == 1)
4070 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4076void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4079 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4080 (
Base.UnfoldedOffset.isNonZero()) <=
4088 Formula NewBase =
Base;
4089 NewBase.BaseRegs.clear();
4090 Type *CombinedIntegerType =
nullptr;
4091 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4094 if (!CombinedIntegerType)
4096 Ops.push_back(BaseReg);
4099 NewBase.BaseRegs.push_back(BaseReg);
4103 if (
Ops.size() == 0)
4108 auto GenerateFormula = [&](
const SCEV *Sum) {
4109 Formula
F = NewBase;
4117 F.BaseRegs.push_back(Sum);
4119 (void)InsertFormula(LU, LUIdx,
F);
4123 if (
Ops.size() > 1) {
4130 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4131 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4133 NewBase.UnfoldedOffset.getFixedValue(),
true));
4134 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4140void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4141 const Formula &
Base,
size_t Idx,
4145 if (
G->isZero() || !GV)
4149 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4154 F.BaseRegs[Idx] =
G;
4155 (void)InsertFormula(LU, LUIdx,
F);
4159void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4162 if (
Base.BaseGV)
return;
4164 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4165 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4166 if (
Base.Scale == 1)
4167 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4172void LSRInstance::GenerateConstantOffsetsImpl(
4173 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4174 const SmallVectorImpl<Immediate> &Worklist,
size_t Idx,
bool IsScaledReg) {
4176 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4178 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4180 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4182 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4184 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4190 F.ScaledReg =
nullptr;
4192 F.deleteBaseReg(
F.BaseRegs[Idx]);
4194 }
else if (IsScaledReg)
4197 F.BaseRegs[Idx] = NewG;
4199 (void)InsertFormula(LU, LUIdx,
F);
4214 const APInt *StepInt;
4219 for (Immediate
Offset : Worklist) {
4221 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4227 for (Immediate
Offset : Worklist)
4234 if (
G->isZero() ||
Imm.isZero() ||
4235 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4238 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4239 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4244 F.BaseRegs[Idx] =
G;
4249 (void)InsertFormula(LU, LUIdx,
F);
4253void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4259 if (LU.MaxOffset != LU.MinOffset)
4262 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4263 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4264 if (
Base.Scale == 1)
4265 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4271void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4273 if (LU.Kind != LSRUse::ICmpZero)
return;
4281 if (LU.MinOffset != LU.MaxOffset)
return;
4284 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4286 for (
const SCEV *BaseReg :
Base.BaseRegs)
4287 if (
BaseReg->getType()->isPointerTy())
4289 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4292 for (int64_t Factor : Factors) {
4297 if (
Base.BaseOffset.isMin() && Factor == -1)
4300 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4302 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4303 assert(Factor != 0 &&
"Zero factor not expected!");
4304 if (NewBaseOffset.getFixedValue() / Factor !=
4305 Base.BaseOffset.getFixedValue())
4313 Immediate
Offset = LU.MinOffset;
4314 if (
Offset.isMin() && Factor == -1)
4317 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4325 F.BaseOffset = NewBaseOffset;
4332 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4334 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4337 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4351 if (
F.UnfoldedOffset.isNonZero()) {
4352 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4354 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4355 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4356 Base.UnfoldedOffset.getFixedValue())
4360 IntTy,
F.UnfoldedOffset.getFixedValue()))
4365 (void)InsertFormula(LU, LUIdx,
F);
4372void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4379 if (
Base.Scale != 0 && !
Base.unscale())
4382 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4385 for (int64_t Factor : Factors) {
4386 Base.Scale = Factor;
4387 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4389 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4393 if (LU.Kind == LSRUse::Basic &&
4394 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4395 LU.AccessTy,
Base) &&
4396 LU.AllFixupsOutsideLoop)
4397 LU.Kind = LSRUse::Special;
4403 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4404 Base.BaseOffset.isZero() && !
Base.BaseGV)
4407 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4409 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4410 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4415 if (
const SCEV *Quotient =
getExactSDiv(AR, FactorS, SE,
true))
4416 if (!Quotient->isZero()) {
4419 F.ScaledReg = Quotient;
4420 F.deleteBaseReg(
F.BaseRegs[i]);
4424 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4425 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4429 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4431 (void)InsertFormula(LU, LUIdx,
F);
4447 const SCEV *Result =
nullptr;
4448 for (
auto &L :
Loops) {
4452 if (!New || (Result && New != Result))
4457 assert(Result &&
"failed to create expression");
4462void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4464 if (
Base.BaseGV)
return;
4474 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4477 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4481 for (
auto &LF : LU.Fixups)
4482 Loops.push_back(LF.PostIncLoops);
4484 for (
Type *SrcTy : Types) {
4493 const SCEV *NewScaledReg =
4495 if (!NewScaledReg || NewScaledReg->
isZero())
4497 F.ScaledReg = NewScaledReg;
4499 bool HasZeroBaseReg =
false;
4500 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4501 const SCEV *NewBaseReg =
4503 if (!NewBaseReg || NewBaseReg->
isZero()) {
4504 HasZeroBaseReg =
true;
4514 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4518 (void)InsertFormula(LU, LUIdx,
F);
4531 const SCEV *OrigReg;
4533 WorkItem(
size_t LI, Immediate
I,
const SCEV *R)
4534 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4536 void print(raw_ostream &OS)
const;
4542#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4543void WorkItem::print(raw_ostream &OS)
const {
4544 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4545 <<
" , add offset " <<
Imm;
4555void LSRInstance::GenerateCrossUseConstantOffsets() {
4557 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4559 DenseMap<const SCEV *, ImmMapTy>
Map;
4560 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4562 for (
const SCEV *Use : RegUses) {
4566 auto Pair =
Map.try_emplace(
Reg);
4569 Pair.first->second.insert(std::make_pair(Imm, Use));
4570 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(Use);
4577 SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
4579 for (
const SCEV *
Reg : Sequence) {
4580 const ImmMapTy &Imms =
Map.find(
Reg)->second;
4583 if (Imms.size() == 1)
4587 for (
const auto &Entry
4589 <<
' ' <<
Entry.first;
4593 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4595 const SCEV *OrigReg = J->second;
4597 Immediate JImm = J->first;
4598 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4601 UsedByIndicesMap[
Reg].
count() == 1) {
4609 Immediate
First = Imms.begin()->first;
4610 Immediate
Last = std::prev(Imms.end())->first;
4611 if (!
First.isCompatibleImmediate(
Last)) {
4618 bool Scalable =
First.isScalable() ||
Last.isScalable();
4619 int64_t FI =
First.getKnownMinValue();
4620 int64_t LI =
Last.getKnownMinValue();
4623 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4626 Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
4627 ImmMapTy::const_iterator OtherImms[] = {
4628 Imms.begin(), std::prev(Imms.end()),
4629 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4630 for (
const auto &M : OtherImms) {
4631 if (M == J || M == JE)
continue;
4632 if (!JImm.isCompatibleImmediate(
M->first))
4636 Immediate
Imm = JImm.subUnsigned(
M->first);
4637 for (
unsigned LUIdx : UsedByIndices.
set_bits())
4639 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4640 WorkItems.
push_back(WorkItem(LUIdx, Imm, OrigReg));
4647 UsedByIndicesMap.
clear();
4648 UniqueItems.
clear();
4651 for (
const WorkItem &WI : WorkItems) {
4652 size_t LUIdx = WI.LUIdx;
4653 LSRUse &LU =
Uses[LUIdx];
4654 Immediate
Imm = WI.Imm;
4655 const SCEV *OrigReg = WI.OrigReg;
4658 const SCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4662 for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4663 Formula
F = LU.Formulae[
L];
4670 if (
F.ScaledReg == OrigReg) {
4671 if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4673 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4675 const SCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4676 if (
F.referencesReg(S))
4679 NewF.BaseOffset =
Offset;
4680 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4683 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4692 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4694 if (
C->getValue()->isNegative() !=
4695 (NewF.BaseOffset.isLessThanZero()) &&
4696 (
C->getAPInt().abs() * APInt(
BitWidth,
F.Scale))
4697 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4702 NewF.canonicalize(*this->L);
4703 (void)InsertFormula(LU, LUIdx, NewF);
4706 for (
size_t N = 0, NE =
F.BaseRegs.size();
N != NE; ++
N) {
4708 if (BaseReg != OrigReg)
4711 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4712 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4713 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4715 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4717 LU.Kind, LU.AccessTy, NewF)) {
4721 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4725 NewF.UnfoldedOffset = NewUnfoldedOffset;
4727 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4732 for (
const SCEV *NewReg : NewF.BaseRegs)
4734 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4736 if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4738 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4739 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4742 NewF.BaseOffset.getFixedValue()))
4747 NewF.canonicalize(*this->L);
4748 (void)InsertFormula(LU, LUIdx, NewF);
4759LSRInstance::GenerateAllReuseFormulae() {
4762 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4763 LSRUse &LU =
Uses[LUIdx];
4764 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4765 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4766 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4767 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4769 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4770 LSRUse &LU =
Uses[LUIdx];
4771 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4772 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4773 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4774 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4775 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4776 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4777 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4778 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4780 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4781 LSRUse &LU =
Uses[LUIdx];
4782 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4783 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4786 GenerateCrossUseConstantOffsets();
4789 "After generating reuse formulae:\n";
4790 print_uses(
dbgs()));
4795void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4796 DenseSet<const SCEV *> VisitedRegs;
4797 SmallPtrSet<const SCEV *, 16> Regs;
4798 SmallPtrSet<const SCEV *, 16> LoserRegs;
4800 bool ChangedFormulae =
false;
4805 using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>,
size_t>;
4807 BestFormulaeTy BestFormulae;
4809 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4810 LSRUse &LU =
Uses[LUIdx];
4815 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4816 FIdx != NumForms; ++FIdx) {
4817 Formula &
F = LU.Formulae[FIdx];
4828 CostF.RateFormula(
F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4830 if (CostF.isLoser()) {
4842 for (
const SCEV *
Reg :
F.BaseRegs) {
4843 if (RegUses.isRegUsedByUsesOtherThan(
Reg, LUIdx))
4847 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4848 Key.push_back(
F.ScaledReg);
4853 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4854 BestFormulae.insert(std::make_pair(
Key, FIdx));
4858 Formula &Best = LU.Formulae[
P.first->second];
4860 Cost CostBest(L, SE,
TTI, AMK);
4862 CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
4863 HardwareLoopProfitable);
4864 if (CostF.isLess(CostBest))
4868 " in favor of formula ";
4869 Best.print(
dbgs());
dbgs() <<
'\n');
4872 ChangedFormulae =
true;
4874 LU.DeleteFormula(
F);
4882 LU.RecomputeRegs(LUIdx, RegUses);
4885 BestFormulae.clear();
4890 "After filtering out undesirable candidates:\n";
4898size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4900 for (
const LSRUse &LU :
Uses) {
4901 size_t FSize = LU.Formulae.size();
4916void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4920 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4921 "which use a superset of registers used by other "
4924 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4925 LSRUse &LU =
Uses[LUIdx];
4927 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4928 Formula &
F = LU.Formulae[i];
4929 if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4935 I =
F.BaseRegs.begin(),
E =
F.BaseRegs.end();
I !=
E; ++
I) {
4941 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4942 (uint64_t)
C->getValue()->getSExtValue());
4943 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4944 (
I -
F.BaseRegs.begin()));
4945 if (LU.HasFormulaWithSameRegs(NewF)) {
4948 LU.DeleteFormula(
F);
4959 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4960 (
I -
F.BaseRegs.begin()));
4961 if (LU.HasFormulaWithSameRegs(NewF)) {
4964 LU.DeleteFormula(
F);
4975 LU.RecomputeRegs(LUIdx, RegUses);
4984void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4989 dbgs() <<
"The search space is too complex.\n"
4990 "Narrowing the search space by assuming that uses separated "
4991 "by a constant offset will use the same registers.\n");
4995 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4996 LSRUse &LU =
Uses[LUIdx];
4997 for (
const Formula &
F : LU.Formulae) {
4998 if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
5000 assert((LU.Kind == LSRUse::Address || LU.Kind == LSRUse::ICmpZero) &&
5001 "Only address and cmp uses expected to have nonzero BaseOffset");
5003 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
5007 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
5008 LU.Kind, LU.AccessTy))
5013 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
5014 LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
5017 for (LSRFixup &
Fixup : LU.Fixups) {
5018 Fixup.Offset +=
F.BaseOffset;
5019 LUThatHas->pushFixup(
Fixup);
5024 Type *FixupType = LUThatHas->Fixups[0].OperandValToReplace->getType();
5025 for (LSRFixup &
Fixup : LUThatHas->Fixups)
5026 assert(
Fixup.OperandValToReplace->getType() == FixupType &&
5027 "Expected all fixups to have the same type");
5032 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
5033 Formula &
F = LUThatHas->Formulae[i];
5034 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
5035 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
5037 LUThatHas->DeleteFormula(
F);
5045 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
5048 DeleteUse(LU, LUIdx);
5061void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5065 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
5066 "undesirable dedicated registers.\n");
5068 FilterOutUndesirableDedicatedRegisters();
5083void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5088 dbgs() <<
"The search space is too complex.\n"
5089 "Narrowing the search space by choosing the best Formula "
5090 "from the Formulae with the same Scale and ScaledReg.\n");
5093 using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>,
size_t>;
5095 BestFormulaeTy BestFormulae;
5097 bool ChangedFormulae =
false;
5099 DenseSet<const SCEV *> VisitedRegs;
5100 SmallPtrSet<const SCEV *, 16> Regs;
5102 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5103 LSRUse &LU =
Uses[LUIdx];
5108 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5113 size_t FARegNum = 0;
5114 for (
const SCEV *
Reg : FA.BaseRegs) {
5115 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5116 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5118 size_t FBRegNum = 0;
5119 for (
const SCEV *
Reg : FB.BaseRegs) {
5120 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5121 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5123 if (FARegNum != FBRegNum)
5124 return FARegNum < FBRegNum;
5131 CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5133 CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5134 return CostFA.isLess(CostFB);
5138 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5140 Formula &
F = LU.Formulae[FIdx];
5143 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5147 Formula &Best = LU.Formulae[
P.first->second];
5148 if (IsBetterThan(
F, Best))
5152 " in favor of formula ";
5153 Best.print(
dbgs());
dbgs() <<
'\n');
5155 ChangedFormulae =
true;
5157 LU.DeleteFormula(
F);
5163 LU.RecomputeRegs(LUIdx, RegUses);
5166 BestFormulae.clear();
5171 "After filtering out undesirable candidates:\n";
5178void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5185 "Narrowing the search space by choosing the lowest "
5186 "register Formula for PostInc Uses.\n");
5188 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5189 LSRUse &LU =
Uses[LUIdx];
5191 if (LU.Kind != LSRUse::Address)
5197 size_t MinRegs = std::numeric_limits<size_t>::max();
5198 for (
const Formula &
F : LU.Formulae)
5199 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5202 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5204 Formula &
F = LU.Formulae[FIdx];
5205 if (
F.getNumRegs() > MinRegs) {
5208 LU.DeleteFormula(
F);
5215 LU.RecomputeRegs(LUIdx, RegUses);
5224void LSRInstance::NarrowSearchSpaceByMergingUsesOutsideLoop() {
5229 dbgs() <<
"The search space is too complex.\n"
5230 "Narrowing the search space by merging uses with fixups "
5231 "entirely outside the loop with uses inside the loop.\n");
5233 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5234 LSRUse &LU =
Uses[LUIdx];
5235 if (!LU.AllFixupsOutsideLoop || LU.Formulae.empty())
5242 LSRUse *LUToMergeWith =
nullptr;
5243 const Formula &ThisF = LU.Formulae[0];
5244 for (LSRUse &OtherLU :
Uses) {
5246 if (OtherLU.AllFixupsOutsideLoop)
5250 if (OtherLU.Kind == LSRUse::ICmpZero)
5253 if (OtherLU.Formulae.empty())
5256 if (
any_of(OtherLU.Formulae, [&](
const Formula &
F) {
5257 return !isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, OtherLU.Kind,
5258 OtherLU.AccessTy, F);
5265 const Formula &OtherF = OtherLU.Formulae[0];
5266 if (ThisF.BaseRegs == OtherF.BaseRegs &&
5267 ThisF.ScaledReg == OtherF.ScaledReg &&
5268 ThisF.BaseGV == OtherF.BaseGV && ThisF.Scale == OtherF.Scale &&
5269 ThisF.UnfoldedOffset == OtherF.UnfoldedOffset &&
5270 ThisF.BaseOffset == OtherF.BaseOffset) {
5271 LUToMergeWith = &OtherLU;
5282 for (LSRFixup &
Fixup : LU.Fixups) {
5283 LUToMergeWith->pushFixup(
Fixup);
5287 DeleteUse(LU, LUIdx);
5337void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5346 SmallPtrSet<const SCEV *, 4> UniqRegs;
5350 DenseMap <const SCEV *, float> RegNumMap;
5351 for (
const SCEV *
Reg : RegUses) {
5355 for (
const LSRUse &LU :
Uses) {
5356 if (!LU.Regs.count(
Reg))
5358 float P = LU.getNotSelectedProbability(
Reg);
5364 RegNumMap.
insert(std::make_pair(
Reg, PNotSel));
5368 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5371 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5372 LSRUse &LU =
Uses[LUIdx];
5374 if (LU.Formulae.size() < 2)
5379 float FMinRegNum = LU.Formulae[0].getNumRegs();
5380 float FMinARegNum = LU.Formulae[0].getNumRegs();
5382 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5383 Formula &
F = LU.Formulae[i];
5386 for (
const SCEV *BaseReg :
F.BaseRegs) {
5387 if (UniqRegs.
count(BaseReg))
5389 FRegNum += RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5392 RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5394 if (
const SCEV *ScaledReg =
F.ScaledReg) {
5395 if (!UniqRegs.
count(ScaledReg)) {
5397 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5400 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5403 if (FMinRegNum > FRegNum ||
5404 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5405 FMinRegNum = FRegNum;
5406 FMinARegNum = FARegNum;
5411 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5413 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5414 while (LU.Formulae.size() != 1) {
5417 LU.Formulae.pop_back();
5419 LU.RecomputeRegs(LUIdx, RegUses);
5420 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5421 Formula &
F = LU.Formulae[0];
5437 MemAccessTy AccessType) {
5447 return TTI.isLegalAddressingMode(
5448 AccessType.MemTy,
nullptr,
5449 Diff->getSExtValue(),
5450 true, 0, AccessType.AddrSpace) &&
5451 !
TTI.isLegalAddressingMode(
5452 AccessType.MemTy,
nullptr,
5453 -Diff->getSExtValue(),
5454 true, 0, AccessType.AddrSpace);
5460void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5463 SmallPtrSet<const SCEV *, 4> Taken;
5471 const SCEV *Best =
nullptr;
5472 unsigned BestNum = 0;
5473 for (
const SCEV *
Reg : RegUses) {
5478 BestNum = RegUses.getUsedByIndices(
Reg).count();
5480 unsigned Count = RegUses.getUsedByIndices(
Reg).count();
5481 if (
Count > BestNum) {
5489 if (
Count == BestNum) {
5490 int LUIdx = RegUses.getUsedByIndices(
Reg).find_first();
5491 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5493 Uses[LUIdx].AccessTy)) {
5500 assert(Best &&
"Failed to find best LSRUse candidate");
5502 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5503 <<
" will yield profitable reuse.\n");
5508 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5509 LSRUse &LU =
Uses[LUIdx];
5510 if (!LU.Regs.count(Best))
continue;
5513 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5514 Formula &
F = LU.Formulae[i];
5515 if (!
F.referencesReg(Best)) {
5517 LU.DeleteFormula(
F);
5521 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5527 LU.RecomputeRegs(LUIdx, RegUses);
5538void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5539 NarrowSearchSpaceByDetectingSupersets();
5540 NarrowSearchSpaceByCollapsingUnrolledCode();
5541 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5543 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5544 NarrowSearchSpaceByFilterPostInc();
5545 NarrowSearchSpaceByMergingUsesOutsideLoop();
5547 NarrowSearchSpaceByDeletingCostlyFormulas();
5549 NarrowSearchSpaceByPickingWinnerRegs();
5553void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5555 SmallVectorImpl<const Formula *> &Workspace,
5556 const Cost &CurCost,
5557 const SmallPtrSet<const SCEV *, 16> &CurRegs,
5558 DenseSet<const SCEV *> &VisitedRegs)
const {
5569 const LSRUse &LU =
Uses[Workspace.
size()];
5575 SmallSetVector<const SCEV *, 4> ReqRegs;
5576 for (
const SCEV *S : CurRegs)
5577 if (LU.Regs.count(S))
5580 SmallPtrSet<const SCEV *, 16> NewRegs;
5581 Cost NewCost(L, SE,
TTI, AMK);
5582 for (
const Formula &
F : LU.Formulae) {
5590 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5591 for (
const SCEV *
Reg : ReqRegs) {
5592 if ((
F.ScaledReg &&
F.ScaledReg ==
Reg) ||
5595 if (NumReqRegsToFind == 0)
5599 if (NumReqRegsToFind != 0) {
5610 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5611 if (NewCost.isLess(SolutionCost)) {
5613 if (Workspace.
size() !=
Uses.size()) {
5614 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5615 NewRegs, VisitedRegs);
5616 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5617 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5620 dbgs() <<
".\nRegs:\n";
5621 for (
const SCEV *S : NewRegs)
dbgs()
5622 <<
"- " << *S <<
"\n";
5625 SolutionCost = NewCost;
5626 Solution = Workspace;
5635void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution)
const {
5637 Cost SolutionCost(L, SE,
TTI, AMK);
5638 SolutionCost.Lose();
5639 Cost CurCost(L, SE,
TTI, AMK);
5640 SmallPtrSet<const SCEV *, 16> CurRegs;
5641 DenseSet<const SCEV *> VisitedRegs;
5645 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5646 CurRegs, VisitedRegs);
5647 if (Solution.
empty()) {
5654 "The chosen solution requires ";
5655 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5656 for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5661 Solution[i]->print(
dbgs());
5667 const bool EnableDropUnprofitableSolution = [&] {
5679 if (BaselineCost.isLess(SolutionCost)) {
5680 if (!EnableDropUnprofitableSolution)
5682 dbgs() <<
"Baseline is more profitable than chosen solution, "
5683 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5686 "solution, dropping LSR solution.\n";);
5697 const SmallVectorImpl<Instruction *> &Inputs)
5701 bool AllDominate =
true;
5708 for (Instruction *Inst : Inputs) {
5709 if (Inst == Tentative || !DT.
dominates(Inst, Tentative)) {
5710 AllDominate =
false;
5715 if (Tentative->
getParent() == Inst->getParent() &&
5716 (!BetterPos || !DT.
dominates(Inst, BetterPos)))
5726 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5727 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5731 if (!Rung)
return IP;
5732 Rung = Rung->getIDom();
5733 if (!Rung)
return IP;
5734 IDom = Rung->getBlock();
5737 const Loop *IDomLoop = LI.getLoopFor(IDom);
5738 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5739 if (IDomDepth <= IPLoopDepth &&
5740 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5757 SmallVector<Instruction *, 4> Inputs;
5760 if (LU.Kind == LSRUse::ICmpZero)
5761 if (Instruction *
I =
5764 if (LF.PostIncLoops.
count(L)) {
5765 if (LF.isUseFullyOutsideLoop(L))
5766 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5772 for (
const Loop *PIL : LF.PostIncLoops) {
5773 if (PIL == L)
continue;
5778 if (!ExitingBlocks.
empty()) {
5780 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5787 "Insertion point must be a normal instruction");
5797 while (IP->isEHPad()) ++IP;
5802 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5810Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5812 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const {
5813 if (LU.RigidFormula)
5814 return LF.OperandValToReplace;
5818 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5823 Rewriter.setPostInc(LF.PostIncLoops);
5829 if (LU.Kind == LSRUse::ICmpZero && OpTy->
isPointerTy())
5832 Type *Ty =
F.getType();
5846 for (
const SCEV *
Reg :
F.BaseRegs) {
5847 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5855 Value *ICmpScaledV =
nullptr;
5857 const SCEV *ScaledS =
F.ScaledReg;
5863 if (LU.Kind == LSRUse::ICmpZero) {
5873 "The only scale supported by ICmpZero uses is -1!");
5874 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5882 if (!
Ops.empty() && LU.Kind == LSRUse::Address &&
5892 Ops.push_back(ScaledS);
5918 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5919 "Expanding mismatched offsets\n");
5921 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5922 if (
Offset.isNonZero()) {
5923 if (LU.Kind == LSRUse::ICmpZero) {
5930 IntTy, -(uint64_t)
Offset.getFixedValue(),
true);
5939 Ops.push_back(
Offset.getUnknownSCEV(SE, IntTy));
5944 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5945 if (UnfoldedOffset.isNonZero()) {
5947 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5951 const SCEV *FullS =
Ops.empty() ?
5962 if (LU.Kind == LSRUse::ICmpZero) {
5966 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5967 "a scale at the same time!");
5968 if (
F.Scale == -1) {
5969 if (ICmpScaledV->
getType() != OpTy) {
5979 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5980 "ICmp does not support folding a global value and "
5981 "a scale at the same time!");
5985 -(uint64_t)
Offset.getFixedValue(),
5987 if (
C->getType() != OpTy) {
5991 assert(
C &&
"Cast of ConstantInt should have folded");
6004void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRUse &LU,
6005 const LSRFixup &LF,
const Formula &
F,
6006 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6007 DenseMap<BasicBlock *, Value *>
Inserted;
6011 bool needUpdateFixups =
false;
6022 Loop *PNLoop = LI.getLoopFor(Parent);
6023 if (!PNLoop || Parent != PNLoop->
getHeader()) {
6029 CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
6030 .setMergeIdenticalEdges()
6031 .setKeepOneInputPHIs());
6034 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
6045 if (
L->contains(BB) && !
L->contains(PN))
6053 needUpdateFixups =
true;
6058 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
6071 LF.OperandValToReplace->
getType(),
"tmp",
6078 if (
L->contains(
I) && !
L->contains(BB))
6079 InsertedNonLCSSAInsts.insert(
I);
6082 Pair.first->second = FullV;
6089 if (needUpdateFixups) {
6090 for (LSRUse &LU :
Uses)
6091 for (LSRFixup &
Fixup : LU.Fixups)
6095 if (
Fixup.UserInst == PN) {
6098 bool foundInOriginalPHI =
false;
6100 if (val ==
Fixup.OperandValToReplace) {
6101 foundInOriginalPHI =
true;
6106 if (foundInOriginalPHI)
6117 if (val ==
Fixup.OperandValToReplace)
6118 Fixup.UserInst = NewPN;
6128void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
6130 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6134 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6142 if (FullV->
getType() != OpTy &&
6143 !(LU.Kind == LSRUse::ICmpZero && OpTy->
isPointerTy())) {
6155 if (LU.Kind == LSRUse::ICmpZero)
6171 const LSRFixup &
Fixup,
const LSRUse &LU,
6175 if (LU.Kind != LSRUse::Address)
6176 return IVIncInsertPos;
6180 Type *Ty =
I->getType();
6183 return IVIncInsertPos;
6190 return IVIncInsertPos;
6197void LSRInstance::ImplementSolution(
6198 const SmallVectorImpl<const Formula *> &Solution) {
6204 for (
const IVChain &Chain : IVChainVec) {
6210 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6211 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6214 Rewriter.setIVIncInsertPos(L, InsertPos);
6215 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6219 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6222 for (
const IVChain &Chain : IVChainVec) {
6223 GenerateIVChain(Chain, DeadInsts);
6227 for (
const WeakVH &
IV :
Rewriter.getInsertedIVs())
6245 for (PHINode &PN :
L->getHeader()->phis()) {
6246 BinaryOperator *BO =
nullptr;
6252 case Instruction::Sub:
6257 case Instruction::Add:
6274 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6283LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6284 DominatorTree &DT, LoopInfo &LI,
6285 const TargetTransformInfo &
TTI, AssumptionCache &AC,
6286 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6287 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6290 :
TTI.getPreferredAddressingMode(
L, &SE)),
6293 if (!
L->isLoopSimplifyForm())
6301 unsigned NumUsers = 0;
6305 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6313 auto FirstNonPHI = PN->
getParent()->getFirstNonPHIIt();
6323 L->getHeader()->printAsOperand(
dbgs(),
false);
6329 HardwareLoopProfitable =
6330 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6334#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6337 Rewriter.disableCanonicalMode();
6338 Rewriter.enableLSRMode();
6342 OptimizeLoopTermCond();
6345 if (IU.empty())
return;
6348 if (!
L->isInnermost()) {
6361 CollectInterestingTypesAndFactors();
6362 CollectFixupsAndInitialFormulae();
6363 CollectLoopInvariantFixupsAndFormulae();
6369 print_uses(
dbgs()));
6371 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6375 GenerateAllReuseFormulae();
6377 FilterOutUndesirableDedicatedRegisters();
6378 NarrowSearchSpaceUsingHeuristics();
6388 if (Solution.
empty())
6393 for (
const LSRUse &LU :
Uses) {
6394 for (
const Formula &
F : LU.Formulae)
6396 F) &&
"Illegal formula generated!");
6401 ImplementSolution(Solution);
6404#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6405void LSRInstance::print_factors_and_types(
raw_ostream &OS)
const {
6406 if (Factors.empty() &&
Types.empty())
return;
6408 OS <<
"LSR has identified the following interesting factors and types: ";
6411 for (int64_t Factor : Factors)
6412 OS <<
LS <<
'*' << Factor;
6414 for (
Type *Ty : Types)
6415 OS <<
LS <<
'(' << *Ty <<
')';
6419void LSRInstance::print_fixups(raw_ostream &OS)
const {
6420 OS <<
"LSR is examining the following fixup sites:\n";
6421 for (
const LSRUse &LU :
Uses)
6422 for (
const LSRFixup &LF : LU.Fixups) {
6429void LSRInstance::print_uses(raw_ostream &OS)
const {
6430 OS <<
"LSR is examining the following uses:\n";
6431 for (
const LSRUse &LU :
Uses) {
6435 for (
const Formula &
F : LU.Formulae) {
6443void LSRInstance::print(raw_ostream &OS)
const {
6444 print_factors_and_types(OS);
6456class LoopStrengthReduce :
public LoopPass {
6460 LoopStrengthReduce();
6463 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
6464 void getAnalysisUsage(AnalysisUsage &AU)
const override;
6469LoopStrengthReduce::LoopStrengthReduce() : LoopPass(
ID) {
6473void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6500ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6501 llvm::DIExpression::expr_op_iterator Begin =
6502 llvm::DIExpression::expr_op_iterator(Expr.
begin());
6503 llvm::DIExpression::expr_op_iterator End =
6504 llvm::DIExpression::expr_op_iterator(Expr.
end());
6505 return {Begin, End};
6508struct SCEVDbgValueBuilder {
6509 SCEVDbgValueBuilder() =
default;
6510 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6512 void clone(
const SCEVDbgValueBuilder &
Base) {
6513 LocationOps =
Base.LocationOps;
6518 LocationOps.
clear();
6525 SmallVector<Value *, 2> LocationOps;
6528 void pushUInt(uint64_t Operand) { Expr.
push_back(Operand); }
6535 unsigned ArgIndex = 0;
6536 if (It != LocationOps.
end()) {
6537 ArgIndex = std::distance(LocationOps.
begin(), It);
6539 ArgIndex = LocationOps.
size();
6545 void pushValue(
const SCEVUnknown *U) {
6550 bool pushConst(
const SCEVConstant *
C) {
6551 if (
C->getAPInt().getSignificantBits() > 64)
6553 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6554 Expr.
push_back(
C->getAPInt().getSExtValue());
6561 return ToDwarfOpIter(Expr);
6566 bool pushArithmeticExpr(
const llvm::SCEVCommutativeExpr *CommExpr,
6569 "Expected arithmetic SCEV type");
6571 unsigned EmitOperator = 0;
6572 for (
const auto &
Op : CommExpr->
operands()) {
6575 if (EmitOperator >= 1)
6576 pushOperator(DwarfOp);
6583 bool pushCast(
const llvm::SCEVCastExpr *
C,
bool IsSigned) {
6584 const llvm::SCEV *Inner =
C->getOperand(0);
6585 const llvm::Type *
Type =
C->getType();
6586 uint64_t ToWidth =
Type->getIntegerBitWidth();
6587 bool Success = pushSCEV(Inner);
6589 IsSigned ? llvm::dwarf::DW_ATE_signed
6590 : llvm::dwarf::DW_ATE_unsigned};
6591 for (
const auto &
Op : CastOps)
6597 bool pushSCEV(
const llvm::SCEV *S) {
6600 Success &= pushConst(StartInt);
6605 pushLocation(
U->getValue());
6608 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6611 Success &= pushSCEV(UDiv->getLHS());
6612 Success &= pushSCEV(UDiv->getRHS());
6613 pushOperator(llvm::dwarf::DW_OP_div);
6620 "Unexpected cast type in SCEV.");
6624 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6639 bool isIdentityFunction(uint64_t
Op,
const SCEV *S) {
6641 if (
C->getAPInt().getSignificantBits() > 64)
6643 int64_t
I =
C->getAPInt().getSExtValue();
6645 case llvm::dwarf::DW_OP_plus:
6646 case llvm::dwarf::DW_OP_minus:
6648 case llvm::dwarf::DW_OP_mul:
6649 case llvm::dwarf::DW_OP_div:
6662 bool SCEVToValueExpr(
const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6668 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6669 if (!pushSCEV(Stride))
6671 pushOperator(llvm::dwarf::DW_OP_mul);
6673 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6674 if (!pushSCEV(Start))
6676 pushOperator(llvm::dwarf::DW_OP_plus);
6682 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6683 pushLocation(OffsetValue);
6686 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6687 << std::to_string(
Offset) <<
"\n");
6693 bool createIterCountExpr(
const SCEV *S,
6694 const SCEVDbgValueBuilder &IterationCount,
6695 ScalarEvolution &SE) {
6704 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6708 if (!Rec->isAffine())
6716 clone(IterationCount);
6717 if (!SCEVToValueExpr(*Rec, SE))
6728 bool SCEVToIterCountExpr(
const llvm::SCEVAddRecExpr &SAR,
6729 ScalarEvolution &SE) {
6735 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6736 if (!pushSCEV(Start))
6738 pushOperator(llvm::dwarf::DW_OP_minus);
6740 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6741 if (!pushSCEV(Stride))
6743 pushOperator(llvm::dwarf::DW_OP_div);
6751 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6752 SmallVectorImpl<Value *> &DestLocations) {
6754 "Expected the locations vector to contain the IV");
6759 "Expected the location ops to contain the IV.");
6763 for (
const auto &
Op : LocationOps) {
6764 auto It =
find(DestLocations,
Op);
6765 if (It != DestLocations.
end()) {
6767 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6775 for (
const auto &
Op : expr_ops()) {
6777 Op.appendToVector(DestExpr);
6784 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6792struct DVIRecoveryRec {
6793 DVIRecoveryRec(DbgVariableRecord *DVR)
6794 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6796 DbgVariableRecord *DbgRef;
6798 bool HadLocationArgList;
6804 for (
auto &RE : RecoveryExprs)
6806 RecoveryExprs.clear();
6809 ~DVIRecoveryRec() { clear(); }
6817 auto expr_ops = ToDwarfOpIter(Expr);
6819 for (
auto Op : expr_ops)
6828template <
typename T>
6832 "contain any DW_OP_llvm_arg operands.");
6839template <
typename T>
6844 "Expected expression that references DIArglist locations using "
6845 "DW_OP_llvm_arg operands.");
6847 for (
Value *V : Locations)
6864 if (NumLLVMArgs == 0) {
6871 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6901 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6902 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6903 assert(DVIRec.Expr &&
"Expected an expression");
6908 if (!DVIRec.HadLocationArgList) {
6909 assert(DVIRec.LocationOps.size() == 1 &&
6910 "Unexpected number of location ops.");
6914 Value *CachedValue =
6919 for (
WeakVH VH : DVIRec.LocationOps) {
6927 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6932 const SCEV *SCEVInductionVar,
6933 SCEVDbgValueBuilder IterCountExpr) {
6947 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6949 NewLocationOps.
push_back(LSRInductionVar);
6951 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6952 WeakVH VH = DVIRec.LocationOps[i];
6958 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6960 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6968 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6969 <<
" refers to a location that is now undef or erased. "
6970 "Salvage abandoned.\n");
6974 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6975 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6977 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6978 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6982 if (std::optional<APInt>
Offset =
6984 if (
Offset->getSignificantBits() <= 64)
6985 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6988 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6997 assert(DVIRec.RecoveryExprs.size() == 1 &&
6998 "Expected only a single recovery expression for an empty "
7000 assert(DVIRec.RecoveryExprs[0] &&
7001 "Expected a SCEVDbgSalvageBuilder for location 0");
7002 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
7003 B->appendToVectors(
NewExpr, NewLocationOps);
7005 for (
const auto &
Op : DVIRec.Expr->
expr_ops()) {
7013 SCEVDbgValueBuilder *DbgBuilder =
7014 DVIRec.RecoveryExprs[LocationArgIndex].get();
7020 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
7021 "Expected a positive index for the location-op position.");
7022 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
7026 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
7030 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Updated DVI: " << *DVIRec.DbgRef <<
"\n");
7038 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
7039 if (DVIToUpdate.empty())
7043 assert(SCEVInductionVar &&
7044 "Anticipated a SCEV for the post-LSR induction variable");
7048 if (!IVAddRec->isAffine())
7056 SCEVDbgValueBuilder IterCountExpr;
7057 IterCountExpr.pushLocation(LSRInductionVar);
7058 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
7061 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
7064 for (
auto &DVIRec : DVIToUpdate) {
7065 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
7076 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
7077 for (
const auto &
B : L->getBlocks()) {
7078 for (
auto &
I : *
B) {
7080 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
7085 if (DbgVal.isKillLocation())
7090 const auto &HasTranslatableLocationOps =
7092 for (
const auto LocOp : DbgValToTranslate.location_ops()) {
7106 if (!HasTranslatableLocationOps(DbgVal))
7109 std::unique_ptr<DVIRecoveryRec> NewRec =
7110 std::make_unique<DVIRecoveryRec>(&DbgVal);
7114 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
7115 for (
const auto LocOp : DbgVal.location_ops()) {
7116 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
7117 NewRec->LocationOps.push_back(LocOp);
7118 NewRec->HadLocationArgList = DbgVal.hasArgList();
7120 SalvageableDVISCEVs.push_back(std::move(NewRec));
7130 const LSRInstance &LSR) {
7132 auto IsSuitableIV = [&](
PHINode *
P) {
7143 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7150 if (IsSuitableIV(
P))
7154 for (
PHINode &
P : L.getHeader()->phis()) {
7155 if (IsSuitableIV(&
P))
7173 std::unique_ptr<MemorySSAUpdater> MSSAU;
7175 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7178 const LSRInstance &Reducer =
7179 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7180 Changed |= Reducer.getChanged();
7187#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7190 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7204 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7218 if (SalvageableDVIRecords.
empty())
7224 for (
const auto &L : LI) {
7228 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7229 "could not be identified.\n");
7233 for (
auto &Rec : SalvageableDVIRecords)
7235 SalvageableDVIRecords.
clear();
7239bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & ) {
7243 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7244 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7245 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7246 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7247 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7248 *
L->getHeader()->getParent());
7249 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7250 *
L->getHeader()->getParent());
7251 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7252 *
L->getHeader()->getParent());
7253 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7256 MSSA = &MSSAAnalysis->getMSSA();
7273char LoopStrengthReduce::ID = 0;
7276 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
early cse Early CSE w MemorySSA
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< SCEVUse > &Good, SmallVectorImpl< SCEVUse > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static Instruction * getFixupInsertPos(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, DominatorTree &DT)
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static Immediate ExtractImmediate(SCEVUse &S, ScalarEvolution &SE, bool PreferScalable=false)
If S involves the addition of a constant integer value, return that integer value,...
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static Immediate ExtractImmediateOperand(MutableArrayRef< SCEVUse > Ops, ScalarEvolution &SE, bool PreferScalable)
Extracts an immediate operand from Ops and replaces the operand with zero.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static GlobalValue * ExtractSymbol(SCEVUse &S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth, const TargetTransformInfo &TTI)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Value * getCondition() const
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
LLVM_ABI PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
ArrayRef< SCEVUse > operands() const
bool hasNoSignedWrap() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
static constexpr auto FlagAnyWrap
LLVM_ABI ArrayRef< SCEVUse > operands() const
Return operands of this SCEV expression.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAddRecExpr(SCEVUse Start, SCEVUse Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
void setOperand(unsigned i, Value *Val)
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
iterator_range< use_iterator > uses()
A nullable Value handle that is nullable.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
match_bind< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool isa_and_nonnull(const Y &Val)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
FunctionAddr VTableAddr Count
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
SCEVUseT< const SCEV * > SCEVUse
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.