132#define DEBUG_TYPE "loop-reduce"
149 cl::desc(
"Enable LSR phi elimination"));
154 cl::desc(
"Add instruction count to a LSR cost model"));
159 cl::desc(
"Narrow LSR complex solution using"
160 " expectation of registers number"));
166 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
167 " with the same ScaledReg and Scale"));
171 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
175 "Prefer pre-indexed addressing mode"),
177 "Prefer post-indexed addressing mode"),
182 cl::init(std::numeric_limits<uint16_t>::max()),
183 cl::desc(
"LSR search space complexity limit"));
187 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
191 cl::desc(
"Attempt to drop solution if it is less profitable"));
195 cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
199 cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
205 cl::desc(
"Stress test LSR IV chains"));
// NOTE(review): source is garbled/sampled — the struct header and several
// lines are elided. This is a fragment of MemAccessTy, which describes a
// memory access as a (pointee type, address space) pair.
215 std::numeric_limits<unsigned>::max();
// Type of the accessed memory; void is used to mean "unknown" (see getUnknown).
217 Type *MemTy =
nullptr;
220 MemAccessTy() =
default;
221 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
// Equality fragment: compares both the memory type and the address space.
224 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
// Build an "unknown" access: void type in the given address space
// (defaults to UnknownAddressSpace, per the fragment above).
229 static MemAccessTy getUnknown(LLVMContext &Ctx,
230 unsigned AS = UnknownAddressSpace) {
231 return MemAccessTy(Type::getVoidTy(Ctx), AS);
// Fragment of RegSortData: per-register bookkeeping; the bit vector records
// which LSR use indices reference this register.
242 SmallBitVector UsedByIndices;
244 void print(raw_ostream &OS)
const;
// NOTE(review): garbled/sampled fragment of the Immediate helper class,
// a FixedOrScalableQuantity<Immediate, int64_t> wrapper representing an
// offset that is either a fixed integer or a vscale-relative quantity.
// Private constructors: built only through the factory functions below.
251 constexpr Immediate(ScalarTy MinVal,
bool Scalable)
252 : FixedOrScalableQuantity(MinVal, Scalable) {}
254 constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
255 : FixedOrScalableQuantity(
V) {}
// No default state: an Immediate must always be explicitly constructed.
258 constexpr Immediate() =
delete;
// Factory functions for fixed / scalable values and the int64_t extremes.
260 static constexpr Immediate getFixed(ScalarTy MinVal) {
261 return {MinVal,
false};
263 static constexpr Immediate getScalable(ScalarTy MinVal) {
264 return {MinVal,
true};
266 static constexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
267 return {MinVal, Scalable};
269 static constexpr Immediate getZero() {
return {0,
false}; }
270 static constexpr Immediate getFixedMin() {
271 return {std::numeric_limits<int64_t>::min(),
false};
273 static constexpr Immediate getFixedMax() {
274 return {std::numeric_limits<int64_t>::max(),
false};
276 static constexpr Immediate getScalableMin() {
277 return {std::numeric_limits<int64_t>::min(),
true};
279 static constexpr Immediate getScalableMax() {
280 return {std::numeric_limits<int64_t>::max(),
true};
// Sign predicates on the underlying (known-minimum) quantity.
283 constexpr bool isLessThanZero()
const {
return Quantity < 0; }
285 constexpr bool isGreaterThanZero()
const {
return Quantity > 0; }
// Two immediates may be combined when either is zero or both share the
// same fixed/scalable kind.
287 constexpr bool isCompatibleImmediate(
const Immediate &Imm)
const {
288 return isZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
291 constexpr bool isMin()
const {
292 return Quantity == std::numeric_limits<ScalarTy>::min();
295 constexpr bool isMax()
const {
296 return Quantity == std::numeric_limits<ScalarTy>::max();
// Wrapping (unsigned, overflow-defined) arithmetic helpers. The uint64_t
// casts make the add/sub/mul wrap instead of invoking signed-overflow UB.
// The result is scalable if either operand is.
300 constexpr Immediate addUnsigned(
const Immediate &
RHS)
const {
301 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
302 ScalarTy
Value = (uint64_t)Quantity +
RHS.getKnownMinValue();
303 return {
Value, Scalable ||
RHS.isScalable()};
306 constexpr Immediate subUnsigned(
const Immediate &
RHS)
const {
307 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
308 ScalarTy
Value = (uint64_t)Quantity -
RHS.getKnownMinValue();
309 return {
Value, Scalable ||
RHS.isScalable()};
// Multiplication by a scalar keeps the scalable flag unchanged.
313 constexpr Immediate mulUnsigned(
const ScalarTy
RHS)
const {
314 ScalarTy
Value = (uint64_t)Quantity *
RHS;
315 return {
Value, Scalable};
// SCEV builders (bodies partly elided). getNegativeSCEV negates via a
// wrapping uint64_t cast before building the constant.
319 const SCEV *getSCEV(ScalarEvolution &SE,
Type *Ty)
const {
326 const SCEV *getNegativeSCEV(ScalarEvolution &SE,
Type *Ty)
const {
327 const SCEV *NegS = SE.
getConstant(Ty, -(uint64_t)Quantity);
333 const SCEV *getUnknownSCEV(ScalarEvolution &SE,
Type *Ty)
const {
// Strict-weak ordering for Immediates used as map/sort keys. The two `if`
// bodies (ordering scalable vs. fixed first) are elided by the sampling;
// ties on scalability fall through to comparing the known-minimum values.
348struct KeyOrderTargetImmediate {
349 bool operator()(
const Immediate &
LHS,
const Immediate &
RHS)
const {
350 if (
LHS.isScalable() && !
RHS.isScalable())
352 if (!
LHS.isScalable() &&
RHS.isScalable())
354 return LHS.getKnownMinValue() <
RHS.getKnownMinValue();
// Ordering for (size_t, Immediate) pairs: primary key is the size_t,
// ties delegate to KeyOrderTargetImmediate on the Immediate.
361struct KeyOrderSizeTAndImmediate {
362 bool operator()(
const std::pair<size_t, Immediate> &
LHS,
363 const std::pair<size_t, Immediate> &
RHS)
const {
364 size_t LSize =
LHS.first;
365 size_t RSize =
RHS.first;
367 return LSize < RSize;
368 return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
// Debug-only printing of a RegSortData: number of uses referencing the
// register, taken from the UsedByIndices population count.
373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
375 OS <<
"[NumUses=" << UsedByIndices.
count() <<
']';
// Fragment of RegUseTracker (class header elided): maps each register
// (SCEV expression) to the set of LSR use indices that reference it.
387 using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
389 RegUsesTy RegUsesMap;
// Record that use LUIdx references Reg.
393 void countRegister(
const SCEV *
Reg,
size_t LUIdx);
// Remove use LUIdx from Reg's use set.
394 void dropRegister(
const SCEV *
Reg,
size_t LUIdx);
// Rewrite use indices after the use at LUIdx is replaced by LastLUIdx.
395 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
// True if any use other than LUIdx references Reg.
397 bool isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const;
399 const SmallBitVector &getUsedByIndices(
const SCEV *
Reg)
const;
// Record that use LUIdx references Reg: create/find the RegSortData entry,
// grow the bit vector if needed, and set the LUIdx bit.
415RegUseTracker::countRegister(
const SCEV *
Reg,
size_t LUIdx) {
416 std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(
Reg);
417 RegSortData &RSD = Pair.first->second;
420 RSD.UsedByIndices.
resize(std::max(RSD.UsedByIndices.
size(), LUIdx + 1));
421 RSD.UsedByIndices.
set(LUIdx);
// Drop use LUIdx from Reg's use set; Reg must already be tracked.
425RegUseTracker::dropRegister(
const SCEV *
Reg,
size_t LUIdx) {
426 RegUsesTy::iterator It = RegUsesMap.find(
Reg);
427 assert(It != RegUsesMap.end());
428 RegSortData &RSD = It->second;
430 RSD.UsedByIndices.
reset(LUIdx);
// The use at LastLUIdx was moved into slot LUIdx (swap-and-pop): for every
// tracked register, copy LastLUIdx's bit into LUIdx and shrink the vector.
434RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
435 assert(LUIdx <= LastLUIdx);
439 for (
auto &Pair : RegUsesMap) {
440 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
441 if (LUIdx < UsedByIndices.
size())
442 UsedByIndices[LUIdx] =
443 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
444 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
// True if some use other than LUIdx references Reg. The scan over set bits
// (the loop producing `i`) is elided by the sampling: an untracked register
// or one whose only set bit is LUIdx yields false.
449RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const {
450 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
451 if (
I == RegUsesMap.end())
453 const SmallBitVector &UsedByIndices =
I->second.UsedByIndices;
455 if (i == -1)
return false;
456 if ((
size_t)i != LUIdx)
return true;
// Return the use-index bit vector for Reg; asserts Reg is tracked.
460const SmallBitVector &RegUseTracker::getUsedByIndices(
const SCEV *
Reg)
const {
461 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
462 assert(
I != RegUsesMap.end() &&
"Unknown register!");
463 return I->second.UsedByIndices;
// clear() body elided by the sampling.
466void RegUseTracker::clear() {
// Fragment of the Formula struct (header elided): one candidate way of
// computing an address/value, roughly BaseGV + BaseOffset + sum(BaseRegs)
// + Scale*ScaledReg, mirroring target addressing modes.
477 GlobalValue *BaseGV =
nullptr;
480 Immediate BaseOffset = Immediate::getZero();
// True when the formula requires a base register even if BaseRegs is empty.
483 bool HasBaseReg =
false;
506 const SCEV *ScaledReg =
nullptr;
// Offset that could not be folded into the addressing mode.
511 Immediate UnfoldedOffset = Immediate::getZero();
515 void initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE);
519 void canonicalize(
const Loop &L);
523 bool hasZeroEnd()
const;
525 bool countsDownToZero()
const;
527 size_t getNumRegs()
const;
530 void deleteBaseReg(
const SCEV *&S);
532 bool referencesReg(
const SCEV *S)
const;
533 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
534 const RegUseTracker &RegUses)
const;
536 void print(raw_ostream &OS)
const;
// NOTE(review): fragments of a static DoInitialMatch helper (its signature
// is elided) that splits a SCEV into "good" (loop-dominating) and "bad"
// parts, recursing over add operands and negated multiplies.
554 for (
const SCEV *S :
Add->operands())
560 const SCEV *Start, *Step;
575 if (
Mul->getOperand(0)->isAllOnesValue()) {
584 for (
const SCEV *S : MyGood)
586 for (
const SCEV *S : MyBad)
// Seed this Formula from expression S (body mostly elided): collected
// sums become entries of BaseRegs.
598void Formula::initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE) {
605 BaseRegs.push_back(Sum);
611 BaseRegs.push_back(Sum);
// Check the canonical-form invariants: a non-zero Scale requires ScaledReg,
// and (per the visible branches) restricted BaseRegs configurations.
626bool Formula::isCanonical(
const Loop &L)
const {
627 assert((Scale == 0 || ScaledReg) &&
628 "ScaledReg must be non-null if Scale is non-zero");
631 return BaseRegs.size() <= 1;
636 if (Scale == 1 && BaseRegs.empty())
645 return none_of(BaseRegs, [&L](
const SCEV *S) {
// Rewrite the formula into canonical form: materialize 1*reg as a base
// register, or promote a base register into ScaledReg as needed.
656void Formula::canonicalize(
const Loop &L) {
660 if (BaseRegs.empty()) {
662 assert(ScaledReg &&
"Expected 1*reg => reg");
663 assert(Scale == 1 &&
"Expected 1*reg => reg");
664 BaseRegs.push_back(ScaledReg);
672 ScaledReg = BaseRegs.pop_back_val();
// Prefer a ScaledReg that varies in loop L; search BaseRegs for one.
680 auto I =
find_if(BaseRegs, [&L](
const SCEV *S) {
683 if (
I != BaseRegs.end())
// Undo scaling: move ScaledReg back into BaseRegs (guard lines elided).
693bool Formula::unscale() {
697 BaseRegs.push_back(ScaledReg);
// True when the IV described by this formula ends at zero: no offsets and
// exactly one unscaled base register (final check elided).
702bool Formula::hasZeroEnd()
const {
703 if (UnfoldedOffset || BaseOffset)
705 if (BaseRegs.size() != 1 || ScaledReg)
// True when the single base register steps down to zero (step inspection
// via StepInt is elided).
710bool Formula::countsDownToZero()
const {
713 assert(BaseRegs.size() == 1 &&
"hasZeroEnd should mean one BaseReg");
714 const APInt *StepInt;
// Registers used = base registers plus the scaled register if present.
722size_t Formula::getNumRegs()
const {
723 return !!ScaledReg + BaseRegs.size();
// Result type: first base register's type, else ScaledReg's type
// (fallback case elided).
728Type *Formula::getType()
const {
729 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
730 ScaledReg ? ScaledReg->
getType() :
// Remove base register S by swapping it with the last entry (pop elided).
736void Formula::deleteBaseReg(
const SCEV *&S) {
737 if (&S != &BaseRegs.back())
743bool Formula::referencesReg(
const SCEV *S)
const {
// True if any register of this formula is also used by a use other than
// the one at LUIdx.
749bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
750 const RegUseTracker &RegUses)
const {
752 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
754 for (
const SCEV *BaseReg : BaseRegs)
755 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
// Debug-only printing of the formula; also diagnoses an inconsistent
// HasBaseReg flag with explicit **error** markers.
760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
761void Formula::print(raw_ostream &OS)
const {
762 ListSeparator
Plus(
" + ");
767 if (BaseOffset.isNonZero())
768 OS <<
Plus << BaseOffset;
770 for (
const SCEV *BaseReg : BaseRegs)
773 if (HasBaseReg && BaseRegs.empty())
774 OS <<
Plus <<
"**error: HasBaseReg**";
775 else if (!HasBaseReg && !BaseRegs.empty())
776 OS <<
Plus <<
"**error: !HasBaseReg**";
779 OS <<
Plus << Scale <<
"*reg(";
786 if (UnfoldedOffset.isNonZero())
787 OS <<
Plus <<
"imm(" << UnfoldedOffset <<
')';
827 bool IgnoreSignificantBits =
false) {
838 if (
RA.isAllOnes()) {
839 if (
LHS->getType()->isPointerTy())
852 const APInt &LA =
C->getAPInt();
861 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
863 IgnoreSignificantBits);
864 if (!Step)
return nullptr;
866 IgnoreSignificantBits);
867 if (!Start)
return nullptr;
880 for (
const SCEV *S :
Add->operands()) {
882 if (!
Op)
return nullptr;
910 for (
const SCEV *S :
Mul->operands()) {
913 IgnoreSignificantBits)) {
933 if (
C->getSignificantBits() <= 64) {
935 return Immediate::getFixed(
C->getSExtValue());
940 if (Result.isNonZero())
946 if (Result.isNonZero())
954 return Immediate::getScalable(
C->getSExtValue());
956 return Immediate::getZero();
991 if (
SI->getPointerOperand() == OperandVal)
996 switch (
II->getIntrinsicID()) {
997 case Intrinsic::memset:
998 case Intrinsic::prefetch:
999 case Intrinsic::masked_load:
1000 if (
II->getArgOperand(0) == OperandVal)
1003 case Intrinsic::masked_store:
1004 if (
II->getArgOperand(1) == OperandVal)
1007 case Intrinsic::memmove:
1008 case Intrinsic::memcpy:
1009 if (
II->getArgOperand(0) == OperandVal ||
1010 II->getArgOperand(1) == OperandVal)
1015 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo)) {
1016 if (IntrInfo.
PtrVal == OperandVal)
1022 if (RMW->getPointerOperand() == OperandVal)
1025 if (CmpX->getPointerOperand() == OperandVal)
1034 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1038 AccessTy.MemTy = Ty;
1042 AccessTy.AddrSpace =
SI->getPointerAddressSpace();
1044 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1046 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1048 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1050 switch (
II->getIntrinsicID()) {
1051 case Intrinsic::prefetch:
1052 case Intrinsic::memset:
1053 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1054 AccessTy.MemTy = OperandVal->
getType();
1056 case Intrinsic::memmove:
1057 case Intrinsic::memcpy:
1059 AccessTy.MemTy = OperandVal->
getType();
1061 case Intrinsic::masked_load:
1062 AccessTy.AddrSpace =
1063 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1065 case Intrinsic::masked_store:
1066 AccessTy.AddrSpace =
1067 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1071 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo) && IntrInfo.
PtrVal) {
1127 if (!Processed.
insert(S).second)
1131 for (
const SCEV *S :
Add->operands()) {
1138 const SCEV *Op0, *Op1;
1147 Value *UVal = U->getValue();
1151 if (UI && UI->
getOpcode() == Instruction::Mul &&
1184 const LSRUse &LU,
const Formula &
F);
1188 const LSRUse &LU,
const Formula &
F,
1195 const Loop *
L =
nullptr;
1196 ScalarEvolution *SE =
nullptr;
1197 const TargetTransformInfo *
TTI =
nullptr;
1198 TargetTransformInfo::LSRCost
C;
1203 Cost(
const Loop *L, ScalarEvolution &SE,
const TargetTransformInfo &
TTI,
1205 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1223 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1224 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1225 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1226 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0
u);
1232 return C.NumRegs == ~0
u;
1235 void RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1236 const DenseSet<const SCEV *> &VisitedRegs,
const LSRUse &LU,
1237 bool HardwareLoopProfitable,
1238 SmallPtrSetImpl<const SCEV *> *LoserRegs =
nullptr);
1240 void print(raw_ostream &OS)
const;
1244 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1245 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1246 bool HardwareLoopProfitable);
1247 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1248 SmallPtrSetImpl<const SCEV *> &Regs,
1249 const LSRUse &LU,
bool HardwareLoopProfitable,
1250 SmallPtrSetImpl<const SCEV *> *LoserRegs);
1261 Value *OperandValToReplace =
nullptr;
1271 Immediate
Offset = Immediate::getZero();
1273 LSRFixup() =
default;
1275 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1277 void print(raw_ostream &OS)
const;
1287 DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;
1300 using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1303 MemAccessTy AccessTy;
1309 Immediate MinOffset = Immediate::getFixedMax();
1310 Immediate MaxOffset = Immediate::getFixedMin();
1314 bool AllFixupsOutsideLoop =
true;
1319 bool AllFixupsUnconditional =
true;
1326 bool RigidFormula =
false;
1334 SmallPtrSet<const SCEV *, 4> Regs;
1336 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1338 LSRFixup &getNewFixup() {
1339 Fixups.push_back(LSRFixup());
1343 void pushFixup(LSRFixup &f) {
1345 if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1346 MaxOffset =
f.Offset;
1347 if (Immediate::isKnownLT(
f.Offset, MinOffset))
1348 MinOffset =
f.Offset;
1351 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1352 float getNotSelectedProbability(
const SCEV *
Reg)
const;
1353 bool InsertFormula(
const Formula &
F,
const Loop &L);
1354 void DeleteFormula(Formula &
F);
1355 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1357 void print(raw_ostream &OS)
const;
1364 LSRUse::KindType Kind, MemAccessTy AccessTy,
1365 GlobalValue *BaseGV, Immediate BaseOffset,
1366 bool HasBaseReg, int64_t Scale,
1367 Instruction *
Fixup =
nullptr);
1374 if (
TTI.getIntImmCost(
C->getAPInt(),
C->getType(),
1388 [&](
unsigned i,
const SCEV *
Reg) {
1389 return i + getSetupCost(Reg, Depth - 1, TTI);
// Tally the cost of using register Reg in formula F (body heavily elided
// by the sampling). Addrecs for other loops, pre/post-index candidates,
// and hardware-loop counting-down ICmpZero uses adjust AddRecCost.
1398void Cost::RateRegister(
const Formula &
F,
const SCEV *
Reg,
1399 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1400 bool HardwareLoopProfitable) {
1405 if (AR->getLoop() != L) {
1412 if (!AR->getLoop()->contains(L)) {
1422 unsigned LoopCost = 1;
// Pre/post-index candidacy appears to compare the addrec step against the
// formula's fixed base offset — TODO confirm against the full source.
1431 F.BaseOffset.isFixed() &&
1432 *Step ==
F.BaseOffset.getFixedValue();
1437 if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
1444 if (LU.Kind == LSRUse::ICmpZero &&
F.countsDownToZero() &&
1445 HardwareLoopProfitable)
1447 C.AddRecCost += LoopCost;
// Recurse into the addrec's step operand if it is not already counted.
1452 if (!Regs.
count(AR->getOperand(1))) {
1453 RateRegister(
F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);
// Clamp SetupCost so later arithmetic cannot overflow.
1465 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
// Rate Reg if newly seen; a register already known to be a loser makes
// the whole formula a loser immediately (early-out lines elided).
1474void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1475 SmallPtrSetImpl<const SCEV *> &Regs,
1476 const LSRUse &LU,
bool HardwareLoopProfitable,
1477 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1478 if (LoserRegs && LoserRegs->
count(
Reg)) {
1483 RateRegister(
F,
Reg, Regs, LU, HardwareLoopProfitable);
1484 if (LoserRegs && isLoser())
// Rate an entire formula: registers first (scaled then base), then base
// adds, immediate costs per fixup, and finally an instruction-count
// estimate versus the target's register budget.
1489void Cost::RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1490 const DenseSet<const SCEV *> &VisitedRegs,
1491 const LSRUse &LU,
bool HardwareLoopProfitable,
1492 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1495 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
// Snapshot counters so instruction-count deltas can be computed below.
1497 unsigned PrevAddRecCost =
C.AddRecCost;
1498 unsigned PrevNumRegs =
C.NumRegs;
1499 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1500 if (
const SCEV *ScaledReg =
F.ScaledReg) {
// A register already visited on this search path makes the formula lose.
1501 if (VisitedRegs.
count(ScaledReg)) {
1505 RatePrimaryRegister(
F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1510 for (
const SCEV *BaseReg :
F.BaseRegs) {
1511 if (VisitedRegs.
count(BaseReg)) {
1515 RatePrimaryRegister(
F, BaseReg, Regs, LU, HardwareLoopProfitable,
// Each register beyond the first implies an add instruction.
1522 size_t NumBaseParts =
F.getNumRegs();
1523 if (NumBaseParts > 1)
1528 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
// Accumulate immediate costs across all fixups of this use.
1534 for (
const LSRFixup &
Fixup : LU.Fixups) {
1535 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1536 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1540 else if (
Offset.isNonZero())
1542 APInt(64,
Offset.getKnownMinValue(),
true).getSignificantBits();
1546 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
// Each register over the target's budget costs extra instructions.
1567 if (
C.NumRegs > TTIRegNum) {
1570 if (PrevNumRegs > TTIRegNum)
1571 C.Insns += (
C.NumRegs - PrevNumRegs);
1573 C.Insns += (
C.NumRegs - TTIRegNum);
1586 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1590 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1593 if (LU.Kind != LSRUse::ICmpZero)
1594 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
// Fragment of Cost::Lose() (declaration elided): saturate every counter
// so this cost always compares worse than any real one.
1600 C.Insns = std::numeric_limits<unsigned>::max();
1601 C.NumRegs = std::numeric_limits<unsigned>::max();
1602 C.AddRecCost = std::numeric_limits<unsigned>::max();
1603 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1604 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1605 C.ImmCost = std::numeric_limits<unsigned>::max();
1606 C.SetupCost = std::numeric_limits<unsigned>::max();
1607 C.ScaleCost = std::numeric_limits<unsigned>::max();
// Cost ordering: instruction count dominates (remaining tie-breaks elided).
1611bool Cost::isLess(
const Cost &
Other)
const {
1613 C.Insns !=
Other.C.Insns)
1614 return C.Insns <
Other.C.Insns;
1618#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1621 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1622 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1623 if (
C.AddRecCost != 0)
1624 OS <<
", with addrec cost " <<
C.AddRecCost;
1625 if (
C.NumIVMuls != 0)
1626 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1627 << (
C.NumIVMuls == 1 ?
"" :
"s");
1628 if (
C.NumBaseAdds != 0)
1629 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1630 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1631 if (
C.ScaleCost != 0)
1632 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1634 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1635 if (
C.SetupCost != 0)
1636 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
// A fixup is fully outside loop L when no replaced operand is reached from
// inside L. For PHIs, check each incoming edge that carries the replaced
// value; otherwise the user instruction itself must be outside L.
1645bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1648 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1649 if (PN->getIncomingValue(i) == OperandValToReplace &&
1650 L->contains(PN->getIncomingBlock(i)))
1655 return !
L->contains(UserInst);
// Debug-only printing of a fixup: store operand, the operand being
// replaced, any post-increment loops, and the offset.
1658#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1659void LSRFixup::print(raw_ostream &OS)
const {
1664 Store->getOperand(0)->printAsOperand(OS,
false);
1670 OS <<
", OperandValToReplace=";
1673 for (
const Loop *PIL : PostIncLoops) {
1674 OS <<
", PostIncLoop=";
1675 PIL->getHeader()->printAsOperand(OS,
false);
1679 OS <<
", Offset=" <<
Offset;
// Check whether a formula with the same register set was already recorded;
// the uniquifier key is the sorted register list (key build partly elided).
1689bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1691 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
// Fraction of this use's formulae that do NOT reference Reg — used as a
// heuristic for how safely Reg can be dropped from consideration.
1698float LSRUse::getNotSelectedProbability(
const SCEV *
Reg)
const {
1700 for (
const Formula &
F : Formulae)
1701 if (
F.referencesReg(
Reg))
1703 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
// Insert formula F if it is not a duplicate (uniquifier check elided);
// a RigidFormula use accepts only its first formula.
1708bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1709 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1711 if (!Formulae.empty() && RigidFormula)
1715 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
// Sanity: zero must be folded into the offset, never kept as a register.
1723 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1724 "Zero allocated in a scaled register!");
1726 for (
const SCEV *BaseReg :
F.BaseRegs)
1727 assert(!
BaseReg->isZero() &&
"Zero allocated in a base register!");
1731 Formulae.push_back(
F);
// Delete F by swap-with-back then pop (swap line elided).
1742void LSRUse::DeleteFormula(Formula &
F) {
1743 if (&
F != &Formulae.back())
1745 Formulae.pop_back();
// Rebuild the register set from the surviving formulae and drop
// registers no longer referenced from the global RegUseTracker.
1749void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1751 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1753 for (
const Formula &
F : Formulae) {
1754 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1759 for (
const SCEV *S : OldRegs)
1761 RegUses.dropRegister(S, LUIdx);
// Debug-only printing of a use: kind, access type, fixup offsets, and the
// all-fixups flags.
1764#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1765void LSRUse::print(raw_ostream &OS)
const {
1766 OS <<
"LSR Use: Kind=";
1768 case Basic: OS <<
"Basic";
break;
1769 case Special: OS <<
"Special";
break;
1770 case ICmpZero: OS <<
"ICmpZero";
break;
1772 OS <<
"Address of ";
1776 OS << *AccessTy.MemTy;
1779 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1782 OS <<
", Offsets={";
1783 bool NeedComma =
false;
1784 for (
const LSRFixup &
Fixup : Fixups) {
1785 if (NeedComma) OS <<
',';
1791 if (AllFixupsOutsideLoop)
1792 OS <<
", all-fixups-outside-loop";
1794 if (AllFixupsUnconditional)
1795 OS <<
", all-fixups-unconditional";
1804 LSRUse::KindType Kind, MemAccessTy AccessTy,
1806 bool HasBaseReg, int64_t Scale,
1809 case LSRUse::Address: {
1810 int64_t FixedOffset =
1811 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1812 int64_t ScalableOffset =
1813 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1814 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
1815 HasBaseReg, Scale, AccessTy.AddrSpace,
1816 Fixup, ScalableOffset);
1818 case LSRUse::ICmpZero:
1825 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1830 if (Scale != 0 && Scale != -1)
1835 if (BaseOffset.isNonZero()) {
1838 if (BaseOffset.isScalable())
1848 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1849 return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
1857 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1859 case LSRUse::Special:
1861 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1868 Immediate MinOffset, Immediate MaxOffset,
1869 LSRUse::KindType Kind, MemAccessTy AccessTy,
1871 bool HasBaseReg, int64_t Scale) {
1872 if (BaseOffset.isNonZero() &&
1873 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1874 BaseOffset.isScalable() != MaxOffset.isScalable()))
1877 int64_t
Base = BaseOffset.getKnownMinValue();
1878 int64_t Min = MinOffset.getKnownMinValue();
1879 int64_t Max = MaxOffset.getKnownMinValue();
1882 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1885 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1888 HasBaseReg, Scale) &&
1894 Immediate MinOffset, Immediate MaxOffset,
1895 LSRUse::KindType Kind, MemAccessTy AccessTy,
1896 const Formula &
F,
const Loop &L) {
1904 assert((
F.isCanonical(L) ||
F.Scale != 0));
1906 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1911 Immediate MaxOffset, LSRUse::KindType Kind,
1913 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1916 BaseOffset, HasBaseReg, Scale) ||
1921 BaseGV, BaseOffset,
true, 0));
1925 Immediate MaxOffset, LSRUse::KindType Kind,
1926 MemAccessTy AccessTy,
const Formula &
F) {
1927 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1928 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1934 return TTI.isLegalAddScalableImmediate(
Offset.getKnownMinValue());
1936 return TTI.isLegalAddImmediate(
Offset.getFixedValue());
1940 const LSRUse &LU,
const Formula &
F) {
1942 if (LU.Kind == LSRUse::Address &&
TTI.LSRWithInstrQueries()) {
1943 for (
const LSRFixup &
Fixup : LU.Fixups)
1945 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1946 F.Scale,
Fixup.UserInst))
1952 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1957 const LSRUse &LU,
const Formula &
F,
1966 return F.Scale != 1;
1969 case LSRUse::Address: {
1971 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1972 if (
F.BaseOffset.isScalable()) {
1973 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1974 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1976 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1977 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1981 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1984 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1987 "Legal addressing mode has an illegal cost!");
1988 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1990 case LSRUse::ICmpZero:
1992 case LSRUse::Special:
2002 LSRUse::KindType Kind, MemAccessTy AccessTy,
2006 if (BaseOffset.isZero() && !BaseGV)
2011 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2015 if (!HasBaseReg && Scale == 1) {
2025 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2035 Immediate MaxOffset, LSRUse::KindType Kind,
2036 MemAccessTy AccessTy,
const SCEV *S,
2039 if (S->
isZero())
return true;
2052 if (BaseOffset.isZero() && !BaseGV)
2055 if (BaseOffset.isScalable())
2060 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2063 BaseOffset, HasBaseReg, Scale);
2080 const SCEV *IncExpr;
2082 IVInc(Instruction *U,
Value *O,
const SCEV *
E)
2083 : UserInst(
U), IVOperand(
O), IncExpr(
E) {}
2090 const SCEV *ExprBase =
nullptr;
2092 IVChain() =
default;
2093 IVChain(
const IVInc &Head,
const SCEV *
Base)
2094 : Incs(1, Head), ExprBase(
Base) {}
2099 const_iterator
begin()
const {
2101 return std::next(Incs.
begin());
2103 const_iterator
end()
const {
2108 bool hasIncs()
const {
return Incs.
size() >= 2; }
2117 bool isProfitableIncrement(
const SCEV *OperExpr,
2118 const SCEV *IncExpr,
2126 SmallPtrSet<Instruction*, 4> FarUsers;
2127 SmallPtrSet<Instruction*, 4> NearUsers;
2133 ScalarEvolution &SE;
2136 AssumptionCache &AC;
2137 TargetLibraryInfo &TLI;
2138 const TargetTransformInfo &
TTI;
2140 MemorySSAUpdater *MSSAU;
2144 bool HardwareLoopProfitable =
false;
2158 SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
2165 SmallSetVector<Type *, 4> Types;
2171 RegUseTracker RegUses;
2176 static const unsigned MaxChains = 8;
2182 SmallPtrSet<Use*, MaxChains> IVIncSet;
2185 SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
2191 SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2193 void OptimizeShadowIV();
2194 bool FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse);
2196 void OptimizeLoopTermCond();
2198 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2199 SmallVectorImpl<ChainUsers> &ChainUsersVec);
2200 void FinalizeChain(IVChain &Chain);
2201 void CollectChains();
2202 void GenerateIVChain(
const IVChain &Chain,
2203 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2205 void CollectInterestingTypesAndFactors();
2206 void CollectFixupsAndInitialFormulae();
2209 using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2212 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2213 LSRUse::KindType Kind, MemAccessTy AccessTy);
2215 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2216 MemAccessTy AccessTy);
2218 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2220 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2222 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2223 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2224 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2225 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2226 bool IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const;
2228 void CollectLoopInvariantFixupsAndFormulae();
2230 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2231 unsigned Depth = 0);
2233 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2235 size_t Idx,
bool IsScaledReg =
false);
2236 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2237 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2238 const Formula &
Base,
size_t Idx,
2239 bool IsScaledReg =
false);
2240 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2241 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2242 const Formula &
Base,
2243 const SmallVectorImpl<Immediate> &Worklist,
2244 size_t Idx,
bool IsScaledReg =
false);
2245 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2246 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2247 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2248 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2249 void GenerateCrossUseConstantOffsets();
2250 void GenerateAllReuseFormulae();
2252 void FilterOutUndesirableDedicatedRegisters();
2254 size_t EstimateSearchSpaceComplexity()
const;
2255 void NarrowSearchSpaceByDetectingSupersets();
2256 void NarrowSearchSpaceByCollapsingUnrolledCode();
2257 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2258 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2259 void NarrowSearchSpaceByFilterPostInc();
2260 void NarrowSearchSpaceByDeletingCostlyFormulas();
2261 void NarrowSearchSpaceByPickingWinnerRegs();
2262 void NarrowSearchSpaceUsingHeuristics();
2264 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2266 SmallVectorImpl<const Formula *> &Workspace,
2267 const Cost &CurCost,
2268 const SmallPtrSet<const SCEV *, 16> &CurRegs,
2269 DenseSet<const SCEV *> &VisitedRegs)
const;
2270 void Solve(SmallVectorImpl<const Formula *> &Solution)
const;
2274 const SmallVectorImpl<Instruction *> &Inputs)
const;
2277 const LSRUse &LU)
const;
2279 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2281 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const;
2282 void RewriteForPHI(PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2284 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2285 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2286 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2287 void ImplementSolution(
const SmallVectorImpl<const Formula *> &Solution);
2290 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2291 LoopInfo &LI,
const TargetTransformInfo &
TTI, AssumptionCache &AC,
2292 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2294 bool getChanged()
const {
return Changed; }
2295 const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs()
const {
2296 return ScalarEvolutionIVs;
2299 void print_factors_and_types(raw_ostream &OS)
const;
2300 void print_fixups(raw_ostream &OS)
const;
2301 void print_uses(raw_ostream &OS)
const;
2302 void print(raw_ostream &OS)
const;
// Create a "shadow" floating-point IV alongside an integer IV whose value
// is converted via uitofp/sitofp, so the cast leaves the loop (body heavily
// elided by the sampling).
2310void LSRInstance::OptimizeShadowIV() {
2320 Type *DestTy =
nullptr;
2321 bool IsSigned =
false;
// Determine the FP destination type from the ui/si cast user.
2337 DestTy = UCast->getDestTy();
2341 DestTy = SCast->getDestTy();
2343 if (!DestTy)
continue;
// Bail out when the FP type's mantissa cannot represent the IV exactly.
2363 if (Mantissa == -1)
continue;
2367 unsigned Entry, Latch;
2377 if (!Init)
continue;
2378 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
// Only simple add/sub increments can be shadowed.
2382 BinaryOperator *Incr =
2384 if (!Incr)
continue;
2385 if (Incr->
getOpcode() != Instruction::Add
2386 && Incr->
getOpcode() != Instruction::Sub)
2390 ConstantInt *
C =
nullptr;
2402 if (!
C->getValue().isStrictlyPositive())
// Build the FP equivalents of the init and increment.
2410 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2412 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2413 : Instruction::FSub,
// Find the IVStrideUse whose user is Cond, if any.
2430bool LSRInstance::FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse) {
2431 for (IVStrideUse &U : IU)
2432 if (
U.getUser() ==
Cond) {
// Rewrite `icmp eq/ne` of a max/umax-derived trip count into an
// equivalent strict comparison so the max can be removed (elided guards
// verify the pattern before rewriting).
2490Instruction *LSRInstance::OptimizeMax(ICmpInst *
Cond, IVStrideUse *&CondUse) {
2505 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2506 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2512 const SCEVNAryExpr *
Max =
nullptr;
// Choose the strict predicate matching the max flavor found.
2514 Pred = ICmpInst::ICMP_SLE;
2517 Pred = ICmpInst::ICMP_SLT;
2520 Pred = ICmpInst::ICMP_ULT;
2529 if (
Max->getNumOperands() != 2)
2532 const SCEV *MaxLHS =
Max->getOperand(0);
2533 const SCEV *MaxRHS =
Max->getOperand(1);
2538 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2549 "Loop condition operand is an addrec in a different loop!");
// Recover the pre-increment RHS value to compare against.
2553 Value *NewRHS =
nullptr;
2554 if (ICmpInst::isTrueWhenEqual(Pred)) {
2558 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2559 NewRHS = BO->getOperand(0);
2562 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2563 NewRHS = BO->getOperand(0);
2571 NewRHS = SU->getValue();
// Replace the old comparison and clean up the dead select/icmp.
2583 ICmpInst *NewCond =
new ICmpInst(
Cond->getIterator(), Pred,
2584 Cond->getOperand(0), NewRHS,
"scmp");
2588 Cond->replaceAllUsesWith(NewCond);
2591 Cond->eraseFromParent();
2593 if (
Cmp->use_empty()) {
2595 Cmp->eraseFromParent();
// Rewrite each exiting block's terminating icmp to use a post-incremented
// IV where profitable, so the IV increment can sink into the latch
// (body heavily elided by the sampling).
2602LSRInstance::OptimizeLoopTermCond() {
2603 SmallPtrSet<Instruction *, 4> PostIncs;
2618 SmallVector<BasicBlock*, 8> ExitingBlocks;
2619 L->getExitingBlocks(ExitingBlocks);
2627 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2649 IVStrideUse *CondUse =
nullptr;
2650 if (!FindIVUserForCond(
Cond, CondUse))
2660 Cond = OptimizeMax(Cmp, CondUse);
// Post-increment is only valid when the exiting block dominates the latch.
2665 if (!DT.
dominates(ExitingBlock, LatchBlock))
// For a non-latch exit, ensure no other IV use would be hurt: strides
// that differ by a foldable constant factor are acceptable (checks on
// the quotient D below; goto targets share the decline path).
2670 if (LatchBlock != ExitingBlock)
2671 for (
const IVStrideUse &UI : IU)
2674 if (&UI != CondUse &&
2678 const SCEV *
A = IU.getStride(*CondUse, L);
2679 const SCEV *
B = IU.getStride(UI, L);
2680 if (!
A || !
B)
continue;
2689 if (
const SCEVConstant *
D =
2691 const ConstantInt *
C =
D->getValue();
2693 if (
C->isOne() ||
C->isMinusOne())
2694 goto decline_post_inc;
2696 if (
C->getValue().getSignificantBits() >= 64 ||
2697 C->getValue().isMinSignedValue())
2698 goto decline_post_inc;
// The scaled access must still be a legal addressing mode on the target.
2701 MemAccessTy AccessTy =
2703 int64_t Scale =
C->getSExtValue();
2707 AccessTy.AddrSpace))
2708 goto decline_post_inc;
2713 AccessTy.AddrSpace))
2714 goto decline_post_inc;
2719 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
// Move (or clone) the condition next to the terminating branch so the IV
// increment can be placed before the exit test.
2727 if (
Cond->hasOneUse()) {
2728 Cond->moveBefore(TermBr->getIterator());
2733 Cond->setName(
L->getHeader()->getName() +
".termcond");
2734 Cond->insertInto(ExitingBlock, TermBr->getIterator());
2738 TermBr->replaceUsesOfWith(OldCond,
Cond);
// Default the IV increment insertion point to the latch terminator.
2755 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2756 for (Instruction *Inst : PostIncs)
2762bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2763 bool HasBaseReg, LSRUse::KindType Kind,
2764 MemAccessTy AccessTy) {
2765 Immediate NewMinOffset = LU.MinOffset;
2766 Immediate NewMaxOffset = LU.MaxOffset;
2767 MemAccessTy NewAccessTy = AccessTy;
2772 if (LU.Kind != Kind)
2778 if (Kind == LSRUse::Address) {
2779 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2780 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->
getContext(),
2781 AccessTy.AddrSpace);
2786 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2788 LU.MaxOffset - NewOffset, HasBaseReg))
2790 NewMinOffset = NewOffset;
2791 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2793 NewOffset - LU.MinOffset, HasBaseReg))
2795 NewMaxOffset = NewOffset;
2801 if (NewAccessTy.MemTy && NewAccessTy.MemTy->
isVoidTy() &&
2802 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2806 LU.MinOffset = NewMinOffset;
2807 LU.MaxOffset = NewMaxOffset;
2808 LU.AccessTy = NewAccessTy;
2815std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2816 LSRUse::KindType Kind,
2817 MemAccessTy AccessTy) {
2818 const SCEV *
Copy = Expr;
2827 Offset = Immediate::getFixed(0);
2830 std::pair<UseMapTy::iterator, bool>
P =
2831 UseMap.
try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
2834 size_t LUIdx =
P.first->second;
2835 LSRUse &LU =
Uses[LUIdx];
2836 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2838 return std::make_pair(LUIdx,
Offset);
2842 size_t LUIdx =
Uses.size();
2843 P.first->second = LUIdx;
2844 Uses.push_back(LSRUse(Kind, AccessTy));
2845 LSRUse &LU =
Uses[LUIdx];
2849 return std::make_pair(LUIdx,
Offset);
2853void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2854 if (&LU != &
Uses.back())
2859 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2865LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2866 const LSRUse &OrigLU) {
2868 for (LSRUse &LU :
Uses) {
2874 if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero &&
2875 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2876 LU.HasFormulaWithSameRegs(OrigF)) {
2878 for (
const Formula &
F : LU.Formulae) {
2881 if (
F.BaseRegs == OrigF.BaseRegs &&
2882 F.ScaledReg == OrigF.ScaledReg &&
2883 F.BaseGV == OrigF.BaseGV &&
2884 F.Scale == OrigF.Scale &&
2885 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2886 if (
F.BaseOffset.isZero())
2901void LSRInstance::CollectInterestingTypesAndFactors() {
2902 SmallSetVector<const SCEV *, 4> Strides;
2906 for (
const IVStrideUse &U : IU) {
2907 const SCEV *Expr = IU.getExpr(U);
2925 }
while (!Worklist.
empty());
2929 for (SmallSetVector<const SCEV *, 4>::const_iterator
2931 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2932 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2933 const SCEV *OldStride = *
I;
2934 const SCEV *NewStride = *NewStrideIter;
2944 if (
const SCEVConstant *Factor =
2947 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2948 Factors.insert(Factor->getAPInt().getSExtValue());
2949 }
else if (
const SCEVConstant *Factor =
2953 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2954 Factors.insert(Factor->getAPInt().getSExtValue());
2960 if (Types.size() == 1)
2972 for(; OI != OE; ++OI) {
2991 return Trunc->getOperand(0);
3024 if (SubExpr->getSCEVType() ==
scAddExpr)
3027 if (SubExpr->getSCEVType() !=
scMulExpr)
3043bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3044 const SCEV *IncExpr,
3045 ScalarEvolution &SE) {
3058 SmallPtrSet<const SCEV*, 8> Processed;
3079 if (!Chain.hasIncs())
3082 if (!
Users.empty()) {
3083 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3085 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3088 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3097 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3100 const SCEV *LastIncExpr =
nullptr;
3101 unsigned NumConstIncrements = 0;
3102 unsigned NumVarIncrements = 0;
3103 unsigned NumReusedIncrements = 0;
3105 if (
TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
3108 for (
const IVInc &Inc : Chain) {
3109 if (
TTI.isProfitableLSRChainElement(Inc.UserInst))
3111 if (Inc.IncExpr->isZero())
3117 ++NumConstIncrements;
3121 if (Inc.IncExpr == LastIncExpr)
3122 ++NumReusedIncrements;
3126 LastIncExpr = Inc.IncExpr;
3131 if (NumConstIncrements > 1)
3138 cost += NumVarIncrements;
3142 cost -= NumReusedIncrements;
3144 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3151void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
3152 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
3156 const SCEV *
const OperExpr = SE.
getSCEV(NextIV);
3157 const SCEV *
const OperExprBase =
getExprBase(OperExpr);
3161 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3162 const SCEV *LastIncExpr =
nullptr;
3163 for (; ChainIdx < NChains; ++ChainIdx) {
3164 IVChain &Chain = IVChainVec[ChainIdx];
3182 const SCEV *PrevExpr = SE.
getSCEV(PrevIV);
3183 const SCEV *IncExpr = SE.
getMinusSCEV(OperExpr, PrevExpr);
3187 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3188 LastIncExpr = IncExpr;
3194 if (ChainIdx == NChains) {
3201 LastIncExpr = OperExpr;
3208 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3210 ChainUsersVec.
resize(NChains);
3211 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3212 <<
") IV=" << *LastIncExpr <<
"\n");
3214 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3215 <<
") IV+" << *LastIncExpr <<
"\n");
3217 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3219 IVChain &Chain = IVChainVec[ChainIdx];
3221 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
3223 if (!LastIncExpr->
isZero()) {
3224 ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);
3233 for (User *U : IVOper->
users()) {
3239 IVChain::const_iterator IncIter = Chain.Incs.begin();
3240 IVChain::const_iterator IncEnd = Chain.Incs.end();
3241 for( ; IncIter != IncEnd; ++IncIter) {
3242 if (IncIter->UserInst == OtherUse)
3245 if (IncIter != IncEnd)
3250 && IU.isIVUserOrOperand(OtherUse)) {
3253 NearUsers.
insert(OtherUse);
3258 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3283void LSRInstance::CollectChains() {
3287 SmallVector<BasicBlock *,8> LatchPath;
3290 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3296 for (BasicBlock *BB :
reverse(LatchPath)) {
3297 for (Instruction &
I : *BB) {
3303 if (IU.isEphemeral(&
I))
3313 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3314 ChainIdx < NChains; ++ChainIdx) {
3315 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3318 SmallPtrSet<Instruction*, 4> UniqueOperands;
3321 while (IVOpIter != IVOpEnd) {
3323 if (UniqueOperands.
insert(IVOpInst).second)
3324 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3325 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3330 for (PHINode &PN :
L->getHeader()->phis()) {
3337 ChainInstruction(&PN, IncV, ChainUsersVec);
3340 unsigned ChainIdx = 0;
3341 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3342 UsersIdx < NChains; ++UsersIdx) {
3344 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3347 if (ChainIdx != UsersIdx)
3348 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3349 FinalizeChain(IVChainVec[ChainIdx]);
3352 IVChainVec.resize(ChainIdx);
3355void LSRInstance::FinalizeChain(IVChain &Chain) {
3356 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3357 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3359 for (
const IVInc &Inc : Chain) {
3361 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3362 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3363 IVIncSet.insert(UseI);
3371 Immediate IncOffset = Immediate::getZero();
3380 C->getSignificantBits() > 64)
3382 IncOffset = Immediate::getScalable(
C->getSExtValue());
3398void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3399 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3402 const IVInc &Head = Chain.Incs[0];
3407 Value *IVSrc =
nullptr;
3408 while (IVOpIter != IVOpEnd) {
3419 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3420 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3423 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3425 if (IVOpIter == IVOpEnd) {
3427 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3430 assert(IVSrc &&
"Failed to find IV chain source");
3435 const SCEV *LeftOverExpr =
nullptr;
3436 const SCEV *Accum = SE.
getZero(IntTy);
3440 for (
const IVInc &Inc : Chain) {
3443 InsertPt =
L->getLoopLatch()->getTerminator();
3447 Value *IVOper = IVSrc;
3448 if (!Inc.IncExpr->isZero()) {
3453 LeftOverExpr = LeftOverExpr ?
3454 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3458 bool FoundBase =
false;
3459 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3460 const SCEV *Remainder = SE.
getMinusSCEV(Accum, MapScev);
3462 if (!Remainder->
isZero()) {
3464 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3465 const SCEV *IVOperExpr =
3467 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3476 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3479 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3482 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3486 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3489 LeftOverExpr =
nullptr;
3493 if (IVTy != OperTy) {
3495 "cannot extend a chained IV");
3497 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3499 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3506 for (PHINode &Phi :
L->getHeader()->phis()) {
3510 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3513 Value *IVOper = IVSrc;
3515 if (IVTy != PostIncTy) {
3517 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3518 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3519 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3521 Phi.replaceUsesOfWith(PostIncV, IVOper);
3527void LSRInstance::CollectFixupsAndInitialFormulae() {
3528 CondBrInst *ExitBranch =
nullptr;
3529 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3532 SmallPtrSet<const SCEV *, 16> Regs;
3533 DenseSet<const SCEV *> VisitedRegs;
3534 DenseSet<size_t> VisitedLSRUse;
3536 for (
const IVStrideUse &U : IU) {
3541 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3542 if (IVIncSet.count(UseI)) {
3543 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3547 LSRUse::KindType
Kind = LSRUse::Basic;
3548 MemAccessTy AccessTy;
3550 Kind = LSRUse::Address;
3554 const SCEV *S = IU.getExpr(U);
3570 if (CI->isEquality()) {
3573 Value *
NV = CI->getOperand(1);
3574 if (NV ==
U.getOperandValToReplace()) {
3575 CI->setOperand(1, CI->getOperand(0));
3576 CI->setOperand(0, NV);
3577 NV = CI->getOperand(1);
3584 (!
NV->getType()->isPointerTy() ||
3591 Kind = LSRUse::ICmpZero;
3593 }
else if (
L->isLoopInvariant(NV) &&
3596 !
NV->getType()->isPointerTy()) {
3607 Kind = LSRUse::ICmpZero;
3614 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3615 if (Factors[i] != -1)
3616 Factors.insert(-(uint64_t)Factors[i]);
3622 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3623 size_t LUIdx =
P.first;
3625 LSRUse &LU =
Uses[LUIdx];
3628 LSRFixup &LF = LU.getNewFixup();
3629 LF.UserInst = UserInst;
3630 LF.OperandValToReplace =
U.getOperandValToReplace();
3631 LF.PostIncLoops = TmpPostIncLoops;
3633 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3634 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3637 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3639 F.initialMatch(S, L, SE);
3640 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU,
3641 HardwareLoopProfitable);
3642 VisitedLSRUse.
insert(LUIdx);
3646 if (LU.Formulae.empty()) {
3647 InsertInitialFormula(S, LU, LUIdx);
3648 CountRegisters(LU.Formulae.back(), LUIdx);
3657void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3661 LU.RigidFormula =
true;
3664 F.initialMatch(S, L, SE);
3665 bool Inserted = InsertFormula(LU, LUIdx,
F);
3666 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3672LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3673 LSRUse &LU,
size_t LUIdx) {
3675 F.BaseRegs.push_back(S);
3676 F.HasBaseReg =
true;
3677 bool Inserted = InsertFormula(LU, LUIdx,
F);
3678 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3682void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3684 RegUses.countRegister(
F.ScaledReg, LUIdx);
3685 for (
const SCEV *BaseReg :
F.BaseRegs)
3686 RegUses.countRegister(BaseReg, LUIdx);
3691bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3694 "Formula is illegal");
3696 if (!LU.InsertFormula(
F, *L))
3699 CountRegisters(
F, LUIdx);
3705bool LSRInstance::IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const {
3717LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3719 SmallPtrSet<const SCEV *, 32> Visited;
3726 while (!Worklist.
empty()) {
3730 if (!Visited.
insert(S).second)
3741 const Value *
V = US->getValue();
3744 if (
L->contains(Inst))
continue;
3748 for (
const Use &U :
V->uses()) {
3758 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3780 bool HasIncompatibleEHPTerminatedBlock =
false;
3782 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3783 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3784 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3785 HasIncompatibleEHPTerminatedBlock =
true;
3790 if (HasIncompatibleEHPTerminatedBlock) {
3813 unsigned OtherIdx = !
U.getOperandNo();
3814 Value *OtherOp = ICI->getOperand(OtherIdx);
3824 std::pair<size_t, Immediate>
P =
3825 getUse(S, LSRUse::Basic, MemAccessTy());
3826 size_t LUIdx =
P.first;
3828 LSRUse &LU =
Uses[LUIdx];
3829 LSRFixup &LF = LU.getNewFixup();
3830 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3831 LF.OperandValToReplace =
U;
3833 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3834 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3835 InsertSupplementalFormula(US, LU, LUIdx);
3836 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3852 unsigned Depth = 0) {
3859 for (
const SCEV *S :
Add->operands()) {
3866 const SCEV *Start, *Step;
3871 if (Start->isZero())
3880 Remainder =
nullptr;
3882 if (Remainder != Start) {
3904 LSRUse &LU,
const SCEV *S,
const Loop *L,
3906 if (LU.Kind != LSRUse::Address ||
3907 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3913 if (
TTI.isIndexedLoadLegal(
TTI.MIM_PostInc, S->
getType()) ||
3922void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3923 const Formula &
Base,
3924 unsigned Depth,
size_t Idx,
3926 const SCEV *
BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
3934 const SCEV *Remainder =
CollectSubexprs(BaseReg,
nullptr, AddOps, L, SE);
3938 if (AddOps.
size() == 1)
3952 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3957 InnerAddOps.append(std::next(J), std::as_const(AddOps).
end());
3961 if (InnerAddOps.size() == 1 &&
3963 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3966 const SCEV *InnerSum = SE.
getAddExpr(InnerAddOps);
3971 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3980 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
3983 F.ScaledReg =
nullptr;
3986 F.BaseRegs.erase(
F.BaseRegs.begin() + Idx);
3987 }
else if (IsScaledReg)
3988 F.ScaledReg = InnerSum;
3990 F.BaseRegs[Idx] = InnerSum;
3998 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
4001 F.BaseRegs.push_back(*J);
4006 if (InsertFormula(LU, LUIdx,
F))
4013 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4019void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4021 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4026 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4027 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4029 if (
Base.Scale == 1)
4030 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4036void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4039 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4040 (
Base.UnfoldedOffset.isNonZero()) <=
4048 Formula NewBase =
Base;
4049 NewBase.BaseRegs.clear();
4050 Type *CombinedIntegerType =
nullptr;
4051 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4054 if (!CombinedIntegerType)
4056 Ops.push_back(BaseReg);
4059 NewBase.BaseRegs.push_back(BaseReg);
4063 if (
Ops.size() == 0)
4068 auto GenerateFormula = [&](
const SCEV *Sum) {
4069 Formula
F = NewBase;
4077 F.BaseRegs.push_back(Sum);
4079 (void)InsertFormula(LU, LUIdx,
F);
4083 if (
Ops.size() > 1) {
4090 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4091 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4093 NewBase.UnfoldedOffset.getFixedValue(),
true));
4094 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4100void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4101 const Formula &
Base,
size_t Idx,
4105 if (
G->isZero() || !GV)
4109 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4114 F.BaseRegs[Idx] =
G;
4115 (void)InsertFormula(LU, LUIdx,
F);
4119void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4122 if (
Base.BaseGV)
return;
4124 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4125 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4126 if (
Base.Scale == 1)
4127 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4132void LSRInstance::GenerateConstantOffsetsImpl(
4133 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4134 const SmallVectorImpl<Immediate> &Worklist,
size_t Idx,
bool IsScaledReg) {
4136 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4138 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4140 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4142 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4144 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4150 F.ScaledReg =
nullptr;
4152 F.deleteBaseReg(
F.BaseRegs[Idx]);
4154 }
else if (IsScaledReg)
4157 F.BaseRegs[Idx] = NewG;
4159 (void)InsertFormula(LU, LUIdx,
F);
4174 const APInt *StepInt;
4179 for (Immediate
Offset : Worklist) {
4181 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4187 for (Immediate
Offset : Worklist)
4191 if (
G->isZero() ||
Imm.isZero() ||
4192 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4195 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4196 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4201 F.BaseRegs[Idx] =
G;
4206 (void)InsertFormula(LU, LUIdx,
F);
4210void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4216 if (LU.MaxOffset != LU.MinOffset)
4219 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4220 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4221 if (
Base.Scale == 1)
4222 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4228void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4230 if (LU.Kind != LSRUse::ICmpZero)
return;
4238 if (LU.MinOffset != LU.MaxOffset)
return;
4241 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4243 for (
const SCEV *BaseReg :
Base.BaseRegs)
4244 if (
BaseReg->getType()->isPointerTy())
4246 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4249 for (int64_t Factor : Factors) {
4254 if (
Base.BaseOffset.isMin() && Factor == -1)
4257 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4259 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4260 assert(Factor != 0 &&
"Zero factor not expected!");
4261 if (NewBaseOffset.getFixedValue() / Factor !=
4262 Base.BaseOffset.getFixedValue())
4270 Immediate
Offset = LU.MinOffset;
4271 if (
Offset.isMin() && Factor == -1)
4274 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4282 F.BaseOffset = NewBaseOffset;
4289 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4291 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4294 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4308 if (
F.UnfoldedOffset.isNonZero()) {
4309 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4311 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4312 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4313 Base.UnfoldedOffset.getFixedValue())
4317 IntTy,
F.UnfoldedOffset.getFixedValue()))
4322 (void)InsertFormula(LU, LUIdx,
F);
4329void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4336 if (
Base.Scale != 0 && !
Base.unscale())
4339 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4342 for (int64_t Factor : Factors) {
4343 Base.Scale = Factor;
4344 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4346 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4350 if (LU.Kind == LSRUse::Basic &&
4351 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4352 LU.AccessTy,
Base) &&
4353 LU.AllFixupsOutsideLoop)
4354 LU.Kind = LSRUse::Special;
4360 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4361 Base.BaseOffset.isZero() && !
Base.BaseGV)
4364 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4366 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4367 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4372 if (
const SCEV *Quotient =
getExactSDiv(AR, FactorS, SE,
true))
4373 if (!Quotient->isZero()) {
4376 F.ScaledReg = Quotient;
4377 F.deleteBaseReg(
F.BaseRegs[i]);
4381 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4382 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4386 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4388 (void)InsertFormula(LU, LUIdx,
F);
4404 const SCEV *Result =
nullptr;
4405 for (
auto &L :
Loops) {
4409 if (!New || (Result && New != Result))
4414 assert(Result &&
"failed to create expression");
4419void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4421 if (
Base.BaseGV)
return;
4431 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4434 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4438 for (
auto &LF : LU.Fixups)
4439 Loops.push_back(LF.PostIncLoops);
4441 for (
Type *SrcTy : Types) {
4450 const SCEV *NewScaledReg =
4452 if (!NewScaledReg || NewScaledReg->
isZero())
4454 F.ScaledReg = NewScaledReg;
4456 bool HasZeroBaseReg =
false;
4457 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4458 const SCEV *NewBaseReg =
4460 if (!NewBaseReg || NewBaseReg->
isZero()) {
4461 HasZeroBaseReg =
true;
4471 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4475 (void)InsertFormula(LU, LUIdx,
F);
4488 const SCEV *OrigReg;
4490 WorkItem(
size_t LI, Immediate
I,
const SCEV *R)
4491 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4493 void print(raw_ostream &OS)
const;
4499#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4500void WorkItem::print(raw_ostream &OS)
const {
4501 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4502 <<
" , add offset " <<
Imm;
4512void LSRInstance::GenerateCrossUseConstantOffsets() {
4514 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4516 DenseMap<const SCEV *, ImmMapTy>
Map;
4517 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4519 for (
const SCEV *Use : RegUses) {
4522 auto Pair =
Map.try_emplace(
Reg);
4525 Pair.first->second.insert(std::make_pair(Imm, Use));
4526 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(Use);
4533 SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
4535 for (
const SCEV *
Reg : Sequence) {
4536 const ImmMapTy &Imms =
Map.find(
Reg)->second;
4539 if (Imms.size() == 1)
4543 for (
const auto &Entry
4545 <<
' ' <<
Entry.first;
4549 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4551 const SCEV *OrigReg = J->second;
4553 Immediate JImm = J->first;
4554 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4557 UsedByIndicesMap[
Reg].
count() == 1) {
4565 Immediate
First = Imms.begin()->first;
4566 Immediate
Last = std::prev(Imms.end())->first;
4567 if (!
First.isCompatibleImmediate(
Last)) {
4574 bool Scalable =
First.isScalable() ||
Last.isScalable();
4575 int64_t FI =
First.getKnownMinValue();
4576 int64_t LI =
Last.getKnownMinValue();
4579 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4582 Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
4583 ImmMapTy::const_iterator OtherImms[] = {
4584 Imms.begin(), std::prev(Imms.end()),
4585 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4586 for (
const auto &M : OtherImms) {
4587 if (M == J || M == JE)
continue;
4588 if (!JImm.isCompatibleImmediate(
M->first))
4592 Immediate
Imm = JImm.subUnsigned(
M->first);
4593 for (
unsigned LUIdx : UsedByIndices.
set_bits())
4595 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4596 WorkItems.
push_back(WorkItem(LUIdx, Imm, OrigReg));
4603 UsedByIndicesMap.
clear();
4604 UniqueItems.
clear();
4607 for (
const WorkItem &WI : WorkItems) {
4608 size_t LUIdx = WI.LUIdx;
4609 LSRUse &LU =
Uses[LUIdx];
4610 Immediate
Imm = WI.Imm;
4611 const SCEV *OrigReg = WI.OrigReg;
4614 const SCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4618 for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4619 Formula
F = LU.Formulae[
L];
4626 if (
F.ScaledReg == OrigReg) {
4627 if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4629 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4631 const SCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4632 if (
F.referencesReg(S))
4635 NewF.BaseOffset =
Offset;
4636 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4639 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4648 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4650 if (
C->getValue()->isNegative() !=
4651 (NewF.BaseOffset.isLessThanZero()) &&
4652 (
C->getAPInt().abs() * APInt(
BitWidth,
F.Scale))
4653 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4658 NewF.canonicalize(*this->L);
4659 (void)InsertFormula(LU, LUIdx, NewF);
4662 for (
size_t N = 0, NE =
F.BaseRegs.size();
N != NE; ++
N) {
4664 if (BaseReg != OrigReg)
4667 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4668 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4669 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4671 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4673 LU.Kind, LU.AccessTy, NewF)) {
4677 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4681 NewF.UnfoldedOffset = NewUnfoldedOffset;
4683 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4688 for (
const SCEV *NewReg : NewF.BaseRegs)
4690 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4692 if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4694 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4695 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4698 NewF.BaseOffset.getFixedValue()))
4703 NewF.canonicalize(*this->L);
4704 (void)InsertFormula(LU, LUIdx, NewF);
4715LSRInstance::GenerateAllReuseFormulae() {
4718 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4719 LSRUse &LU =
Uses[LUIdx];
4720 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4721 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4722 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4723 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4725 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4726 LSRUse &LU =
Uses[LUIdx];
4727 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4728 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4729 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4730 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4731 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4732 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4733 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4734 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4736 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4737 LSRUse &LU =
Uses[LUIdx];
4738 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4739 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4742 GenerateCrossUseConstantOffsets();
4745 "After generating reuse formulae:\n";
4746 print_uses(
dbgs()));
4751void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4752 DenseSet<const SCEV *> VisitedRegs;
4753 SmallPtrSet<const SCEV *, 16> Regs;
4754 SmallPtrSet<const SCEV *, 16> LoserRegs;
4756 bool ChangedFormulae =
false;
4761 using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>,
size_t>;
4763 BestFormulaeTy BestFormulae;
4765 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4766 LSRUse &LU =
Uses[LUIdx];
4771 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4772 FIdx != NumForms; ++FIdx) {
4773 Formula &
F = LU.Formulae[FIdx];
4784 CostF.RateFormula(
F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4786 if (CostF.isLoser()) {
4798 for (
const SCEV *
Reg :
F.BaseRegs) {
4799 if (RegUses.isRegUsedByUsesOtherThan(
Reg, LUIdx))
4803 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4804 Key.push_back(
F.ScaledReg);
4809 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4810 BestFormulae.insert(std::make_pair(
Key, FIdx));
4814 Formula &Best = LU.Formulae[
P.first->second];
4816 Cost CostBest(L, SE,
TTI, AMK);
4818 CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
4819 HardwareLoopProfitable);
4820 if (CostF.isLess(CostBest))
4824 " in favor of formula ";
4825 Best.print(
dbgs());
dbgs() <<
'\n');
4828 ChangedFormulae =
true;
4830 LU.DeleteFormula(
F);
4838 LU.RecomputeRegs(LUIdx, RegUses);
4841 BestFormulae.clear();
4846 "After filtering out undesirable candidates:\n";
4854size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4856 for (
const LSRUse &LU :
Uses) {
4857 size_t FSize = LU.Formulae.size();
4872void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4876 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4877 "which use a superset of registers used by other "
4880 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4881 LSRUse &LU =
Uses[LUIdx];
4883 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4884 Formula &
F = LU.Formulae[i];
4885 if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4891 I =
F.BaseRegs.begin(),
E =
F.BaseRegs.end();
I !=
E; ++
I) {
4897 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4898 (uint64_t)
C->getValue()->getSExtValue());
4899 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4900 (
I -
F.BaseRegs.begin()));
4901 if (LU.HasFormulaWithSameRegs(NewF)) {
4904 LU.DeleteFormula(
F);
4915 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4916 (
I -
F.BaseRegs.begin()));
4917 if (LU.HasFormulaWithSameRegs(NewF)) {
4920 LU.DeleteFormula(
F);
4931 LU.RecomputeRegs(LUIdx, RegUses);
4940void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4945 dbgs() <<
"The search space is too complex.\n"
4946 "Narrowing the search space by assuming that uses separated "
4947 "by a constant offset will use the same registers.\n");
4951 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4952 LSRUse &LU =
Uses[LUIdx];
4953 for (
const Formula &
F : LU.Formulae) {
4954 if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
4956 assert((LU.Kind == LSRUse::Address || LU.Kind == LSRUse::ICmpZero) &&
4957 "Only address and cmp uses expected to have nonzero BaseOffset");
4959 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4963 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4964 LU.Kind, LU.AccessTy))
4969 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4970 LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
4973 for (LSRFixup &
Fixup : LU.Fixups) {
4974 Fixup.Offset +=
F.BaseOffset;
4975 LUThatHas->pushFixup(
Fixup);
4980 Type *FixupType = LUThatHas->Fixups[0].OperandValToReplace->getType();
4981 for (LSRFixup &
Fixup : LUThatHas->Fixups)
4982 assert(
Fixup.OperandValToReplace->getType() == FixupType &&
4983 "Expected all fixups to have the same type");
4988 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4989 Formula &
F = LUThatHas->Formulae[i];
4990 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4991 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4993 LUThatHas->DeleteFormula(
F);
5001 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
5004 DeleteUse(LU, LUIdx);
5017void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5021 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
5022 "undesirable dedicated registers.\n");
5024 FilterOutUndesirableDedicatedRegisters();
5039void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5044 dbgs() <<
"The search space is too complex.\n"
5045 "Narrowing the search space by choosing the best Formula "
5046 "from the Formulae with the same Scale and ScaledReg.\n");
5049 using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>,
size_t>;
5051 BestFormulaeTy BestFormulae;
5053 bool ChangedFormulae =
false;
5055 DenseSet<const SCEV *> VisitedRegs;
5056 SmallPtrSet<const SCEV *, 16> Regs;
5058 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5059 LSRUse &LU =
Uses[LUIdx];
5064 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5069 size_t FARegNum = 0;
5070 for (
const SCEV *
Reg : FA.BaseRegs) {
5071 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5072 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5074 size_t FBRegNum = 0;
5075 for (
const SCEV *
Reg : FB.BaseRegs) {
5076 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5077 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5079 if (FARegNum != FBRegNum)
5080 return FARegNum < FBRegNum;
5087 CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5089 CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5090 return CostFA.isLess(CostFB);
5094 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5096 Formula &
F = LU.Formulae[FIdx];
5099 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5103 Formula &Best = LU.Formulae[
P.first->second];
5104 if (IsBetterThan(
F, Best))
5108 " in favor of formula ";
5109 Best.print(
dbgs());
dbgs() <<
'\n');
5111 ChangedFormulae =
true;
5113 LU.DeleteFormula(
F);
5119 LU.RecomputeRegs(LUIdx, RegUses);
5122 BestFormulae.clear();
5127 "After filtering out undesirable candidates:\n";
5134void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5141 "Narrowing the search space by choosing the lowest "
5142 "register Formula for PostInc Uses.\n");
5144 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5145 LSRUse &LU =
Uses[LUIdx];
5147 if (LU.Kind != LSRUse::Address)
5153 size_t MinRegs = std::numeric_limits<size_t>::max();
5154 for (
const Formula &
F : LU.Formulae)
5155 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5158 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5160 Formula &
F = LU.Formulae[FIdx];
5161 if (
F.getNumRegs() > MinRegs) {
5164 LU.DeleteFormula(
F);
5171 LU.RecomputeRegs(LUIdx, RegUses);
5222void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5231 SmallPtrSet<const SCEV *, 4> UniqRegs;
5235 DenseMap <const SCEV *, float> RegNumMap;
5236 for (
const SCEV *
Reg : RegUses) {
5240 for (
const LSRUse &LU :
Uses) {
5241 if (!LU.Regs.count(
Reg))
5243 float P = LU.getNotSelectedProbability(
Reg);
5249 RegNumMap.
insert(std::make_pair(
Reg, PNotSel));
5253 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5256 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5257 LSRUse &LU =
Uses[LUIdx];
5259 if (LU.Formulae.size() < 2)
5264 float FMinRegNum = LU.Formulae[0].getNumRegs();
5265 float FMinARegNum = LU.Formulae[0].getNumRegs();
5267 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5268 Formula &
F = LU.Formulae[i];
5271 for (
const SCEV *BaseReg :
F.BaseRegs) {
5272 if (UniqRegs.
count(BaseReg))
5274 FRegNum += RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5277 RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5279 if (
const SCEV *ScaledReg =
F.ScaledReg) {
5280 if (!UniqRegs.
count(ScaledReg)) {
5282 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5285 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5288 if (FMinRegNum > FRegNum ||
5289 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5290 FMinRegNum = FRegNum;
5291 FMinARegNum = FARegNum;
5296 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5298 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5299 while (LU.Formulae.size() != 1) {
5302 LU.Formulae.pop_back();
5304 LU.RecomputeRegs(LUIdx, RegUses);
5305 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5306 Formula &
F = LU.Formulae[0];
5322 MemAccessTy AccessType) {
5332 return TTI.isLegalAddressingMode(
5333 AccessType.MemTy,
nullptr,
5334 Diff->getSExtValue(),
5335 true, 0, AccessType.AddrSpace) &&
5336 !
TTI.isLegalAddressingMode(
5337 AccessType.MemTy,
nullptr,
5338 -Diff->getSExtValue(),
5339 true, 0, AccessType.AddrSpace);
5345void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5348 SmallPtrSet<const SCEV *, 4> Taken;
5356 const SCEV *Best =
nullptr;
5357 unsigned BestNum = 0;
5358 for (
const SCEV *
Reg : RegUses) {
5363 BestNum = RegUses.getUsedByIndices(
Reg).count();
5365 unsigned Count = RegUses.getUsedByIndices(
Reg).count();
5366 if (
Count > BestNum) {
5374 if (
Count == BestNum) {
5375 int LUIdx = RegUses.getUsedByIndices(
Reg).find_first();
5376 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5378 Uses[LUIdx].AccessTy)) {
5385 assert(Best &&
"Failed to find best LSRUse candidate");
5387 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5388 <<
" will yield profitable reuse.\n");
5393 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5394 LSRUse &LU =
Uses[LUIdx];
5395 if (!LU.Regs.count(Best))
continue;
5398 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5399 Formula &
F = LU.Formulae[i];
5400 if (!
F.referencesReg(Best)) {
5402 LU.DeleteFormula(
F);
5406 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5412 LU.RecomputeRegs(LUIdx, RegUses);
5423void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5424 NarrowSearchSpaceByDetectingSupersets();
5425 NarrowSearchSpaceByCollapsingUnrolledCode();
5426 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5428 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5429 NarrowSearchSpaceByFilterPostInc();
5431 NarrowSearchSpaceByDeletingCostlyFormulas();
5433 NarrowSearchSpaceByPickingWinnerRegs();
5437void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5439 SmallVectorImpl<const Formula *> &Workspace,
5440 const Cost &CurCost,
5441 const SmallPtrSet<const SCEV *, 16> &CurRegs,
5442 DenseSet<const SCEV *> &VisitedRegs)
const {
5453 const LSRUse &LU =
Uses[Workspace.
size()];
5459 SmallSetVector<const SCEV *, 4> ReqRegs;
5460 for (
const SCEV *S : CurRegs)
5461 if (LU.Regs.count(S))
5464 SmallPtrSet<const SCEV *, 16> NewRegs;
5465 Cost NewCost(L, SE,
TTI, AMK);
5466 for (
const Formula &
F : LU.Formulae) {
5474 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5475 for (
const SCEV *
Reg : ReqRegs) {
5476 if ((
F.ScaledReg &&
F.ScaledReg ==
Reg) ||
5479 if (NumReqRegsToFind == 0)
5483 if (NumReqRegsToFind != 0) {
5494 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5495 if (NewCost.isLess(SolutionCost)) {
5497 if (Workspace.
size() !=
Uses.size()) {
5498 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5499 NewRegs, VisitedRegs);
5500 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5501 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5504 dbgs() <<
".\nRegs:\n";
5505 for (
const SCEV *S : NewRegs)
dbgs()
5506 <<
"- " << *S <<
"\n";
5509 SolutionCost = NewCost;
5510 Solution = Workspace;
5519void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution)
const {
5521 Cost SolutionCost(L, SE,
TTI, AMK);
5522 SolutionCost.Lose();
5523 Cost CurCost(L, SE,
TTI, AMK);
5524 SmallPtrSet<const SCEV *, 16> CurRegs;
5525 DenseSet<const SCEV *> VisitedRegs;
5529 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5530 CurRegs, VisitedRegs);
5531 if (Solution.
empty()) {
5538 "The chosen solution requires ";
5539 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5540 for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5545 Solution[i]->print(
dbgs());
5551 const bool EnableDropUnprofitableSolution = [&] {
5563 if (BaselineCost.isLess(SolutionCost)) {
5564 if (!EnableDropUnprofitableSolution)
5566 dbgs() <<
"Baseline is more profitable than chosen solution, "
5567 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5570 "solution, dropping LSR solution.\n";);
5581 const SmallVectorImpl<Instruction *> &Inputs)
5585 bool AllDominate =
true;
5592 for (Instruction *Inst : Inputs) {
5593 if (Inst == Tentative || !DT.
dominates(Inst, Tentative)) {
5594 AllDominate =
false;
5599 if (Tentative->
getParent() == Inst->getParent() &&
5600 (!BetterPos || !DT.
dominates(Inst, BetterPos)))
5610 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5611 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5615 if (!Rung)
return IP;
5616 Rung = Rung->getIDom();
5617 if (!Rung)
return IP;
5618 IDom = Rung->getBlock();
5621 const Loop *IDomLoop = LI.getLoopFor(IDom);
5622 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5623 if (IDomDepth <= IPLoopDepth &&
5624 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5641 SmallVector<Instruction *, 4> Inputs;
5644 if (LU.Kind == LSRUse::ICmpZero)
5645 if (Instruction *
I =
5648 if (LF.PostIncLoops.
count(L)) {
5649 if (LF.isUseFullyOutsideLoop(L))
5650 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5656 for (
const Loop *PIL : LF.PostIncLoops) {
5657 if (PIL == L)
continue;
5662 if (!ExitingBlocks.
empty()) {
5664 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5671 "Insertion point must be a normal instruction");
5681 while (IP->isEHPad()) ++IP;
5686 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5694Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5696 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const {
5697 if (LU.RigidFormula)
5698 return LF.OperandValToReplace;
5702 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5707 Rewriter.setPostInc(LF.PostIncLoops);
5712 Type *Ty =
F.getType();
5726 for (
const SCEV *
Reg :
F.BaseRegs) {
5727 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5735 Value *ICmpScaledV =
nullptr;
5737 const SCEV *ScaledS =
F.ScaledReg;
5743 if (LU.Kind == LSRUse::ICmpZero) {
5753 "The only scale supported by ICmpZero uses is -1!");
5754 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5762 if (!
Ops.empty() && LU.Kind == LSRUse::Address &&
5772 Ops.push_back(ScaledS);
5798 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5799 "Expanding mismatched offsets\n");
5801 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5802 if (
Offset.isNonZero()) {
5803 if (LU.Kind == LSRUse::ICmpZero) {
5810 IntTy, -(uint64_t)
Offset.getFixedValue(),
true);
5819 Ops.push_back(
Offset.getUnknownSCEV(SE, IntTy));
5824 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5825 if (UnfoldedOffset.isNonZero()) {
5827 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5831 const SCEV *FullS =
Ops.empty() ?
5842 if (LU.Kind == LSRUse::ICmpZero) {
5846 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5847 "a scale at the same time!");
5848 if (
F.Scale == -1) {
5849 if (ICmpScaledV->
getType() != OpTy) {
5859 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5860 "ICmp does not support folding a global value and "
5861 "a scale at the same time!");
5865 -(uint64_t)
Offset.getFixedValue(),
5867 if (
C->getType() != OpTy) {
5871 assert(
C &&
"Cast of ConstantInt should have folded");
5884void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRUse &LU,
5885 const LSRFixup &LF,
const Formula &
F,
5886 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5887 DenseMap<BasicBlock *, Value *>
Inserted;
5891 bool needUpdateFixups =
false;
5902 Loop *PNLoop = LI.getLoopFor(Parent);
5903 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5909 CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
5910 .setMergeIdenticalEdges()
5911 .setKeepOneInputPHIs());
5914 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5925 if (
L->contains(BB) && !
L->contains(PN))
5933 needUpdateFixups =
true;
5938 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5951 LF.OperandValToReplace->
getType(),
"tmp",
5958 if (
L->contains(
I) && !
L->contains(BB))
5959 InsertedNonLCSSAInsts.insert(
I);
5962 Pair.first->second = FullV;
5969 if (needUpdateFixups) {
5970 for (LSRUse &LU :
Uses)
5971 for (LSRFixup &
Fixup : LU.Fixups)
5975 if (
Fixup.UserInst == PN) {
5978 bool foundInOriginalPHI =
false;
5980 if (val ==
Fixup.OperandValToReplace) {
5981 foundInOriginalPHI =
true;
5986 if (foundInOriginalPHI)
5997 if (val ==
Fixup.OperandValToReplace)
5998 Fixup.UserInst = NewPN;
6008void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
6010 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6014 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6020 if (FullV->
getType() != OpTy) {
6032 if (LU.Kind == LSRUse::ICmpZero)
6048 const LSRFixup &
Fixup,
const LSRUse &LU,
6052 if (LU.Kind != LSRUse::Address)
6053 return IVIncInsertPos;
6057 Type *Ty =
I->getType();
6060 return IVIncInsertPos;
6067 return IVIncInsertPos;
6074void LSRInstance::ImplementSolution(
6075 const SmallVectorImpl<const Formula *> &Solution) {
6081 for (
const IVChain &Chain : IVChainVec) {
6087 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6088 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6091 Rewriter.setIVIncInsertPos(L, InsertPos);
6092 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6096 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6099 for (
const IVChain &Chain : IVChainVec) {
6100 GenerateIVChain(Chain, DeadInsts);
6104 for (
const WeakVH &
IV :
Rewriter.getInsertedIVs())
6122 for (PHINode &PN :
L->getHeader()->phis()) {
6123 BinaryOperator *BO =
nullptr;
6129 case Instruction::Sub:
6134 case Instruction::Add:
6151 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6160LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6161 DominatorTree &DT, LoopInfo &LI,
6162 const TargetTransformInfo &
TTI, AssumptionCache &AC,
6163 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6164 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6167 :
TTI.getPreferredAddressingMode(
L, &SE)),
6170 if (!
L->isLoopSimplifyForm())
6178 unsigned NumUsers = 0;
6182 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6190 auto FirstNonPHI = PN->
getParent()->getFirstNonPHIIt();
6200 L->getHeader()->printAsOperand(
dbgs(),
false);
6206 HardwareLoopProfitable =
6207 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6211#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6214 Rewriter.disableCanonicalMode();
6215 Rewriter.enableLSRMode();
6219 OptimizeLoopTermCond();
6222 if (IU.empty())
return;
6225 if (!
L->isInnermost()) {
6238 CollectInterestingTypesAndFactors();
6239 CollectFixupsAndInitialFormulae();
6240 CollectLoopInvariantFixupsAndFormulae();
6246 print_uses(
dbgs()));
6248 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6252 GenerateAllReuseFormulae();
6254 FilterOutUndesirableDedicatedRegisters();
6255 NarrowSearchSpaceUsingHeuristics();
6265 if (Solution.
empty())
6270 for (
const LSRUse &LU :
Uses) {
6271 for (
const Formula &
F : LU.Formulae)
6273 F) &&
"Illegal formula generated!");
6278 ImplementSolution(Solution);
6281#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6282void LSRInstance::print_factors_and_types(
raw_ostream &OS)
const {
6283 if (Factors.empty() &&
Types.empty())
return;
6285 OS <<
"LSR has identified the following interesting factors and types: ";
6288 for (int64_t Factor : Factors)
6289 OS <<
LS <<
'*' << Factor;
6291 for (
Type *Ty : Types)
6292 OS <<
LS <<
'(' << *Ty <<
')';
6296void LSRInstance::print_fixups(raw_ostream &OS)
const {
6297 OS <<
"LSR is examining the following fixup sites:\n";
6298 for (
const LSRUse &LU :
Uses)
6299 for (
const LSRFixup &LF : LU.Fixups) {
6306void LSRInstance::print_uses(raw_ostream &OS)
const {
6307 OS <<
"LSR is examining the following uses:\n";
6308 for (
const LSRUse &LU :
Uses) {
6312 for (
const Formula &
F : LU.Formulae) {
6320void LSRInstance::print(raw_ostream &OS)
const {
6321 print_factors_and_types(OS);
6333class LoopStrengthReduce :
public LoopPass {
6337 LoopStrengthReduce();
6340 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
6341 void getAnalysisUsage(AnalysisUsage &AU)
const override;
6346LoopStrengthReduce::LoopStrengthReduce() : LoopPass(
ID) {
6350void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6377ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6378 llvm::DIExpression::expr_op_iterator Begin =
6379 llvm::DIExpression::expr_op_iterator(Expr.
begin());
6380 llvm::DIExpression::expr_op_iterator End =
6381 llvm::DIExpression::expr_op_iterator(Expr.
end());
6382 return {Begin, End};
6385struct SCEVDbgValueBuilder {
6386 SCEVDbgValueBuilder() =
default;
6387 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6389 void clone(
const SCEVDbgValueBuilder &
Base) {
6390 LocationOps =
Base.LocationOps;
6395 LocationOps.
clear();
6402 SmallVector<Value *, 2> LocationOps;
6405 void pushUInt(uint64_t Operand) { Expr.
push_back(Operand); }
6412 unsigned ArgIndex = 0;
6413 if (It != LocationOps.
end()) {
6414 ArgIndex = std::distance(LocationOps.
begin(), It);
6416 ArgIndex = LocationOps.
size();
6422 void pushValue(
const SCEVUnknown *U) {
6427 bool pushConst(
const SCEVConstant *
C) {
6428 if (
C->getAPInt().getSignificantBits() > 64)
6430 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6431 Expr.
push_back(
C->getAPInt().getSExtValue());
6438 return ToDwarfOpIter(Expr);
6443 bool pushArithmeticExpr(
const llvm::SCEVCommutativeExpr *CommExpr,
6446 "Expected arithmetic SCEV type");
6448 unsigned EmitOperator = 0;
6449 for (
const auto &
Op : CommExpr->
operands()) {
6452 if (EmitOperator >= 1)
6453 pushOperator(DwarfOp);
6460 bool pushCast(
const llvm::SCEVCastExpr *
C,
bool IsSigned) {
6461 const llvm::SCEV *Inner =
C->getOperand(0);
6462 const llvm::Type *
Type =
C->getType();
6463 uint64_t ToWidth =
Type->getIntegerBitWidth();
6464 bool Success = pushSCEV(Inner);
6466 IsSigned ? llvm::dwarf::DW_ATE_signed
6467 : llvm::dwarf::DW_ATE_unsigned};
6468 for (
const auto &
Op : CastOps)
6474 bool pushSCEV(
const llvm::SCEV *S) {
6477 Success &= pushConst(StartInt);
6482 pushLocation(
U->getValue());
6485 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6488 Success &= pushSCEV(UDiv->getLHS());
6489 Success &= pushSCEV(UDiv->getRHS());
6490 pushOperator(llvm::dwarf::DW_OP_div);
6497 "Unexpected cast type in SCEV.");
6501 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6516 bool isIdentityFunction(uint64_t
Op,
const SCEV *S) {
6518 if (
C->getAPInt().getSignificantBits() > 64)
6520 int64_t
I =
C->getAPInt().getSExtValue();
6522 case llvm::dwarf::DW_OP_plus:
6523 case llvm::dwarf::DW_OP_minus:
6525 case llvm::dwarf::DW_OP_mul:
6526 case llvm::dwarf::DW_OP_div:
6539 bool SCEVToValueExpr(
const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6545 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6546 if (!pushSCEV(Stride))
6548 pushOperator(llvm::dwarf::DW_OP_mul);
6550 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6551 if (!pushSCEV(Start))
6553 pushOperator(llvm::dwarf::DW_OP_plus);
6559 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6560 pushLocation(OffsetValue);
6563 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6564 << std::to_string(
Offset) <<
"\n");
6570 bool createIterCountExpr(
const SCEV *S,
6571 const SCEVDbgValueBuilder &IterationCount,
6572 ScalarEvolution &SE) {
6581 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6585 if (!Rec->isAffine())
6593 clone(IterationCount);
6594 if (!SCEVToValueExpr(*Rec, SE))
6605 bool SCEVToIterCountExpr(
const llvm::SCEVAddRecExpr &SAR,
6606 ScalarEvolution &SE) {
6612 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6613 if (!pushSCEV(Start))
6615 pushOperator(llvm::dwarf::DW_OP_minus);
6617 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6618 if (!pushSCEV(Stride))
6620 pushOperator(llvm::dwarf::DW_OP_div);
6628 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6629 SmallVectorImpl<Value *> &DestLocations) {
6631 "Expected the locations vector to contain the IV");
6636 "Expected the location ops to contain the IV.");
6640 for (
const auto &
Op : LocationOps) {
6641 auto It =
find(DestLocations,
Op);
6642 if (It != DestLocations.
end()) {
6644 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6652 for (
const auto &
Op : expr_ops()) {
6654 Op.appendToVector(DestExpr);
6661 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6669struct DVIRecoveryRec {
6670 DVIRecoveryRec(DbgVariableRecord *DVR)
6671 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6673 DbgVariableRecord *DbgRef;
6675 bool HadLocationArgList;
6681 for (
auto &RE : RecoveryExprs)
6683 RecoveryExprs.clear();
6686 ~DVIRecoveryRec() { clear(); }
6694 auto expr_ops = ToDwarfOpIter(Expr);
6696 for (
auto Op : expr_ops)
6705template <
typename T>
6709 "contain any DW_OP_llvm_arg operands.");
6716template <
typename T>
6721 "Expected expression that references DIArglist locations using "
6722 "DW_OP_llvm_arg operands.");
6724 for (
Value *V : Locations)
6741 if (NumLLVMArgs == 0) {
6748 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6778 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6779 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6780 assert(DVIRec.Expr &&
"Expected an expression");
6785 if (!DVIRec.HadLocationArgList) {
6786 assert(DVIRec.LocationOps.size() == 1 &&
6787 "Unexpected number of location ops.");
6791 Value *CachedValue =
6796 for (
WeakVH VH : DVIRec.LocationOps) {
6804 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6809 const SCEV *SCEVInductionVar,
6810 SCEVDbgValueBuilder IterCountExpr) {
6824 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6826 NewLocationOps.
push_back(LSRInductionVar);
6828 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6829 WeakVH VH = DVIRec.LocationOps[i];
6835 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6837 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6845 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6846 <<
" refers to a location that is now undef or erased. "
6847 "Salvage abandoned.\n");
6851 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6852 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6854 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6855 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6859 if (std::optional<APInt>
Offset =
6861 if (
Offset->getSignificantBits() <= 64)
6862 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6865 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6874 assert(DVIRec.RecoveryExprs.size() == 1 &&
6875 "Expected only a single recovery expression for an empty "
6877 assert(DVIRec.RecoveryExprs[0] &&
6878 "Expected a SCEVDbgSalvageBuilder for location 0");
6879 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6880 B->appendToVectors(
NewExpr, NewLocationOps);
6882 for (
const auto &
Op : DVIRec.Expr->
expr_ops()) {
6890 SCEVDbgValueBuilder *DbgBuilder =
6891 DVIRec.RecoveryExprs[LocationArgIndex].get();
6897 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6898 "Expected a positive index for the location-op position.");
6899 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6903 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6907 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Updated DVI: " << *DVIRec.DbgRef <<
"\n");
6915 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6916 if (DVIToUpdate.empty())
6920 assert(SCEVInductionVar &&
6921 "Anticipated a SCEV for the post-LSR induction variable");
6925 if (!IVAddRec->isAffine())
6933 SCEVDbgValueBuilder IterCountExpr;
6934 IterCountExpr.pushLocation(LSRInductionVar);
6935 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6938 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6941 for (
auto &DVIRec : DVIToUpdate) {
6942 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6953 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
6954 for (
const auto &
B : L->getBlocks()) {
6955 for (
auto &
I : *
B) {
6957 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
6962 if (DbgVal.isKillLocation())
6967 const auto &HasTranslatableLocationOps =
6969 for (
const auto LocOp : DbgValToTranslate.location_ops()) {
6983 if (!HasTranslatableLocationOps(DbgVal))
6986 std::unique_ptr<DVIRecoveryRec> NewRec =
6987 std::make_unique<DVIRecoveryRec>(&DbgVal);
6991 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
6992 for (
const auto LocOp : DbgVal.location_ops()) {
6993 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
6994 NewRec->LocationOps.push_back(LocOp);
6995 NewRec->HadLocationArgList = DbgVal.hasArgList();
6997 SalvageableDVISCEVs.push_back(std::move(NewRec));
7007 const LSRInstance &LSR) {
7009 auto IsSuitableIV = [&](
PHINode *
P) {
7020 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7027 if (IsSuitableIV(
P))
7031 for (
PHINode &
P : L.getHeader()->phis()) {
7032 if (IsSuitableIV(&
P))
7050 std::unique_ptr<MemorySSAUpdater> MSSAU;
7052 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7055 const LSRInstance &Reducer =
7056 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7057 Changed |= Reducer.getChanged();
7064#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7067 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7081 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7095 if (SalvageableDVIRecords.
empty())
7101 for (
const auto &L : LI) {
7105 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7106 "could not be identified.\n");
7110 for (
auto &Rec : SalvageableDVIRecords)
7112 SalvageableDVIRecords.
clear();
7116bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & ) {
7120 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7121 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7122 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7123 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7124 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7125 *
L->getHeader()->getParent());
7126 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7127 *
L->getHeader()->getParent());
7128 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7129 *
L->getHeader()->getParent());
7130 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7133 MSSA = &MSSAAnalysis->getMSSA();
7150char LoopStrengthReduce::ID = 0;
7153 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
early cse Early CSE w MemorySSA
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static Immediate ExtractImmediate(SCEVUse &S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< SCEVUse > &Good, SmallVectorImpl< SCEVUse > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static Instruction * getFixupInsertPos(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, DominatorTree &DT)
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from the cache.
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static GlobalValue * ExtractSymbol(SCEVUse &S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a new SCEV with that value excluded.
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number required for the individual IV users.
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth, const TargetTransformInfo &TTI)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Value * getCondition() const
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
ArrayRef< SCEVUse > operands() const
bool hasNoSignedWrap() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
static constexpr auto FlagAnyWrap
LLVM_ABI ArrayRef< SCEVUse > operands() const
Return operands of this SCEV expression.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAddRecExpr(SCEVUse Start, SCEVUse Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
void setOperand(unsigned i, Value *Val)
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
iterator_range< use_iterator > uses()
A Value handle that may be null, and that nulls itself out when the referenced value is deleted.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
match_bind< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool isa_and_nonnull(const Y &Val)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
FunctionAddr VTableAddr Count
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
SCEVUseT< const SCEV * > SCEVUse
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.