132#define DEBUG_TYPE "loop-reduce"
149 cl::desc(
"Enable LSR phi elimination"));
154 cl::desc(
"Add instruction count to a LSR cost model"));
159 cl::desc(
"Narrow LSR complex solution using"
160 " expectation of registers number"));
166 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
167 " with the same ScaledReg and Scale"));
171 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
175 "Prefer pre-indexed addressing mode"),
177 "Prefer post-indexed addressing mode"),
182 cl::init(std::numeric_limits<uint16_t>::max()),
183 cl::desc(
"LSR search space complexity limit"));
187 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
191 cl::desc(
"Attempt to drop solution if it is less profitable"));
195 cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
199 cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
205 cl::desc(
"Stress test LSR IV chains"));
215 std::numeric_limits<unsigned>::max();
217 Type *MemTy =
nullptr;
220 MemAccessTy() =
default;
221 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
224 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
229 static MemAccessTy getUnknown(LLVMContext &Ctx,
230 unsigned AS = UnknownAddressSpace) {
231 return MemAccessTy(Type::getVoidTy(Ctx), AS);
242 SmallBitVector UsedByIndices;
244 void print(raw_ostream &OS)
const;
251 constexpr Immediate(ScalarTy MinVal,
bool Scalable)
252 : FixedOrScalableQuantity(MinVal, Scalable) {}
254 constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
255 : FixedOrScalableQuantity(
V) {}
258 constexpr Immediate() =
delete;
260 static constexpr Immediate getFixed(ScalarTy MinVal) {
261 return {MinVal,
false};
263 static constexpr Immediate getScalable(ScalarTy MinVal) {
264 return {MinVal,
true};
266 static constexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
267 return {MinVal, Scalable};
269 static constexpr Immediate getZero() {
return {0,
false}; }
270 static constexpr Immediate getFixedMin() {
271 return {std::numeric_limits<int64_t>::min(),
false};
273 static constexpr Immediate getFixedMax() {
274 return {std::numeric_limits<int64_t>::max(),
false};
276 static constexpr Immediate getScalableMin() {
277 return {std::numeric_limits<int64_t>::min(),
true};
279 static constexpr Immediate getScalableMax() {
280 return {std::numeric_limits<int64_t>::max(),
true};
283 constexpr bool isLessThanZero()
const {
return Quantity < 0; }
285 constexpr bool isGreaterThanZero()
const {
return Quantity > 0; }
287 constexpr bool isCompatibleImmediate(
const Immediate &Imm)
const {
288 return isZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
291 constexpr bool isMin()
const {
292 return Quantity == std::numeric_limits<ScalarTy>::min();
295 constexpr bool isMax()
const {
296 return Quantity == std::numeric_limits<ScalarTy>::max();
300 constexpr Immediate addUnsigned(
const Immediate &
RHS)
const {
301 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
302 ScalarTy
Value = (uint64_t)Quantity +
RHS.getKnownMinValue();
303 return {
Value, Scalable ||
RHS.isScalable()};
306 constexpr Immediate subUnsigned(
const Immediate &
RHS)
const {
307 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
308 ScalarTy
Value = (uint64_t)Quantity -
RHS.getKnownMinValue();
309 return {
Value, Scalable ||
RHS.isScalable()};
313 constexpr Immediate mulUnsigned(
const ScalarTy
RHS)
const {
314 ScalarTy
Value = (uint64_t)Quantity *
RHS;
315 return {
Value, Scalable};
319 const SCEV *getSCEV(ScalarEvolution &SE,
Type *Ty)
const {
326 const SCEV *getNegativeSCEV(ScalarEvolution &SE,
Type *Ty)
const {
327 const SCEV *NegS = SE.
getConstant(Ty, -(uint64_t)Quantity);
333 const SCEV *getUnknownSCEV(ScalarEvolution &SE,
Type *Ty)
const {
/// Strict-weak ordering for Immediate keys used in sorted containers:
/// scalable and fixed immediates are segregated first (the return values on
/// the elided original lines 351/353 are not visible here — presumably
/// scalable sorts after fixed; TODO confirm), then immediates of the same
/// kind order by their known minimum value.
348struct KeyOrderTargetImmediate {
349 bool operator()(
const Immediate &
LHS,
const Immediate &
RHS)
const {
// Mixed scalable/fixed: kind decides the ordering (result line elided).
350 if (
LHS.isScalable() && !
RHS.isScalable())
352 if (!
LHS.isScalable() &&
RHS.isScalable())
// Same kind: compare the underlying minimum quantity.
354 return LHS.getKnownMinValue() <
RHS.getKnownMinValue();
/// Strict-weak ordering for (size_t, Immediate) pairs: order primarily by
/// the size_t component, and break ties by delegating to
/// KeyOrderTargetImmediate on the Immediate component.
/// NOTE(review): the size-inequality guard (original line 366) is elided in
/// this extraction; the visible code falls through to the tie-break when the
/// sizes compare equal.
361struct KeyOrderSizeTAndImmediate {
362 bool operator()(
const std::pair<size_t, Immediate> &
LHS,
363 const std::pair<size_t, Immediate> &
RHS)
const {
364 size_t LSize =
LHS.first;
365 size_t RSize =
RHS.first;
// Primary key: the size_t component.
367 return LSize < RSize;
// Tie-break: reuse the Immediate ordering defined above.
368 return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
375 OS <<
"[NumUses=" << UsedByIndices.
count() <<
']';
387 using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
389 RegUsesTy RegUsesMap;
393 void countRegister(
const SCEV *
Reg,
size_t LUIdx);
394 void dropRegister(
const SCEV *
Reg,
size_t LUIdx);
395 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
397 bool isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const;
399 const SmallBitVector &getUsedByIndices(
const SCEV *
Reg)
const;
/// Record that the register \p Reg is used by the LSRUse at index \p LUIdx.
/// Creates the tracking entry on first sight of the register and marks the
/// use index in its bit vector.
415RegUseTracker::countRegister(
const SCEV *
Reg,
size_t LUIdx) {
// try_emplace default-constructs the RegSortData the first time this
// register is counted; subsequent calls find the existing entry.
416 std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(
Reg);
417 RegSortData &RSD = Pair.first->second;
// Grow the bit vector (never shrink) so index LUIdx is addressable,
// then mark this use as a user of Reg.
420 RSD.UsedByIndices.
resize(std::max(RSD.UsedByIndices.
size(), LUIdx + 1));
421 RSD.UsedByIndices.
set(LUIdx);
/// Remove the record that register \p Reg is used by the LSRUse at index
/// \p LUIdx. The register must already be tracked (asserted); only the one
/// use bit is cleared — the map entry itself is kept.
425RegUseTracker::dropRegister(
const SCEV *
Reg,
size_t LUIdx) {
426 RegUsesTy::iterator It = RegUsesMap.find(
Reg);
427 assert(It != RegUsesMap.end());
428 RegSortData &RSD = It->second;
// Clear just this use's bit; other uses of Reg remain recorded.
430 RSD.UsedByIndices.
reset(LUIdx);
/// Update all per-register use bit vectors after the use at \p LUIdx has
/// been replaced by the (former) last use at \p LastLUIdx — the swap-and-pop
/// idiom applied to use indices.
434RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
435 assert(LUIdx <= LastLUIdx);
// For every tracked register: copy the "used by LastLUIdx" bit into slot
// LUIdx (false if the vector never reached LastLUIdx), then truncate the
// vector so the now-stale LastLUIdx slot is dropped.
439 for (
auto &Pair : RegUsesMap) {
440 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
441 if (LUIdx < UsedByIndices.
size())
442 UsedByIndices[LUIdx] =
443 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
444 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
/// Return true if \p Reg is used by any LSRUse other than the one at
/// \p LUIdx. An untracked register is used by no one.
449RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const {
450 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
// Not tracked at all => not used by anyone (return elided in extraction).
451 if (
I == RegUsesMap.end())
453 const SmallBitVector &UsedByIndices =
I->second.UsedByIndices;
// NOTE(review): the declaration of 'i' (original line 454, presumably
// int i = UsedByIndices.find_first()) is elided in this extraction.
// -1 means no bits are set, i.e. no use references Reg.
455 if (i == -1)
return false;
// A single set bit at an index other than LUIdx means some other use
// references Reg; further logic (multiple bits) is elided below.
456 if ((
size_t)i != LUIdx)
return true;
/// Return the bit vector of use indices that reference \p Reg. The register
/// must already be tracked (asserted); the reference stays valid until the
/// map entry is mutated or removed.
460const SmallBitVector &RegUseTracker::getUsedByIndices(
const SCEV *
Reg)
const {
461 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
462 assert(
I != RegUsesMap.end() &&
"Unknown register!");
463 return I->second.UsedByIndices;
466void RegUseTracker::clear() {
477 GlobalValue *BaseGV =
nullptr;
480 Immediate BaseOffset = Immediate::getZero();
483 bool HasBaseReg =
false;
506 const SCEV *ScaledReg =
nullptr;
511 Immediate UnfoldedOffset = Immediate::getZero();
515 void initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE);
519 void canonicalize(
const Loop &L);
523 bool hasZeroEnd()
const;
525 bool countsDownToZero()
const;
527 size_t getNumRegs()
const;
530 void deleteBaseReg(
const SCEV *&S);
532 bool referencesReg(
const SCEV *S)
const;
533 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
534 const RegUseTracker &RegUses)
const;
536 void print(raw_ostream &OS)
const;
554 for (
const SCEV *S :
Add->operands())
560 const SCEV *Start, *Step;
575 if (
Mul->getOperand(0)->isAllOnesValue()) {
584 for (
const SCEV *S : MyGood)
586 for (
const SCEV *S : MyBad)
598void Formula::initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE) {
605 BaseRegs.push_back(Sum);
611 BaseRegs.push_back(Sum);
626bool Formula::isCanonical(
const Loop &L)
const {
627 assert((Scale == 0 || ScaledReg) &&
628 "ScaledReg must be non-null if Scale is non-zero");
631 return BaseRegs.size() <= 1;
636 if (Scale == 1 && BaseRegs.empty())
645 return none_of(BaseRegs, [&L](
const SCEV *S) {
656void Formula::canonicalize(
const Loop &L) {
660 if (BaseRegs.empty()) {
662 assert(ScaledReg &&
"Expected 1*reg => reg");
663 assert(Scale == 1 &&
"Expected 1*reg => reg");
664 BaseRegs.push_back(ScaledReg);
672 ScaledReg = BaseRegs.pop_back_val();
680 auto I =
find_if(BaseRegs, [&L](
const SCEV *S) {
683 if (
I != BaseRegs.end())
693bool Formula::unscale() {
697 BaseRegs.push_back(ScaledReg);
702bool Formula::hasZeroEnd()
const {
703 if (UnfoldedOffset || BaseOffset)
705 if (BaseRegs.size() != 1 || ScaledReg)
710bool Formula::countsDownToZero()
const {
713 assert(BaseRegs.size() == 1 &&
"hasZeroEnd should mean one BaseReg");
714 const APInt *StepInt;
/// Return the total number of registers this formula references: the base
/// registers plus one if a scaled register is present (!!ScaledReg maps the
/// pointer to 0 or 1).
722size_t Formula::getNumRegs()
const {
723 return !!ScaledReg + BaseRegs.size();
728Type *Formula::getType()
const {
729 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
730 ScaledReg ? ScaledReg->
getType() :
736void Formula::deleteBaseReg(
const SCEV *&S) {
737 if (&S != &BaseRegs.back())
743bool Formula::referencesReg(
const SCEV *S)
const {
749bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
750 const RegUseTracker &RegUses)
const {
752 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
754 for (
const SCEV *BaseReg : BaseRegs)
755 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
761void Formula::print(raw_ostream &OS)
const {
762 ListSeparator
Plus(
" + ");
767 if (BaseOffset.isNonZero())
768 OS <<
Plus << BaseOffset;
770 for (
const SCEV *BaseReg : BaseRegs)
773 if (HasBaseReg && BaseRegs.empty())
774 OS <<
Plus <<
"**error: HasBaseReg**";
775 else if (!HasBaseReg && !BaseRegs.empty())
776 OS <<
Plus <<
"**error: !HasBaseReg**";
779 OS <<
Plus << Scale <<
"*reg(";
786 if (UnfoldedOffset.isNonZero())
787 OS <<
Plus <<
"imm(" << UnfoldedOffset <<
')';
827 bool IgnoreSignificantBits =
false) {
838 if (
RA.isAllOnes()) {
839 if (
LHS->getType()->isPointerTy())
852 const APInt &LA =
C->getAPInt();
861 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
863 IgnoreSignificantBits);
864 if (!Step)
return nullptr;
866 IgnoreSignificantBits);
867 if (!Start)
return nullptr;
880 for (
const SCEV *S :
Add->operands()) {
882 if (!
Op)
return nullptr;
910 for (
const SCEV *S :
Mul->operands()) {
913 IgnoreSignificantBits)) {
933 if (
C->getSignificantBits() <= 64) {
935 return Immediate::getFixed(
C->getSExtValue());
940 if (Result.isNonZero())
946 if (Result.isNonZero())
954 return Immediate::getScalable(
C->getSExtValue());
956 return Immediate::getZero();
991 if (
SI->getPointerOperand() == OperandVal)
996 switch (
II->getIntrinsicID()) {
997 case Intrinsic::memset:
998 case Intrinsic::prefetch:
999 case Intrinsic::masked_load:
1000 if (
II->getArgOperand(0) == OperandVal)
1003 case Intrinsic::masked_store:
1004 if (
II->getArgOperand(1) == OperandVal)
1007 case Intrinsic::memmove:
1008 case Intrinsic::memcpy:
1009 if (
II->getArgOperand(0) == OperandVal ||
1010 II->getArgOperand(1) == OperandVal)
1015 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo)) {
1016 if (IntrInfo.
PtrVal == OperandVal)
1022 if (RMW->getPointerOperand() == OperandVal)
1025 if (CmpX->getPointerOperand() == OperandVal)
1034 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1038 AccessTy.MemTy = Ty;
1042 AccessTy.AddrSpace =
SI->getPointerAddressSpace();
1044 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1046 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1048 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1050 switch (
II->getIntrinsicID()) {
1051 case Intrinsic::prefetch:
1052 case Intrinsic::memset:
1053 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1054 AccessTy.MemTy = OperandVal->
getType();
1056 case Intrinsic::memmove:
1057 case Intrinsic::memcpy:
1059 AccessTy.MemTy = OperandVal->
getType();
1061 case Intrinsic::masked_load:
1062 AccessTy.AddrSpace =
1063 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1065 case Intrinsic::masked_store:
1066 AccessTy.AddrSpace =
1067 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1071 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo) && IntrInfo.
PtrVal) {
1127 if (!Processed.
insert(S).second)
1131 for (
const SCEV *S :
Add->operands()) {
1138 const SCEV *Op0, *Op1;
1147 Value *UVal = U->getValue();
1151 if (UI && UI->
getOpcode() == Instruction::Mul &&
1184 const LSRUse &LU,
const Formula &
F);
1188 const LSRUse &LU,
const Formula &
F,
1195 const Loop *
L =
nullptr;
1196 ScalarEvolution *SE =
nullptr;
1197 const TargetTransformInfo *
TTI =
nullptr;
1198 TargetTransformInfo::LSRCost
C;
1203 Cost(
const Loop *L, ScalarEvolution &SE,
const TargetTransformInfo &
TTI,
1205 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1223 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1224 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1225 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1226 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0
u);
1232 return C.NumRegs == ~0
u;
1235 void RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1236 const DenseSet<const SCEV *> &VisitedRegs,
const LSRUse &LU,
1237 bool HardwareLoopProfitable,
1238 SmallPtrSetImpl<const SCEV *> *LoserRegs =
nullptr);
1240 void print(raw_ostream &OS)
const;
1244 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1245 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1246 bool HardwareLoopProfitable);
1247 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1248 SmallPtrSetImpl<const SCEV *> &Regs,
1249 const LSRUse &LU,
bool HardwareLoopProfitable,
1250 SmallPtrSetImpl<const SCEV *> *LoserRegs);
1261 Value *OperandValToReplace =
nullptr;
1271 Immediate
Offset = Immediate::getZero();
1273 LSRFixup() =
default;
1275 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1277 void print(raw_ostream &OS)
const;
1287 DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;
1300 using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1303 MemAccessTy AccessTy;
1309 Immediate MinOffset = Immediate::getFixedMax();
1310 Immediate MaxOffset = Immediate::getFixedMin();
1314 bool AllFixupsOutsideLoop =
true;
1319 bool AllFixupsUnconditional =
true;
1326 bool RigidFormula =
false;
1334 SmallPtrSet<const SCEV *, 4> Regs;
1336 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1338 LSRFixup &getNewFixup() {
1339 Fixups.push_back(LSRFixup());
1343 void pushFixup(LSRFixup &f) {
1345 if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1346 MaxOffset =
f.Offset;
1347 if (Immediate::isKnownLT(
f.Offset, MinOffset))
1348 MinOffset =
f.Offset;
1351 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1352 float getNotSelectedProbability(
const SCEV *
Reg)
const;
1353 bool InsertFormula(
const Formula &
F,
const Loop &L);
1354 void DeleteFormula(Formula &
F);
1355 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1357 void print(raw_ostream &OS)
const;
1364 LSRUse::KindType Kind, MemAccessTy AccessTy,
1365 GlobalValue *BaseGV, Immediate BaseOffset,
1366 bool HasBaseReg, int64_t Scale,
1367 Instruction *
Fixup =
nullptr);
1380 [&](
unsigned i,
const SCEV *
Reg) {
1381 return i + getSetupCost(Reg, Depth - 1);
1390void Cost::RateRegister(
const Formula &
F,
const SCEV *
Reg,
1391 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1392 bool HardwareLoopProfitable) {
1397 if (AR->getLoop() != L) {
1404 if (!AR->getLoop()->contains(L)) {
1414 unsigned LoopCost = 1;
1423 F.BaseOffset.isFixed() &&
1424 *Step ==
F.BaseOffset.getFixedValue();
1429 if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
1436 if (LU.Kind == LSRUse::ICmpZero &&
F.countsDownToZero() &&
1437 HardwareLoopProfitable)
1439 C.AddRecCost += LoopCost;
1444 if (!Regs.
count(AR->getOperand(1))) {
1445 RateRegister(
F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);
1457 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1466void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1467 SmallPtrSetImpl<const SCEV *> &Regs,
1468 const LSRUse &LU,
bool HardwareLoopProfitable,
1469 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1470 if (LoserRegs && LoserRegs->
count(
Reg)) {
1475 RateRegister(
F,
Reg, Regs, LU, HardwareLoopProfitable);
1476 if (LoserRegs && isLoser())
1481void Cost::RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1482 const DenseSet<const SCEV *> &VisitedRegs,
1483 const LSRUse &LU,
bool HardwareLoopProfitable,
1484 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1487 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1489 unsigned PrevAddRecCost =
C.AddRecCost;
1490 unsigned PrevNumRegs =
C.NumRegs;
1491 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1492 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1493 if (VisitedRegs.
count(ScaledReg)) {
1497 RatePrimaryRegister(
F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1502 for (
const SCEV *BaseReg :
F.BaseRegs) {
1503 if (VisitedRegs.
count(BaseReg)) {
1507 RatePrimaryRegister(
F, BaseReg, Regs, LU, HardwareLoopProfitable,
1514 size_t NumBaseParts =
F.getNumRegs();
1515 if (NumBaseParts > 1)
1520 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1526 for (
const LSRFixup &
Fixup : LU.Fixups) {
1527 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1528 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1532 else if (
Offset.isNonZero())
1534 APInt(64,
Offset.getKnownMinValue(),
true).getSignificantBits();
1538 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1559 if (
C.NumRegs > TTIRegNum) {
1562 if (PrevNumRegs > TTIRegNum)
1563 C.Insns += (
C.NumRegs - PrevNumRegs);
1565 C.Insns += (
C.NumRegs - TTIRegNum);
1578 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1582 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1585 if (LU.Kind != LSRUse::ICmpZero)
1586 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1592 C.Insns = std::numeric_limits<unsigned>::max();
1593 C.NumRegs = std::numeric_limits<unsigned>::max();
1594 C.AddRecCost = std::numeric_limits<unsigned>::max();
1595 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1596 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1597 C.ImmCost = std::numeric_limits<unsigned>::max();
1598 C.SetupCost = std::numeric_limits<unsigned>::max();
1599 C.ScaleCost = std::numeric_limits<unsigned>::max();
1603bool Cost::isLess(
const Cost &
Other)
const {
1605 C.Insns !=
Other.C.Insns)
1606 return C.Insns <
Other.C.Insns;
1610#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1613 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1614 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1615 if (
C.AddRecCost != 0)
1616 OS <<
", with addrec cost " <<
C.AddRecCost;
1617 if (
C.NumIVMuls != 0)
1618 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1619 << (
C.NumIVMuls == 1 ?
"" :
"s");
1620 if (
C.NumBaseAdds != 0)
1621 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1622 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1623 if (
C.ScaleCost != 0)
1624 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1626 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1627 if (
C.SetupCost != 0)
1628 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1637bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1640 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1641 if (PN->getIncomingValue(i) == OperandValToReplace &&
1642 L->contains(PN->getIncomingBlock(i)))
1647 return !
L->contains(UserInst);
1650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1651void LSRFixup::print(raw_ostream &OS)
const {
1656 Store->getOperand(0)->printAsOperand(OS,
false);
1662 OS <<
", OperandValToReplace=";
1665 for (
const Loop *PIL : PostIncLoops) {
1666 OS <<
", PostIncLoop=";
1667 PIL->getHeader()->printAsOperand(OS,
false);
1671 OS <<
", Offset=" <<
Offset;
1681bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1683 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1690float LSRUse::getNotSelectedProbability(
const SCEV *
Reg)
const {
1692 for (
const Formula &
F : Formulae)
1693 if (
F.referencesReg(
Reg))
1695 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1700bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1701 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1703 if (!Formulae.empty() && RigidFormula)
1707 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1715 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1716 "Zero allocated in a scaled register!");
1718 for (
const SCEV *BaseReg :
F.BaseRegs)
1719 assert(!
BaseReg->isZero() &&
"Zero allocated in a base register!");
1723 Formulae.push_back(
F);
1734void LSRUse::DeleteFormula(Formula &
F) {
1735 if (&
F != &Formulae.back())
1737 Formulae.pop_back();
1741void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1743 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1745 for (
const Formula &
F : Formulae) {
1746 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1751 for (
const SCEV *S : OldRegs)
1753 RegUses.dropRegister(S, LUIdx);
1756#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1757void LSRUse::print(raw_ostream &OS)
const {
1758 OS <<
"LSR Use: Kind=";
1760 case Basic: OS <<
"Basic";
break;
1761 case Special: OS <<
"Special";
break;
1762 case ICmpZero: OS <<
"ICmpZero";
break;
1764 OS <<
"Address of ";
1768 OS << *AccessTy.MemTy;
1771 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1774 OS <<
", Offsets={";
1775 bool NeedComma =
false;
1776 for (
const LSRFixup &
Fixup : Fixups) {
1777 if (NeedComma) OS <<
',';
1783 if (AllFixupsOutsideLoop)
1784 OS <<
", all-fixups-outside-loop";
1786 if (AllFixupsUnconditional)
1787 OS <<
", all-fixups-unconditional";
1796 LSRUse::KindType Kind, MemAccessTy AccessTy,
1798 bool HasBaseReg, int64_t Scale,
1801 case LSRUse::Address: {
1802 int64_t FixedOffset =
1803 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1804 int64_t ScalableOffset =
1805 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1806 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
1807 HasBaseReg, Scale, AccessTy.AddrSpace,
1808 Fixup, ScalableOffset);
1810 case LSRUse::ICmpZero:
1817 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1822 if (Scale != 0 && Scale != -1)
1827 if (BaseOffset.isNonZero()) {
1830 if (BaseOffset.isScalable())
1840 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1841 return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
1849 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1851 case LSRUse::Special:
1853 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1860 Immediate MinOffset, Immediate MaxOffset,
1861 LSRUse::KindType Kind, MemAccessTy AccessTy,
1863 bool HasBaseReg, int64_t Scale) {
1864 if (BaseOffset.isNonZero() &&
1865 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1866 BaseOffset.isScalable() != MaxOffset.isScalable()))
1869 int64_t
Base = BaseOffset.getKnownMinValue();
1870 int64_t Min = MinOffset.getKnownMinValue();
1871 int64_t Max = MaxOffset.getKnownMinValue();
1874 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1877 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1880 HasBaseReg, Scale) &&
1886 Immediate MinOffset, Immediate MaxOffset,
1887 LSRUse::KindType Kind, MemAccessTy AccessTy,
1888 const Formula &
F,
const Loop &L) {
1896 assert((
F.isCanonical(L) ||
F.Scale != 0));
1898 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1903 Immediate MaxOffset, LSRUse::KindType Kind,
1905 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1908 BaseOffset, HasBaseReg, Scale) ||
1913 BaseGV, BaseOffset,
true, 0));
1917 Immediate MaxOffset, LSRUse::KindType Kind,
1918 MemAccessTy AccessTy,
const Formula &
F) {
1919 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1920 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1926 return TTI.isLegalAddScalableImmediate(
Offset.getKnownMinValue());
1928 return TTI.isLegalAddImmediate(
Offset.getFixedValue());
1932 const LSRUse &LU,
const Formula &
F) {
1934 if (LU.Kind == LSRUse::Address &&
TTI.LSRWithInstrQueries()) {
1935 for (
const LSRFixup &
Fixup : LU.Fixups)
1937 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1938 F.Scale,
Fixup.UserInst))
1944 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1949 const LSRUse &LU,
const Formula &
F,
1958 return F.Scale != 1;
1961 case LSRUse::Address: {
1963 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1964 if (
F.BaseOffset.isScalable()) {
1965 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1966 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1968 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1969 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1973 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1976 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1979 "Legal addressing mode has an illegal cost!");
1980 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1982 case LSRUse::ICmpZero:
1984 case LSRUse::Special:
1994 LSRUse::KindType Kind, MemAccessTy AccessTy,
1998 if (BaseOffset.isZero() && !BaseGV)
2003 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2007 if (!HasBaseReg && Scale == 1) {
2017 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2027 Immediate MaxOffset, LSRUse::KindType Kind,
2028 MemAccessTy AccessTy,
const SCEV *S,
2031 if (S->
isZero())
return true;
2044 if (BaseOffset.isZero() && !BaseGV)
2047 if (BaseOffset.isScalable())
2052 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2055 BaseOffset, HasBaseReg, Scale);
2072 const SCEV *IncExpr;
2074 IVInc(Instruction *U,
Value *O,
const SCEV *
E)
2075 : UserInst(
U), IVOperand(
O), IncExpr(
E) {}
2082 const SCEV *ExprBase =
nullptr;
2084 IVChain() =
default;
2085 IVChain(
const IVInc &Head,
const SCEV *
Base)
2086 : Incs(1, Head), ExprBase(
Base) {}
2091 const_iterator
begin()
const {
2093 return std::next(Incs.
begin());
2095 const_iterator
end()
const {
2100 bool hasIncs()
const {
return Incs.
size() >= 2; }
2109 bool isProfitableIncrement(
const SCEV *OperExpr,
2110 const SCEV *IncExpr,
2118 SmallPtrSet<Instruction*, 4> FarUsers;
2119 SmallPtrSet<Instruction*, 4> NearUsers;
2125 ScalarEvolution &SE;
2128 AssumptionCache &AC;
2129 TargetLibraryInfo &TLI;
2130 const TargetTransformInfo &
TTI;
2132 MemorySSAUpdater *MSSAU;
2136 bool HardwareLoopProfitable =
false;
2150 SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
2157 SmallSetVector<Type *, 4> Types;
2163 RegUseTracker RegUses;
2168 static const unsigned MaxChains = 8;
2174 SmallPtrSet<Use*, MaxChains> IVIncSet;
2177 SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
2183 SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2185 void OptimizeShadowIV();
2186 bool FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse);
2188 void OptimizeLoopTermCond();
2190 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2191 SmallVectorImpl<ChainUsers> &ChainUsersVec);
2192 void FinalizeChain(IVChain &Chain);
2193 void CollectChains();
2194 void GenerateIVChain(
const IVChain &Chain,
2195 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2197 void CollectInterestingTypesAndFactors();
2198 void CollectFixupsAndInitialFormulae();
2201 using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2204 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2205 LSRUse::KindType Kind, MemAccessTy AccessTy);
2207 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2208 MemAccessTy AccessTy);
2210 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2212 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2214 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2215 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2216 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2217 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2218 bool IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const;
2220 void CollectLoopInvariantFixupsAndFormulae();
2222 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2223 unsigned Depth = 0);
2225 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2227 size_t Idx,
bool IsScaledReg =
false);
2228 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2229 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2230 const Formula &
Base,
size_t Idx,
2231 bool IsScaledReg =
false);
2232 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2233 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2234 const Formula &
Base,
2235 const SmallVectorImpl<Immediate> &Worklist,
2236 size_t Idx,
bool IsScaledReg =
false);
2237 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2238 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2239 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2240 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2241 void GenerateCrossUseConstantOffsets();
2242 void GenerateAllReuseFormulae();
2244 void FilterOutUndesirableDedicatedRegisters();
2246 size_t EstimateSearchSpaceComplexity()
const;
2247 void NarrowSearchSpaceByDetectingSupersets();
2248 void NarrowSearchSpaceByCollapsingUnrolledCode();
2249 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2250 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2251 void NarrowSearchSpaceByFilterPostInc();
2252 void NarrowSearchSpaceByDeletingCostlyFormulas();
2253 void NarrowSearchSpaceByPickingWinnerRegs();
2254 void NarrowSearchSpaceUsingHeuristics();
2256 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2258 SmallVectorImpl<const Formula *> &Workspace,
2259 const Cost &CurCost,
2260 const SmallPtrSet<const SCEV *, 16> &CurRegs,
2261 DenseSet<const SCEV *> &VisitedRegs)
const;
2262 void Solve(SmallVectorImpl<const Formula *> &Solution)
const;
2266 const SmallVectorImpl<Instruction *> &Inputs)
const;
2269 const LSRUse &LU)
const;
2271 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2273 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const;
2274 void RewriteForPHI(PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2276 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2277 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2278 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2279 void ImplementSolution(
const SmallVectorImpl<const Formula *> &Solution);
2282 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2283 LoopInfo &LI,
const TargetTransformInfo &
TTI, AssumptionCache &AC,
2284 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2286 bool getChanged()
const {
return Changed; }
2287 const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs()
const {
2288 return ScalarEvolutionIVs;
2291 void print_factors_and_types(raw_ostream &OS)
const;
2292 void print_fixups(raw_ostream &OS)
const;
2293 void print_uses(raw_ostream &OS)
const;
2294 void print(raw_ostream &OS)
const;
2302void LSRInstance::OptimizeShadowIV() {
2312 Type *DestTy =
nullptr;
2313 bool IsSigned =
false;
2329 DestTy = UCast->getDestTy();
2333 DestTy = SCast->getDestTy();
2335 if (!DestTy)
continue;
2355 if (Mantissa == -1)
continue;
2359 unsigned Entry, Latch;
2369 if (!Init)
continue;
2370 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2374 BinaryOperator *Incr =
2376 if (!Incr)
continue;
2377 if (Incr->
getOpcode() != Instruction::Add
2378 && Incr->
getOpcode() != Instruction::Sub)
2382 ConstantInt *
C =
nullptr;
2394 if (!
C->getValue().isStrictlyPositive())
2402 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2404 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2405 : Instruction::FSub,
2422bool LSRInstance::FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse) {
2423 for (IVStrideUse &U : IU)
2424 if (
U.getUser() ==
Cond) {
2482Instruction *LSRInstance::OptimizeMax(ICmpInst *
Cond, IVStrideUse *&CondUse) {
2497 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2498 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2504 const SCEVNAryExpr *
Max =
nullptr;
2506 Pred = ICmpInst::ICMP_SLE;
2509 Pred = ICmpInst::ICMP_SLT;
2512 Pred = ICmpInst::ICMP_ULT;
2521 if (
Max->getNumOperands() != 2)
2524 const SCEV *MaxLHS =
Max->getOperand(0);
2525 const SCEV *MaxRHS =
Max->getOperand(1);
2530 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2541 "Loop condition operand is an addrec in a different loop!");
2545 Value *NewRHS =
nullptr;
2546 if (ICmpInst::isTrueWhenEqual(Pred)) {
2550 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2551 NewRHS = BO->getOperand(0);
2554 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2555 NewRHS = BO->getOperand(0);
2563 NewRHS = SU->getValue();
2575 ICmpInst *NewCond =
new ICmpInst(
Cond->getIterator(), Pred,
2576 Cond->getOperand(0), NewRHS,
"scmp");
2580 Cond->replaceAllUsesWith(NewCond);
2583 Cond->eraseFromParent();
2585 if (
Cmp->use_empty()) {
2587 Cmp->eraseFromParent();
2594LSRInstance::OptimizeLoopTermCond() {
2595 SmallPtrSet<Instruction *, 4> PostIncs;
2610 SmallVector<BasicBlock*, 8> ExitingBlocks;
2611 L->getExitingBlocks(ExitingBlocks);
2619 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2641 IVStrideUse *CondUse =
nullptr;
2642 if (!FindIVUserForCond(
Cond, CondUse))
2652 Cond = OptimizeMax(Cmp, CondUse);
2657 if (!DT.
dominates(ExitingBlock, LatchBlock))
2662 if (LatchBlock != ExitingBlock)
2663 for (
const IVStrideUse &UI : IU)
2666 if (&UI != CondUse &&
2670 const SCEV *
A = IU.getStride(*CondUse, L);
2671 const SCEV *
B = IU.getStride(UI, L);
2672 if (!
A || !
B)
continue;
2681 if (
const SCEVConstant *
D =
2683 const ConstantInt *
C =
D->getValue();
2685 if (
C->isOne() ||
C->isMinusOne())
2686 goto decline_post_inc;
2688 if (
C->getValue().getSignificantBits() >= 64 ||
2689 C->getValue().isMinSignedValue())
2690 goto decline_post_inc;
2693 MemAccessTy AccessTy =
2695 int64_t Scale =
C->getSExtValue();
2699 AccessTy.AddrSpace))
2700 goto decline_post_inc;
2705 AccessTy.AddrSpace))
2706 goto decline_post_inc;
2711 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2719 if (
Cond->hasOneUse()) {
2720 Cond->moveBefore(TermBr->getIterator());
2725 Cond->setName(
L->getHeader()->getName() +
".termcond");
2726 Cond->insertInto(ExitingBlock, TermBr->getIterator());
2730 TermBr->replaceUsesOfWith(OldCond,
Cond);
2747 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2748 for (Instruction *Inst : PostIncs)
2754bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2755 bool HasBaseReg, LSRUse::KindType Kind,
2756 MemAccessTy AccessTy) {
2757 Immediate NewMinOffset = LU.MinOffset;
2758 Immediate NewMaxOffset = LU.MaxOffset;
2759 MemAccessTy NewAccessTy = AccessTy;
2764 if (LU.Kind != Kind)
2770 if (Kind == LSRUse::Address) {
2771 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2772 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->
getContext(),
2773 AccessTy.AddrSpace);
2778 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2780 LU.MaxOffset - NewOffset, HasBaseReg))
2782 NewMinOffset = NewOffset;
2783 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2785 NewOffset - LU.MinOffset, HasBaseReg))
2787 NewMaxOffset = NewOffset;
2793 if (NewAccessTy.MemTy && NewAccessTy.MemTy->
isVoidTy() &&
2794 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2798 LU.MinOffset = NewMinOffset;
2799 LU.MaxOffset = NewMaxOffset;
2800 LU.AccessTy = NewAccessTy;
2807std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2808 LSRUse::KindType Kind,
2809 MemAccessTy AccessTy) {
2810 const SCEV *
Copy = Expr;
2811 SCEVUse ExprUse = Expr;
2819 Offset = Immediate::getFixed(0);
2822 std::pair<UseMapTy::iterator, bool>
P =
2823 UseMap.
try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
2826 size_t LUIdx =
P.first->second;
2827 LSRUse &LU =
Uses[LUIdx];
2828 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2830 return std::make_pair(LUIdx,
Offset);
2834 size_t LUIdx =
Uses.size();
2835 P.first->second = LUIdx;
2836 Uses.push_back(LSRUse(Kind, AccessTy));
2837 LSRUse &LU =
Uses[LUIdx];
2841 return std::make_pair(LUIdx,
Offset);
2845void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2846 if (&LU != &
Uses.back())
2851 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2857LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2858 const LSRUse &OrigLU) {
2860 for (LSRUse &LU :
Uses) {
2866 if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero &&
2867 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2868 LU.HasFormulaWithSameRegs(OrigF)) {
2870 for (
const Formula &
F : LU.Formulae) {
2873 if (
F.BaseRegs == OrigF.BaseRegs &&
2874 F.ScaledReg == OrigF.ScaledReg &&
2875 F.BaseGV == OrigF.BaseGV &&
2876 F.Scale == OrigF.Scale &&
2877 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2878 if (
F.BaseOffset.isZero())
2893void LSRInstance::CollectInterestingTypesAndFactors() {
2894 SmallSetVector<const SCEV *, 4> Strides;
2898 for (
const IVStrideUse &U : IU) {
2899 const SCEV *Expr = IU.getExpr(U);
2917 }
while (!Worklist.
empty());
2921 for (SmallSetVector<const SCEV *, 4>::const_iterator
2923 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2924 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2925 const SCEV *OldStride = *
I;
2926 const SCEV *NewStride = *NewStrideIter;
2936 if (
const SCEVConstant *Factor =
2939 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2940 Factors.insert(Factor->getAPInt().getSExtValue());
2941 }
else if (
const SCEVConstant *Factor =
2945 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2946 Factors.insert(Factor->getAPInt().getSExtValue());
2952 if (Types.size() == 1)
2964 for(; OI != OE; ++OI) {
2983 return Trunc->getOperand(0);
3016 if (SubExpr->getSCEVType() ==
scAddExpr)
3019 if (SubExpr->getSCEVType() !=
scMulExpr)
3035bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3036 const SCEV *IncExpr,
3037 ScalarEvolution &SE) {
3050 SmallPtrSet<const SCEV*, 8> Processed;
3071 if (!Chain.hasIncs())
3074 if (!
Users.empty()) {
3075 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3077 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3080 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3089 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3092 const SCEV *LastIncExpr =
nullptr;
3093 unsigned NumConstIncrements = 0;
3094 unsigned NumVarIncrements = 0;
3095 unsigned NumReusedIncrements = 0;
3097 if (
TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
3100 for (
const IVInc &Inc : Chain) {
3101 if (
TTI.isProfitableLSRChainElement(Inc.UserInst))
3103 if (Inc.IncExpr->isZero())
3109 ++NumConstIncrements;
3113 if (Inc.IncExpr == LastIncExpr)
3114 ++NumReusedIncrements;
3118 LastIncExpr = Inc.IncExpr;
3123 if (NumConstIncrements > 1)
3130 cost += NumVarIncrements;
3134 cost -= NumReusedIncrements;
3136 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3143void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
3144 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
3148 const SCEV *
const OperExpr = SE.
getSCEV(NextIV);
3149 const SCEV *
const OperExprBase =
getExprBase(OperExpr);
3153 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3154 const SCEV *LastIncExpr =
nullptr;
3155 for (; ChainIdx < NChains; ++ChainIdx) {
3156 IVChain &Chain = IVChainVec[ChainIdx];
3174 const SCEV *PrevExpr = SE.
getSCEV(PrevIV);
3175 const SCEV *IncExpr = SE.
getMinusSCEV(OperExpr, PrevExpr);
3179 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3180 LastIncExpr = IncExpr;
3186 if (ChainIdx == NChains) {
3193 LastIncExpr = OperExpr;
3200 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3202 ChainUsersVec.
resize(NChains);
3203 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3204 <<
") IV=" << *LastIncExpr <<
"\n");
3206 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3207 <<
") IV+" << *LastIncExpr <<
"\n");
3209 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3211 IVChain &Chain = IVChainVec[ChainIdx];
3213 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
3215 if (!LastIncExpr->
isZero()) {
3216 ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);
3225 for (User *U : IVOper->
users()) {
3231 IVChain::const_iterator IncIter = Chain.Incs.begin();
3232 IVChain::const_iterator IncEnd = Chain.Incs.end();
3233 for( ; IncIter != IncEnd; ++IncIter) {
3234 if (IncIter->UserInst == OtherUse)
3237 if (IncIter != IncEnd)
3242 && IU.isIVUserOrOperand(OtherUse)) {
3245 NearUsers.
insert(OtherUse);
3250 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3275void LSRInstance::CollectChains() {
3279 SmallVector<BasicBlock *,8> LatchPath;
3282 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3288 for (BasicBlock *BB :
reverse(LatchPath)) {
3289 for (Instruction &
I : *BB) {
3295 if (IU.isEphemeral(&
I))
3305 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3306 ChainIdx < NChains; ++ChainIdx) {
3307 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3310 SmallPtrSet<Instruction*, 4> UniqueOperands;
3313 while (IVOpIter != IVOpEnd) {
3315 if (UniqueOperands.
insert(IVOpInst).second)
3316 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3317 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3322 for (PHINode &PN :
L->getHeader()->phis()) {
3329 ChainInstruction(&PN, IncV, ChainUsersVec);
3332 unsigned ChainIdx = 0;
3333 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3334 UsersIdx < NChains; ++UsersIdx) {
3336 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3339 if (ChainIdx != UsersIdx)
3340 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3341 FinalizeChain(IVChainVec[ChainIdx]);
3344 IVChainVec.resize(ChainIdx);
3347void LSRInstance::FinalizeChain(IVChain &Chain) {
3348 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3349 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3351 for (
const IVInc &Inc : Chain) {
3353 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3354 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3355 IVIncSet.insert(UseI);
3363 Immediate IncOffset = Immediate::getZero();
3372 C->getSignificantBits() > 64)
3374 IncOffset = Immediate::getScalable(
C->getSExtValue());
3390void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3391 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3394 const IVInc &Head = Chain.Incs[0];
3399 Value *IVSrc =
nullptr;
3400 while (IVOpIter != IVOpEnd) {
3411 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3412 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3415 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3417 if (IVOpIter == IVOpEnd) {
3419 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3422 assert(IVSrc &&
"Failed to find IV chain source");
3427 const SCEV *LeftOverExpr =
nullptr;
3428 const SCEV *Accum = SE.
getZero(IntTy);
3432 for (
const IVInc &Inc : Chain) {
3435 InsertPt =
L->getLoopLatch()->getTerminator();
3439 Value *IVOper = IVSrc;
3440 if (!Inc.IncExpr->isZero()) {
3445 LeftOverExpr = LeftOverExpr ?
3446 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3450 bool FoundBase =
false;
3451 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3452 const SCEV *Remainder = SE.
getMinusSCEV(Accum, MapScev);
3454 if (!Remainder->
isZero()) {
3456 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3457 const SCEV *IVOperExpr =
3459 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3468 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3471 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3474 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3478 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3481 LeftOverExpr =
nullptr;
3485 if (IVTy != OperTy) {
3487 "cannot extend a chained IV");
3489 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3491 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3498 for (PHINode &Phi :
L->getHeader()->phis()) {
3502 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3505 Value *IVOper = IVSrc;
3507 if (IVTy != PostIncTy) {
3509 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3510 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3511 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3513 Phi.replaceUsesOfWith(PostIncV, IVOper);
3519void LSRInstance::CollectFixupsAndInitialFormulae() {
3520 CondBrInst *ExitBranch =
nullptr;
3521 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3524 SmallPtrSet<const SCEV *, 16> Regs;
3525 DenseSet<const SCEV *> VisitedRegs;
3526 DenseSet<size_t> VisitedLSRUse;
3528 for (
const IVStrideUse &U : IU) {
3533 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3534 if (IVIncSet.count(UseI)) {
3535 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3539 LSRUse::KindType
Kind = LSRUse::Basic;
3540 MemAccessTy AccessTy;
3542 Kind = LSRUse::Address;
3546 const SCEV *S = IU.getExpr(U);
3562 if (CI->isEquality()) {
3565 Value *
NV = CI->getOperand(1);
3566 if (NV ==
U.getOperandValToReplace()) {
3567 CI->setOperand(1, CI->getOperand(0));
3568 CI->setOperand(0, NV);
3569 NV = CI->getOperand(1);
3576 (!
NV->getType()->isPointerTy() ||
3583 Kind = LSRUse::ICmpZero;
3585 }
else if (
L->isLoopInvariant(NV) &&
3588 !
NV->getType()->isPointerTy()) {
3599 Kind = LSRUse::ICmpZero;
3606 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3607 if (Factors[i] != -1)
3608 Factors.insert(-(uint64_t)Factors[i]);
3614 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3615 size_t LUIdx =
P.first;
3617 LSRUse &LU =
Uses[LUIdx];
3620 LSRFixup &LF = LU.getNewFixup();
3621 LF.UserInst = UserInst;
3622 LF.OperandValToReplace =
U.getOperandValToReplace();
3623 LF.PostIncLoops = TmpPostIncLoops;
3625 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3626 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3629 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3631 F.initialMatch(S, L, SE);
3632 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU,
3633 HardwareLoopProfitable);
3634 VisitedLSRUse.
insert(LUIdx);
3638 if (LU.Formulae.empty()) {
3639 InsertInitialFormula(S, LU, LUIdx);
3640 CountRegisters(LU.Formulae.back(), LUIdx);
3649void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3653 LU.RigidFormula =
true;
3656 F.initialMatch(S, L, SE);
3657 bool Inserted = InsertFormula(LU, LUIdx,
F);
3658 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3664LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3665 LSRUse &LU,
size_t LUIdx) {
3667 F.BaseRegs.push_back(S);
3668 F.HasBaseReg =
true;
3669 bool Inserted = InsertFormula(LU, LUIdx,
F);
3670 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3674void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3676 RegUses.countRegister(
F.ScaledReg, LUIdx);
3677 for (
const SCEV *BaseReg :
F.BaseRegs)
3678 RegUses.countRegister(BaseReg, LUIdx);
3683bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3686 "Formula is illegal");
3688 if (!LU.InsertFormula(
F, *L))
3691 CountRegisters(
F, LUIdx);
3697bool LSRInstance::IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const {
3709LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3711 SmallPtrSet<const SCEV *, 32> Visited;
3718 while (!Worklist.
empty()) {
3722 if (!Visited.
insert(S).second)
3733 const Value *
V = US->getValue();
3736 if (
L->contains(Inst))
continue;
3740 for (
const Use &U :
V->uses()) {
3750 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3772 bool HasIncompatibleEHPTerminatedBlock =
false;
3774 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3775 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3776 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3777 HasIncompatibleEHPTerminatedBlock =
true;
3782 if (HasIncompatibleEHPTerminatedBlock) {
3805 unsigned OtherIdx = !
U.getOperandNo();
3806 Value *OtherOp = ICI->getOperand(OtherIdx);
3816 std::pair<size_t, Immediate>
P =
3817 getUse(S, LSRUse::Basic, MemAccessTy());
3818 size_t LUIdx =
P.first;
3820 LSRUse &LU =
Uses[LUIdx];
3821 LSRFixup &LF = LU.getNewFixup();
3822 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3823 LF.OperandValToReplace =
U;
3825 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3826 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3827 InsertSupplementalFormula(US, LU, LUIdx);
3828 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3844 unsigned Depth = 0) {
3851 for (
const SCEV *S :
Add->operands()) {
3858 const SCEV *Start, *Step;
3863 if (Start->isZero())
3872 Remainder =
nullptr;
3874 if (Remainder != Start) {
3896 LSRUse &LU,
const SCEV *S,
const Loop *L,
3898 if (LU.Kind != LSRUse::Address ||
3899 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3905 if (
TTI.isIndexedLoadLegal(
TTI.MIM_PostInc, S->
getType()) ||
3914void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3915 const Formula &
Base,
3916 unsigned Depth,
size_t Idx,
3918 const SCEV *
BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
3926 const SCEV *Remainder =
CollectSubexprs(BaseReg,
nullptr, AddOps, L, SE);
3930 if (AddOps.
size() == 1)
3944 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3949 InnerAddOps.append(std::next(J), std::as_const(AddOps).
end());
3953 if (InnerAddOps.size() == 1 &&
3955 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3958 const SCEV *InnerSum = SE.
getAddExpr(InnerAddOps);
3963 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3972 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
3975 F.ScaledReg =
nullptr;
3978 F.BaseRegs.erase(
F.BaseRegs.begin() + Idx);
3979 }
else if (IsScaledReg)
3980 F.ScaledReg = InnerSum;
3982 F.BaseRegs[Idx] = InnerSum;
3990 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
3993 F.BaseRegs.push_back(*J);
3998 if (InsertFormula(LU, LUIdx,
F))
4005 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4011void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4013 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4018 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4019 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4021 if (
Base.Scale == 1)
4022 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4028void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4031 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4032 (
Base.UnfoldedOffset.isNonZero()) <=
4040 Formula NewBase =
Base;
4041 NewBase.BaseRegs.clear();
4042 Type *CombinedIntegerType =
nullptr;
4043 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4046 if (!CombinedIntegerType)
4048 Ops.push_back(BaseReg);
4051 NewBase.BaseRegs.push_back(BaseReg);
4055 if (
Ops.size() == 0)
4060 auto GenerateFormula = [&](
const SCEV *Sum) {
4061 Formula
F = NewBase;
4069 F.BaseRegs.push_back(Sum);
4071 (void)InsertFormula(LU, LUIdx,
F);
4075 if (
Ops.size() > 1) {
4082 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4083 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4085 NewBase.UnfoldedOffset.getFixedValue(),
true));
4086 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4092void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4093 const Formula &
Base,
size_t Idx,
4095 SCEVUse
G = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
4097 if (
G->isZero() || !GV)
4101 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4106 F.BaseRegs[Idx] =
G;
4107 (void)InsertFormula(LU, LUIdx,
F);
4111void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4114 if (
Base.BaseGV)
return;
4116 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4117 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4118 if (
Base.Scale == 1)
4119 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4124void LSRInstance::GenerateConstantOffsetsImpl(
4125 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4126 const SmallVectorImpl<Immediate> &Worklist,
size_t Idx,
bool IsScaledReg) {
4128 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4130 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4132 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4134 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4136 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4142 F.ScaledReg =
nullptr;
4144 F.deleteBaseReg(
F.BaseRegs[Idx]);
4146 }
else if (IsScaledReg)
4149 F.BaseRegs[Idx] = NewG;
4151 (void)InsertFormula(LU, LUIdx,
F);
4155 SCEVUse
G = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
4166 const APInt *StepInt;
4171 for (Immediate
Offset : Worklist) {
4173 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4179 for (Immediate
Offset : Worklist)
4183 if (
G->isZero() ||
Imm.isZero() ||
4184 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4187 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4188 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4193 F.BaseRegs[Idx] =
G;
4198 (void)InsertFormula(LU, LUIdx,
F);
4202void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4208 if (LU.MaxOffset != LU.MinOffset)
4211 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4212 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4213 if (
Base.Scale == 1)
4214 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4220void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4222 if (LU.Kind != LSRUse::ICmpZero)
return;
4230 if (LU.MinOffset != LU.MaxOffset)
return;
4233 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4235 for (
const SCEV *BaseReg :
Base.BaseRegs)
4236 if (
BaseReg->getType()->isPointerTy())
4238 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4241 for (int64_t Factor : Factors) {
4246 if (
Base.BaseOffset.isMin() && Factor == -1)
4249 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4251 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4252 assert(Factor != 0 &&
"Zero factor not expected!");
4253 if (NewBaseOffset.getFixedValue() / Factor !=
4254 Base.BaseOffset.getFixedValue())
4262 Immediate
Offset = LU.MinOffset;
4263 if (
Offset.isMin() && Factor == -1)
4266 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4274 F.BaseOffset = NewBaseOffset;
4281 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4283 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4286 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4300 if (
F.UnfoldedOffset.isNonZero()) {
4301 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4303 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4304 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4305 Base.UnfoldedOffset.getFixedValue())
4309 IntTy,
F.UnfoldedOffset.getFixedValue()))
4314 (void)InsertFormula(LU, LUIdx,
F);
4321void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4328 if (
Base.Scale != 0 && !
Base.unscale())
4331 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4334 for (int64_t Factor : Factors) {
4335 Base.Scale = Factor;
4336 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4338 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4342 if (LU.Kind == LSRUse::Basic &&
4343 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4344 LU.AccessTy,
Base) &&
4345 LU.AllFixupsOutsideLoop)
4346 LU.Kind = LSRUse::Special;
4352 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4353 Base.BaseOffset.isZero() && !
Base.BaseGV)
4356 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4358 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4359 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4364 if (
const SCEV *Quotient =
getExactSDiv(AR, FactorS, SE,
true))
4365 if (!Quotient->isZero()) {
4368 F.ScaledReg = Quotient;
4369 F.deleteBaseReg(
F.BaseRegs[i]);
4373 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4374 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4378 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4380 (void)InsertFormula(LU, LUIdx,
F);
4396 const SCEV *Result =
nullptr;
4397 for (
auto &L :
Loops) {
4401 if (!New || (Result && New != Result))
4406 assert(Result &&
"failed to create expression");
4411void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4413 if (
Base.BaseGV)
return;
4423 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4426 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4430 for (
auto &LF : LU.Fixups)
4431 Loops.push_back(LF.PostIncLoops);
4433 for (
Type *SrcTy : Types) {
4442 const SCEV *NewScaledReg =
4444 if (!NewScaledReg || NewScaledReg->
isZero())
4446 F.ScaledReg = NewScaledReg;
4448 bool HasZeroBaseReg =
false;
4449 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4450 const SCEV *NewBaseReg =
4452 if (!NewBaseReg || NewBaseReg->
isZero()) {
4453 HasZeroBaseReg =
true;
4463 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4467 (void)InsertFormula(LU, LUIdx,
F);
4480 const SCEV *OrigReg;
4482 WorkItem(
size_t LI, Immediate
I,
const SCEV *R)
4483 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4485 void print(raw_ostream &OS)
const;
4491#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4492void WorkItem::print(raw_ostream &OS)
const {
4493 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4494 <<
" , add offset " <<
Imm;
4504void LSRInstance::GenerateCrossUseConstantOffsets() {
4506 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4508 DenseMap<const SCEV *, ImmMapTy>
Map;
4509 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4511 for (
const SCEV *Use : RegUses) {
4514 auto Pair =
Map.try_emplace(
Reg);
4517 Pair.first->second.insert(std::make_pair(Imm, Use));
4518 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(Use);
4525 SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
4527 for (
const SCEV *
Reg : Sequence) {
4528 const ImmMapTy &Imms =
Map.find(
Reg)->second;
4531 if (Imms.size() == 1)
4535 for (
const auto &Entry
4537 <<
' ' <<
Entry.first;
4541 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4543 const SCEV *OrigReg = J->second;
4545 Immediate JImm = J->first;
4546 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4549 UsedByIndicesMap[
Reg].
count() == 1) {
4557 Immediate
First = Imms.begin()->first;
4558 Immediate
Last = std::prev(Imms.end())->first;
4559 if (!
First.isCompatibleImmediate(
Last)) {
4566 bool Scalable =
First.isScalable() ||
Last.isScalable();
4567 int64_t FI =
First.getKnownMinValue();
4568 int64_t LI =
Last.getKnownMinValue();
4571 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4574 Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
4575 ImmMapTy::const_iterator OtherImms[] = {
4576 Imms.begin(), std::prev(Imms.end()),
4577 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4578 for (
const auto &M : OtherImms) {
4579 if (M == J || M == JE)
continue;
4580 if (!JImm.isCompatibleImmediate(
M->first))
4584 Immediate
Imm = JImm.subUnsigned(
M->first);
4585 for (
unsigned LUIdx : UsedByIndices.
set_bits())
4587 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4588 WorkItems.
push_back(WorkItem(LUIdx, Imm, OrigReg));
4595 UsedByIndicesMap.
clear();
4596 UniqueItems.
clear();
4599 for (
const WorkItem &WI : WorkItems) {
4600 size_t LUIdx = WI.LUIdx;
4601 LSRUse &LU =
Uses[LUIdx];
4602 Immediate
Imm = WI.Imm;
4603 const SCEV *OrigReg = WI.OrigReg;
4606 const SCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4610 for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4611 Formula
F = LU.Formulae[
L];
4618 if (
F.ScaledReg == OrigReg) {
4619 if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4621 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4623 const SCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4624 if (
F.referencesReg(S))
4627 NewF.BaseOffset =
Offset;
4628 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4631 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4640 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4642 if (
C->getValue()->isNegative() !=
4643 (NewF.BaseOffset.isLessThanZero()) &&
4644 (
C->getAPInt().abs() * APInt(
BitWidth,
F.Scale))
4645 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4650 NewF.canonicalize(*this->L);
4651 (void)InsertFormula(LU, LUIdx, NewF);
4654 for (
size_t N = 0, NE =
F.BaseRegs.size();
N != NE; ++
N) {
4656 if (BaseReg != OrigReg)
4659 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4660 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4661 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4663 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4665 LU.Kind, LU.AccessTy, NewF)) {
4669 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4673 NewF.UnfoldedOffset = NewUnfoldedOffset;
4675 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4680 for (
const SCEV *NewReg : NewF.BaseRegs)
4682 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4684 if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4686 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4687 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4690 NewF.BaseOffset.getFixedValue()))
4695 NewF.canonicalize(*this->L);
4696 (void)InsertFormula(LU, LUIdx, NewF);
4707LSRInstance::GenerateAllReuseFormulae() {
4710 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4711 LSRUse &LU =
Uses[LUIdx];
4712 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4713 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4714 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4715 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4717 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4718 LSRUse &LU =
Uses[LUIdx];
4719 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4720 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4721 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4722 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4723 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4724 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4725 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4726 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4728 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4729 LSRUse &LU =
Uses[LUIdx];
4730 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4731 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4734 GenerateCrossUseConstantOffsets();
4737 "After generating reuse formulae:\n";
4738 print_uses(
dbgs()));
4743void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4744 DenseSet<const SCEV *> VisitedRegs;
4745 SmallPtrSet<const SCEV *, 16> Regs;
4746 SmallPtrSet<const SCEV *, 16> LoserRegs;
4748 bool ChangedFormulae =
false;
4753 using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>,
size_t>;
4755 BestFormulaeTy BestFormulae;
4757 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4758 LSRUse &LU =
Uses[LUIdx];
4763 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4764 FIdx != NumForms; ++FIdx) {
4765 Formula &
F = LU.Formulae[FIdx];
4776 CostF.RateFormula(
F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4778 if (CostF.isLoser()) {
4790 for (
const SCEV *
Reg :
F.BaseRegs) {
4791 if (RegUses.isRegUsedByUsesOtherThan(
Reg, LUIdx))
4795 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4796 Key.push_back(
F.ScaledReg);
4801 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4802 BestFormulae.insert(std::make_pair(
Key, FIdx));
4806 Formula &Best = LU.Formulae[
P.first->second];
4808 Cost CostBest(L, SE,
TTI, AMK);
4810 CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
4811 HardwareLoopProfitable);
4812 if (CostF.isLess(CostBest))
4816 " in favor of formula ";
4817 Best.print(
dbgs());
dbgs() <<
'\n');
4820 ChangedFormulae =
true;
4822 LU.DeleteFormula(
F);
4830 LU.RecomputeRegs(LUIdx, RegUses);
4833 BestFormulae.clear();
4838 "After filtering out undesirable candidates:\n";
4846size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4848 for (
const LSRUse &LU :
Uses) {
4849 size_t FSize = LU.Formulae.size();
4864void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4868 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4869 "which use a superset of registers used by other "
4872 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4873 LSRUse &LU =
Uses[LUIdx];
4875 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4876 Formula &
F = LU.Formulae[i];
4877 if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4883 I =
F.BaseRegs.begin(),
E =
F.BaseRegs.end();
I !=
E; ++
I) {
4889 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4890 (uint64_t)
C->getValue()->getSExtValue());
4891 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4892 (
I -
F.BaseRegs.begin()));
4893 if (LU.HasFormulaWithSameRegs(NewF)) {
4896 LU.DeleteFormula(
F);
4907 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4908 (
I -
F.BaseRegs.begin()));
4909 if (LU.HasFormulaWithSameRegs(NewF)) {
4912 LU.DeleteFormula(
F);
4923 LU.RecomputeRegs(LUIdx, RegUses);
4932void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4937 dbgs() <<
"The search space is too complex.\n"
4938 "Narrowing the search space by assuming that uses separated "
4939 "by a constant offset will use the same registers.\n");
4943 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4944 LSRUse &LU =
Uses[LUIdx];
4945 for (
const Formula &
F : LU.Formulae) {
4946 if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
4948 assert((LU.Kind == LSRUse::Address || LU.Kind == LSRUse::ICmpZero) &&
4949 "Only address and cmp uses expected to have nonzero BaseOffset");
4951 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4955 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4956 LU.Kind, LU.AccessTy))
4961 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4962 LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
4965 for (LSRFixup &
Fixup : LU.Fixups) {
4966 Fixup.Offset +=
F.BaseOffset;
4967 LUThatHas->pushFixup(
Fixup);
4972 Type *FixupType = LUThatHas->Fixups[0].OperandValToReplace->getType();
4973 for (LSRFixup &
Fixup : LUThatHas->Fixups)
4974 assert(
Fixup.OperandValToReplace->getType() == FixupType &&
4975 "Expected all fixups to have the same type");
4980 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4981 Formula &
F = LUThatHas->Formulae[i];
4982 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4983 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4985 LUThatHas->DeleteFormula(
F);
4993 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
4996 DeleteUse(LU, LUIdx);
5009void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5013 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
5014 "undesirable dedicated registers.\n");
5016 FilterOutUndesirableDedicatedRegisters();
5031void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5036 dbgs() <<
"The search space is too complex.\n"
5037 "Narrowing the search space by choosing the best Formula "
5038 "from the Formulae with the same Scale and ScaledReg.\n");
5041 using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>,
size_t>;
5043 BestFormulaeTy BestFormulae;
5045 bool ChangedFormulae =
false;
5047 DenseSet<const SCEV *> VisitedRegs;
5048 SmallPtrSet<const SCEV *, 16> Regs;
5050 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5051 LSRUse &LU =
Uses[LUIdx];
5056 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5061 size_t FARegNum = 0;
5062 for (
const SCEV *
Reg : FA.BaseRegs) {
5063 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5064 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5066 size_t FBRegNum = 0;
5067 for (
const SCEV *
Reg : FB.BaseRegs) {
5068 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5069 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5071 if (FARegNum != FBRegNum)
5072 return FARegNum < FBRegNum;
5079 CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5081 CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5082 return CostFA.isLess(CostFB);
5086 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5088 Formula &
F = LU.Formulae[FIdx];
5091 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5095 Formula &Best = LU.Formulae[
P.first->second];
5096 if (IsBetterThan(
F, Best))
5100 " in favor of formula ";
5101 Best.print(
dbgs());
dbgs() <<
'\n');
5103 ChangedFormulae =
true;
5105 LU.DeleteFormula(
F);
5111 LU.RecomputeRegs(LUIdx, RegUses);
5114 BestFormulae.clear();
5119 "After filtering out undesirable candidates:\n";
5126void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5133 "Narrowing the search space by choosing the lowest "
5134 "register Formula for PostInc Uses.\n");
5136 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5137 LSRUse &LU =
Uses[LUIdx];
5139 if (LU.Kind != LSRUse::Address)
5145 size_t MinRegs = std::numeric_limits<size_t>::max();
5146 for (
const Formula &
F : LU.Formulae)
5147 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5150 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5152 Formula &
F = LU.Formulae[FIdx];
5153 if (
F.getNumRegs() > MinRegs) {
5156 LU.DeleteFormula(
F);
5163 LU.RecomputeRegs(LUIdx, RegUses);
5214void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5223 SmallPtrSet<const SCEV *, 4> UniqRegs;
5227 DenseMap <const SCEV *, float> RegNumMap;
5228 for (
const SCEV *
Reg : RegUses) {
5232 for (
const LSRUse &LU :
Uses) {
5233 if (!LU.Regs.count(
Reg))
5235 float P = LU.getNotSelectedProbability(
Reg);
5241 RegNumMap.
insert(std::make_pair(
Reg, PNotSel));
5245 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5248 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5249 LSRUse &LU =
Uses[LUIdx];
5251 if (LU.Formulae.size() < 2)
5256 float FMinRegNum = LU.Formulae[0].getNumRegs();
5257 float FMinARegNum = LU.Formulae[0].getNumRegs();
5259 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5260 Formula &
F = LU.Formulae[i];
5263 for (
const SCEV *BaseReg :
F.BaseRegs) {
5264 if (UniqRegs.
count(BaseReg))
5266 FRegNum += RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5269 RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5271 if (
const SCEV *ScaledReg =
F.ScaledReg) {
5272 if (!UniqRegs.
count(ScaledReg)) {
5274 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5277 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5280 if (FMinRegNum > FRegNum ||
5281 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5282 FMinRegNum = FRegNum;
5283 FMinARegNum = FARegNum;
5288 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5290 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5291 while (LU.Formulae.size() != 1) {
5294 LU.Formulae.pop_back();
5296 LU.RecomputeRegs(LUIdx, RegUses);
5297 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5298 Formula &
F = LU.Formulae[0];
5314 MemAccessTy AccessType) {
5324 return TTI.isLegalAddressingMode(
5325 AccessType.MemTy,
nullptr,
5326 Diff->getSExtValue(),
5327 true, 0, AccessType.AddrSpace) &&
5328 !
TTI.isLegalAddressingMode(
5329 AccessType.MemTy,
nullptr,
5330 -Diff->getSExtValue(),
5331 true, 0, AccessType.AddrSpace);
5337void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5340 SmallPtrSet<const SCEV *, 4> Taken;
5348 const SCEV *Best =
nullptr;
5349 unsigned BestNum = 0;
5350 for (
const SCEV *
Reg : RegUses) {
5355 BestNum = RegUses.getUsedByIndices(
Reg).count();
5357 unsigned Count = RegUses.getUsedByIndices(
Reg).count();
5358 if (
Count > BestNum) {
5366 if (
Count == BestNum) {
5367 int LUIdx = RegUses.getUsedByIndices(
Reg).find_first();
5368 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5370 Uses[LUIdx].AccessTy)) {
5377 assert(Best &&
"Failed to find best LSRUse candidate");
5379 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5380 <<
" will yield profitable reuse.\n");
5385 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5386 LSRUse &LU =
Uses[LUIdx];
5387 if (!LU.Regs.count(Best))
continue;
5390 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5391 Formula &
F = LU.Formulae[i];
5392 if (!
F.referencesReg(Best)) {
5394 LU.DeleteFormula(
F);
5398 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5404 LU.RecomputeRegs(LUIdx, RegUses);
5415void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5416 NarrowSearchSpaceByDetectingSupersets();
5417 NarrowSearchSpaceByCollapsingUnrolledCode();
5418 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5420 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5421 NarrowSearchSpaceByFilterPostInc();
5423 NarrowSearchSpaceByDeletingCostlyFormulas();
5425 NarrowSearchSpaceByPickingWinnerRegs();
5429void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5431 SmallVectorImpl<const Formula *> &Workspace,
5432 const Cost &CurCost,
5433 const SmallPtrSet<const SCEV *, 16> &CurRegs,
5434 DenseSet<const SCEV *> &VisitedRegs)
const {
5445 const LSRUse &LU =
Uses[Workspace.
size()];
5451 SmallSetVector<const SCEV *, 4> ReqRegs;
5452 for (
const SCEV *S : CurRegs)
5453 if (LU.Regs.count(S))
5456 SmallPtrSet<const SCEV *, 16> NewRegs;
5457 Cost NewCost(L, SE,
TTI, AMK);
5458 for (
const Formula &
F : LU.Formulae) {
5466 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5467 for (
const SCEV *
Reg : ReqRegs) {
5468 if ((
F.ScaledReg &&
F.ScaledReg ==
Reg) ||
5471 if (NumReqRegsToFind == 0)
5475 if (NumReqRegsToFind != 0) {
5486 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5487 if (NewCost.isLess(SolutionCost)) {
5489 if (Workspace.
size() !=
Uses.size()) {
5490 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5491 NewRegs, VisitedRegs);
5492 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5493 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5496 dbgs() <<
".\nRegs:\n";
5497 for (
const SCEV *S : NewRegs)
dbgs()
5498 <<
"- " << *S <<
"\n";
5501 SolutionCost = NewCost;
5502 Solution = Workspace;
5511void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution)
const {
5513 Cost SolutionCost(L, SE,
TTI, AMK);
5514 SolutionCost.Lose();
5515 Cost CurCost(L, SE,
TTI, AMK);
5516 SmallPtrSet<const SCEV *, 16> CurRegs;
5517 DenseSet<const SCEV *> VisitedRegs;
5521 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5522 CurRegs, VisitedRegs);
5523 if (Solution.
empty()) {
5530 "The chosen solution requires ";
5531 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5532 for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5537 Solution[i]->print(
dbgs());
5543 const bool EnableDropUnprofitableSolution = [&] {
5555 if (BaselineCost.isLess(SolutionCost)) {
5556 if (!EnableDropUnprofitableSolution)
5558 dbgs() <<
"Baseline is more profitable than chosen solution, "
5559 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5562 "solution, dropping LSR solution.\n";);
5573 const SmallVectorImpl<Instruction *> &Inputs)
5577 bool AllDominate =
true;
5584 for (Instruction *Inst : Inputs) {
5585 if (Inst == Tentative || !DT.
dominates(Inst, Tentative)) {
5586 AllDominate =
false;
5591 if (Tentative->
getParent() == Inst->getParent() &&
5592 (!BetterPos || !DT.
dominates(Inst, BetterPos)))
5602 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5603 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5607 if (!Rung)
return IP;
5608 Rung = Rung->getIDom();
5609 if (!Rung)
return IP;
5610 IDom = Rung->getBlock();
5613 const Loop *IDomLoop = LI.getLoopFor(IDom);
5614 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5615 if (IDomDepth <= IPLoopDepth &&
5616 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5633 SmallVector<Instruction *, 4> Inputs;
5636 if (LU.Kind == LSRUse::ICmpZero)
5637 if (Instruction *
I =
5640 if (LF.PostIncLoops.
count(L)) {
5641 if (LF.isUseFullyOutsideLoop(L))
5642 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5648 for (
const Loop *PIL : LF.PostIncLoops) {
5649 if (PIL == L)
continue;
5654 if (!ExitingBlocks.
empty()) {
5656 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5663 "Insertion point must be a normal instruction");
5673 while (IP->isEHPad()) ++IP;
5678 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5686Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5688 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const {
5689 if (LU.RigidFormula)
5690 return LF.OperandValToReplace;
5694 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5699 Rewriter.setPostInc(LF.PostIncLoops);
5704 Type *Ty =
F.getType();
5718 for (
const SCEV *
Reg :
F.BaseRegs) {
5719 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5727 Value *ICmpScaledV =
nullptr;
5729 const SCEV *ScaledS =
F.ScaledReg;
5735 if (LU.Kind == LSRUse::ICmpZero) {
5745 "The only scale supported by ICmpZero uses is -1!");
5746 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5754 if (!
Ops.empty() && LU.Kind == LSRUse::Address &&
5764 Ops.push_back(ScaledS);
5790 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5791 "Expanding mismatched offsets\n");
5793 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5794 if (
Offset.isNonZero()) {
5795 if (LU.Kind == LSRUse::ICmpZero) {
5802 IntTy, -(uint64_t)
Offset.getFixedValue(),
true);
5811 Ops.push_back(
Offset.getUnknownSCEV(SE, IntTy));
5816 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5817 if (UnfoldedOffset.isNonZero()) {
5819 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5823 const SCEV *FullS =
Ops.empty() ?
5834 if (LU.Kind == LSRUse::ICmpZero) {
5838 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5839 "a scale at the same time!");
5840 if (
F.Scale == -1) {
5841 if (ICmpScaledV->
getType() != OpTy) {
5851 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5852 "ICmp does not support folding a global value and "
5853 "a scale at the same time!");
5857 -(uint64_t)
Offset.getFixedValue(),
5859 if (
C->getType() != OpTy) {
5863 assert(
C &&
"Cast of ConstantInt should have folded");
5876void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRUse &LU,
5877 const LSRFixup &LF,
const Formula &
F,
5878 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5879 DenseMap<BasicBlock *, Value *>
Inserted;
5883 bool needUpdateFixups =
false;
5894 Loop *PNLoop = LI.getLoopFor(Parent);
5895 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5901 CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
5902 .setMergeIdenticalEdges()
5903 .setKeepOneInputPHIs());
5906 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5917 if (
L->contains(BB) && !
L->contains(PN))
5925 needUpdateFixups =
true;
5930 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5943 LF.OperandValToReplace->
getType(),
"tmp",
5950 if (
L->contains(
I) && !
L->contains(BB))
5951 InsertedNonLCSSAInsts.insert(
I);
5954 Pair.first->second = FullV;
5961 if (needUpdateFixups) {
5962 for (LSRUse &LU :
Uses)
5963 for (LSRFixup &
Fixup : LU.Fixups)
5967 if (
Fixup.UserInst == PN) {
5970 bool foundInOriginalPHI =
false;
5972 if (val ==
Fixup.OperandValToReplace) {
5973 foundInOriginalPHI =
true;
5978 if (foundInOriginalPHI)
5989 if (val ==
Fixup.OperandValToReplace)
5990 Fixup.UserInst = NewPN;
6000void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
6002 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6006 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6012 if (FullV->
getType() != OpTy) {
6024 if (LU.Kind == LSRUse::ICmpZero)
6040 const LSRFixup &
Fixup,
const LSRUse &LU,
6044 if (LU.Kind != LSRUse::Address)
6045 return IVIncInsertPos;
6049 Type *Ty =
I->getType();
6052 return IVIncInsertPos;
6059 return IVIncInsertPos;
6066void LSRInstance::ImplementSolution(
6067 const SmallVectorImpl<const Formula *> &Solution) {
6073 for (
const IVChain &Chain : IVChainVec) {
6079 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6080 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6083 Rewriter.setIVIncInsertPos(L, InsertPos);
6084 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6088 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6091 for (
const IVChain &Chain : IVChainVec) {
6092 GenerateIVChain(Chain, DeadInsts);
6096 for (
const WeakVH &
IV :
Rewriter.getInsertedIVs())
6114 for (PHINode &PN :
L->getHeader()->phis()) {
6115 BinaryOperator *BO =
nullptr;
6121 case Instruction::Sub:
6126 case Instruction::Add:
6143 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6152LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6153 DominatorTree &DT, LoopInfo &LI,
6154 const TargetTransformInfo &
TTI, AssumptionCache &AC,
6155 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6156 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6159 :
TTI.getPreferredAddressingMode(
L, &SE)),
6162 if (!
L->isLoopSimplifyForm())
6170 unsigned NumUsers = 0;
6174 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6182 auto FirstNonPHI = PN->
getParent()->getFirstNonPHIIt();
6192 L->getHeader()->printAsOperand(
dbgs(),
false);
6198 HardwareLoopProfitable =
6199 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6203#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6206 Rewriter.disableCanonicalMode();
6207 Rewriter.enableLSRMode();
6211 OptimizeLoopTermCond();
6214 if (IU.empty())
return;
6217 if (!
L->isInnermost()) {
6230 CollectInterestingTypesAndFactors();
6231 CollectFixupsAndInitialFormulae();
6232 CollectLoopInvariantFixupsAndFormulae();
6238 print_uses(
dbgs()));
6240 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6244 GenerateAllReuseFormulae();
6246 FilterOutUndesirableDedicatedRegisters();
6247 NarrowSearchSpaceUsingHeuristics();
6257 if (Solution.
empty())
6262 for (
const LSRUse &LU :
Uses) {
6263 for (
const Formula &
F : LU.Formulae)
6265 F) &&
"Illegal formula generated!");
6270 ImplementSolution(Solution);
6273#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6274void LSRInstance::print_factors_and_types(
raw_ostream &OS)
const {
6275 if (Factors.empty() &&
Types.empty())
return;
6277 OS <<
"LSR has identified the following interesting factors and types: ";
6280 for (int64_t Factor : Factors)
6281 OS <<
LS <<
'*' << Factor;
6283 for (
Type *Ty : Types)
6284 OS <<
LS <<
'(' << *Ty <<
')';
6288void LSRInstance::print_fixups(raw_ostream &OS)
const {
6289 OS <<
"LSR is examining the following fixup sites:\n";
6290 for (
const LSRUse &LU :
Uses)
6291 for (
const LSRFixup &LF : LU.Fixups) {
6298void LSRInstance::print_uses(raw_ostream &OS)
const {
6299 OS <<
"LSR is examining the following uses:\n";
6300 for (
const LSRUse &LU :
Uses) {
6304 for (
const Formula &
F : LU.Formulae) {
6312void LSRInstance::print(raw_ostream &OS)
const {
6313 print_factors_and_types(OS);
6325class LoopStrengthReduce :
public LoopPass {
6329 LoopStrengthReduce();
6332 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
6333 void getAnalysisUsage(AnalysisUsage &AU)
const override;
6338LoopStrengthReduce::LoopStrengthReduce() : LoopPass(
ID) {
6342void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6369ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6370 llvm::DIExpression::expr_op_iterator Begin =
6371 llvm::DIExpression::expr_op_iterator(Expr.
begin());
6372 llvm::DIExpression::expr_op_iterator End =
6373 llvm::DIExpression::expr_op_iterator(Expr.
end());
6374 return {Begin, End};
6377struct SCEVDbgValueBuilder {
6378 SCEVDbgValueBuilder() =
default;
6379 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6381 void clone(
const SCEVDbgValueBuilder &
Base) {
6382 LocationOps =
Base.LocationOps;
6387 LocationOps.
clear();
6394 SmallVector<Value *, 2> LocationOps;
6397 void pushUInt(uint64_t Operand) { Expr.
push_back(Operand); }
6404 unsigned ArgIndex = 0;
6405 if (It != LocationOps.
end()) {
6406 ArgIndex = std::distance(LocationOps.
begin(), It);
6408 ArgIndex = LocationOps.
size();
6414 void pushValue(
const SCEVUnknown *U) {
6419 bool pushConst(
const SCEVConstant *
C) {
6420 if (
C->getAPInt().getSignificantBits() > 64)
6422 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6423 Expr.
push_back(
C->getAPInt().getSExtValue());
6430 return ToDwarfOpIter(Expr);
6435 bool pushArithmeticExpr(
const llvm::SCEVCommutativeExpr *CommExpr,
6438 "Expected arithmetic SCEV type");
6440 unsigned EmitOperator = 0;
6441 for (
const auto &
Op : CommExpr->
operands()) {
6444 if (EmitOperator >= 1)
6445 pushOperator(DwarfOp);
6452 bool pushCast(
const llvm::SCEVCastExpr *
C,
bool IsSigned) {
6453 const llvm::SCEV *Inner =
C->getOperand(0);
6454 const llvm::Type *
Type =
C->getType();
6455 uint64_t ToWidth =
Type->getIntegerBitWidth();
6456 bool Success = pushSCEV(Inner);
6458 IsSigned ? llvm::dwarf::DW_ATE_signed
6459 : llvm::dwarf::DW_ATE_unsigned};
6460 for (
const auto &
Op : CastOps)
6466 bool pushSCEV(
const llvm::SCEV *S) {
6469 Success &= pushConst(StartInt);
6474 pushLocation(
U->getValue());
6477 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6480 Success &= pushSCEV(UDiv->getLHS());
6481 Success &= pushSCEV(UDiv->getRHS());
6482 pushOperator(llvm::dwarf::DW_OP_div);
6489 "Unexpected cast type in SCEV.");
6493 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6508 bool isIdentityFunction(uint64_t
Op,
const SCEV *S) {
6510 if (
C->getAPInt().getSignificantBits() > 64)
6512 int64_t
I =
C->getAPInt().getSExtValue();
6514 case llvm::dwarf::DW_OP_plus:
6515 case llvm::dwarf::DW_OP_minus:
6517 case llvm::dwarf::DW_OP_mul:
6518 case llvm::dwarf::DW_OP_div:
6531 bool SCEVToValueExpr(
const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6537 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6538 if (!pushSCEV(Stride))
6540 pushOperator(llvm::dwarf::DW_OP_mul);
6542 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6543 if (!pushSCEV(Start))
6545 pushOperator(llvm::dwarf::DW_OP_plus);
6551 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6552 pushLocation(OffsetValue);
6555 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6556 << std::to_string(
Offset) <<
"\n");
6562 bool createIterCountExpr(
const SCEV *S,
6563 const SCEVDbgValueBuilder &IterationCount,
6564 ScalarEvolution &SE) {
6573 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6577 if (!Rec->isAffine())
6585 clone(IterationCount);
6586 if (!SCEVToValueExpr(*Rec, SE))
6597 bool SCEVToIterCountExpr(
const llvm::SCEVAddRecExpr &SAR,
6598 ScalarEvolution &SE) {
6604 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6605 if (!pushSCEV(Start))
6607 pushOperator(llvm::dwarf::DW_OP_minus);
6609 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6610 if (!pushSCEV(Stride))
6612 pushOperator(llvm::dwarf::DW_OP_div);
6620 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6621 SmallVectorImpl<Value *> &DestLocations) {
6623 "Expected the locations vector to contain the IV");
6628 "Expected the location ops to contain the IV.");
6632 for (
const auto &
Op : LocationOps) {
6633 auto It =
find(DestLocations,
Op);
6634 if (It != DestLocations.
end()) {
6636 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6644 for (
const auto &
Op : expr_ops()) {
6646 Op.appendToVector(DestExpr);
6653 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6661struct DVIRecoveryRec {
6662 DVIRecoveryRec(DbgVariableRecord *DVR)
6663 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6665 DbgVariableRecord *DbgRef;
6667 bool HadLocationArgList;
6673 for (
auto &RE : RecoveryExprs)
6675 RecoveryExprs.clear();
6678 ~DVIRecoveryRec() { clear(); }
6686 auto expr_ops = ToDwarfOpIter(Expr);
6688 for (
auto Op : expr_ops)
6697template <
typename T>
6701 "contain any DW_OP_llvm_arg operands.");
6708template <
typename T>
6713 "Expected expression that references DIArglist locations using "
6714 "DW_OP_llvm_arg operands.");
6716 for (
Value *V : Locations)
6733 if (NumLLVMArgs == 0) {
6740 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6770 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6771 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6772 assert(DVIRec.Expr &&
"Expected an expression");
6777 if (!DVIRec.HadLocationArgList) {
6778 assert(DVIRec.LocationOps.size() == 1 &&
6779 "Unexpected number of location ops.");
6783 Value *CachedValue =
6788 for (
WeakVH VH : DVIRec.LocationOps) {
6796 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6801 const SCEV *SCEVInductionVar,
6802 SCEVDbgValueBuilder IterCountExpr) {
6816 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6818 NewLocationOps.
push_back(LSRInductionVar);
6820 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6821 WeakVH VH = DVIRec.LocationOps[i];
6827 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6829 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6837 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6838 <<
" refers to a location that is now undef or erased. "
6839 "Salvage abandoned.\n");
6843 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6844 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6846 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6847 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6851 if (std::optional<APInt>
Offset =
6853 if (
Offset->getSignificantBits() <= 64)
6854 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6857 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6866 assert(DVIRec.RecoveryExprs.size() == 1 &&
6867 "Expected only a single recovery expression for an empty "
6869 assert(DVIRec.RecoveryExprs[0] &&
6870 "Expected a SCEVDbgSalvageBuilder for location 0");
6871 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6872 B->appendToVectors(
NewExpr, NewLocationOps);
6874 for (
const auto &
Op : DVIRec.Expr->
expr_ops()) {
6882 SCEVDbgValueBuilder *DbgBuilder =
6883 DVIRec.RecoveryExprs[LocationArgIndex].get();
6889 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6890 "Expected a positive index for the location-op position.");
6891 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6895 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6899 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Updated DVI: " << *DVIRec.DbgRef <<
"\n");
6907 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6908 if (DVIToUpdate.empty())
6912 assert(SCEVInductionVar &&
6913 "Anticipated a SCEV for the post-LSR induction variable");
6917 if (!IVAddRec->isAffine())
6925 SCEVDbgValueBuilder IterCountExpr;
6926 IterCountExpr.pushLocation(LSRInductionVar);
6927 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6930 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6933 for (
auto &DVIRec : DVIToUpdate) {
6934 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6945 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
6946 for (
const auto &
B : L->getBlocks()) {
6947 for (
auto &
I : *
B) {
6949 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
6954 if (DbgVal.isKillLocation())
6959 const auto &HasTranslatableLocationOps =
6961 for (
const auto LocOp : DbgValToTranslate.location_ops()) {
6975 if (!HasTranslatableLocationOps(DbgVal))
6978 std::unique_ptr<DVIRecoveryRec> NewRec =
6979 std::make_unique<DVIRecoveryRec>(&DbgVal);
6983 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
6984 for (
const auto LocOp : DbgVal.location_ops()) {
6985 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
6986 NewRec->LocationOps.push_back(LocOp);
6987 NewRec->HadLocationArgList = DbgVal.hasArgList();
6989 SalvageableDVISCEVs.push_back(std::move(NewRec));
6999 const LSRInstance &LSR) {
7001 auto IsSuitableIV = [&](
PHINode *
P) {
7012 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7019 if (IsSuitableIV(
P))
7023 for (
PHINode &
P : L.getHeader()->phis()) {
7024 if (IsSuitableIV(&
P))
7042 std::unique_ptr<MemorySSAUpdater> MSSAU;
7044 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7047 const LSRInstance &Reducer =
7048 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7049 Changed |= Reducer.getChanged();
7056#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7059 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7073 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7087 if (SalvageableDVIRecords.
empty())
7093 for (
const auto &L : LI) {
7097 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7098 "could not be identified.\n");
7102 for (
auto &Rec : SalvageableDVIRecords)
7104 SalvageableDVIRecords.
clear();
7108bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & ) {
7112 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7113 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7114 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7115 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7116 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7117 *
L->getHeader()->getParent());
7118 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7119 *
L->getHeader()->getParent());
7120 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7121 *
L->getHeader()->getParent());
7122 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7125 MSSA = &MSSAAnalysis->getMSSA();
7142char LoopStrengthReduce::ID = 0;
7145 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
early cse Early CSE w MemorySSA
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static Immediate ExtractImmediate(SCEVUse &S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< SCEVUse > &Good, SmallVectorImpl< SCEVUse > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static Instruction * getFixupInsertPos(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, DominatorTree &DT)
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static GlobalValue * ExtractSymbol(SCEVUse &S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Value * getCondition() const
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
ArrayRef< SCEVUse > operands() const
bool hasNoSignedWrap() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI ArrayRef< SCEVUse > operands() const
Return operands of this SCEV expression.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAddRecExpr(SCEVUse Start, SCEVUse Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
void setOperand(unsigned i, Value *Val)
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
iterator_range< use_iterator > uses()
A Value handle that may be null, and that tolerates its referent being deleted.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
class_match< const SCEVVScale > m_SCEVVScale()
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bind_ty< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
bool match(const SCEV *S, const Pattern &P)
class_match< const Loop > m_Loop()
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool isa_and_nonnull(const Y &Val)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
FunctionAddr VTableAddr Count
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range [Start, End).
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not trivially dead. These will be ignored.
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to the loop passes "below" them.
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.