132#define DEBUG_TYPE "loop-reduce"
149 cl::desc(
"Enable LSR phi elimination"));
154 cl::desc(
"Add instruction count to a LSR cost model"));
159 cl::desc(
"Narrow LSR complex solution using"
160 " expectation of registers number"));
166 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
167 " with the same ScaledReg and Scale"));
171 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
175 "Prefer pre-indexed addressing mode"),
177 "Prefer post-indexed addressing mode"),
182 cl::init(std::numeric_limits<uint16_t>::max()),
183 cl::desc(
"LSR search space complexity limit"));
187 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
191 cl::desc(
"Attempt to drop solution if it is less profitable"));
195 cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
199 cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
205 cl::desc(
"Stress test LSR IV chains"));
215 std::numeric_limits<unsigned>::max();
217 Type *MemTy =
nullptr;
220 MemAccessTy() =
default;
221 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
224 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
229 static MemAccessTy getUnknown(LLVMContext &Ctx,
230 unsigned AS = UnknownAddressSpace) {
231 return MemAccessTy(Type::getVoidTy(Ctx), AS);
242 SmallBitVector UsedByIndices;
244 void print(raw_ostream &OS)
const;
251 constexpr Immediate(ScalarTy MinVal,
bool Scalable)
252 : FixedOrScalableQuantity(MinVal, Scalable) {}
254 constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
255 : FixedOrScalableQuantity(
V) {}
258 constexpr Immediate() =
delete;
260 static constexpr Immediate getFixed(ScalarTy MinVal) {
261 return {MinVal,
false};
263 static constexpr Immediate getScalable(ScalarTy MinVal) {
264 return {MinVal,
true};
266 static constexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
267 return {MinVal, Scalable};
269 static constexpr Immediate getZero() {
return {0,
false}; }
270 static constexpr Immediate getFixedMin() {
271 return {std::numeric_limits<int64_t>::min(),
false};
273 static constexpr Immediate getFixedMax() {
274 return {std::numeric_limits<int64_t>::max(),
false};
276 static constexpr Immediate getScalableMin() {
277 return {std::numeric_limits<int64_t>::min(),
true};
279 static constexpr Immediate getScalableMax() {
280 return {std::numeric_limits<int64_t>::max(),
true};
283 constexpr bool isLessThanZero()
const {
return Quantity < 0; }
285 constexpr bool isGreaterThanZero()
const {
return Quantity > 0; }
287 constexpr bool isCompatibleImmediate(
const Immediate &Imm)
const {
288 return isZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
291 constexpr bool isMin()
const {
292 return Quantity == std::numeric_limits<ScalarTy>::min();
295 constexpr bool isMax()
const {
296 return Quantity == std::numeric_limits<ScalarTy>::max();
300 constexpr Immediate addUnsigned(
const Immediate &
RHS)
const {
301 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
302 ScalarTy
Value = (uint64_t)Quantity +
RHS.getKnownMinValue();
303 return {
Value, Scalable ||
RHS.isScalable()};
306 constexpr Immediate subUnsigned(
const Immediate &
RHS)
const {
307 assert(isCompatibleImmediate(
RHS) &&
"Incompatible Immediates");
308 ScalarTy
Value = (uint64_t)Quantity -
RHS.getKnownMinValue();
309 return {
Value, Scalable ||
RHS.isScalable()};
313 constexpr Immediate mulUnsigned(
const ScalarTy
RHS)
const {
314 ScalarTy
Value = (uint64_t)Quantity *
RHS;
315 return {
Value, Scalable};
319 const SCEV *getSCEV(ScalarEvolution &SE,
Type *Ty)
const {
326 const SCEV *getNegativeSCEV(ScalarEvolution &SE,
Type *Ty)
const {
327 const SCEV *NegS = SE.
getConstant(Ty, -(uint64_t)Quantity);
333 const SCEV *getUnknownSCEV(ScalarEvolution &SE,
Type *Ty)
const {
348struct KeyOrderTargetImmediate {
349 bool operator()(
const Immediate &
LHS,
const Immediate &
RHS)
const {
350 if (
LHS.isScalable() && !
RHS.isScalable())
352 if (!
LHS.isScalable() &&
RHS.isScalable())
354 return LHS.getKnownMinValue() <
RHS.getKnownMinValue();
361struct KeyOrderSizeTAndImmediate {
362 bool operator()(
const std::pair<size_t, Immediate> &
LHS,
363 const std::pair<size_t, Immediate> &
RHS)
const {
364 size_t LSize =
LHS.first;
365 size_t RSize =
RHS.first;
367 return LSize < RSize;
368 return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
375 OS <<
"[NumUses=" << UsedByIndices.
count() <<
']';
387 using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
389 RegUsesTy RegUsesMap;
393 void countRegister(
const SCEV *
Reg,
size_t LUIdx);
394 void dropRegister(
const SCEV *
Reg,
size_t LUIdx);
395 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
397 bool isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const;
399 const SmallBitVector &getUsedByIndices(
const SCEV *
Reg)
const;
415RegUseTracker::countRegister(
const SCEV *
Reg,
size_t LUIdx) {
416 std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(
Reg);
417 RegSortData &RSD = Pair.first->second;
420 RSD.UsedByIndices.
resize(std::max(RSD.UsedByIndices.
size(), LUIdx + 1));
421 RSD.UsedByIndices.
set(LUIdx);
425RegUseTracker::dropRegister(
const SCEV *
Reg,
size_t LUIdx) {
426 RegUsesTy::iterator It = RegUsesMap.find(
Reg);
427 assert(It != RegUsesMap.end());
428 RegSortData &RSD = It->second;
430 RSD.UsedByIndices.
reset(LUIdx);
434RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
435 assert(LUIdx <= LastLUIdx);
439 for (
auto &Pair : RegUsesMap) {
440 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
441 if (LUIdx < UsedByIndices.
size())
442 UsedByIndices[LUIdx] =
443 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
444 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
449RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const {
450 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
451 if (
I == RegUsesMap.end())
453 const SmallBitVector &UsedByIndices =
I->second.UsedByIndices;
455 if (i == -1)
return false;
456 if ((
size_t)i != LUIdx)
return true;
460const SmallBitVector &RegUseTracker::getUsedByIndices(
const SCEV *
Reg)
const {
461 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
462 assert(
I != RegUsesMap.end() &&
"Unknown register!");
463 return I->second.UsedByIndices;
466void RegUseTracker::clear() {
477 GlobalValue *BaseGV =
nullptr;
480 Immediate BaseOffset = Immediate::getZero();
483 bool HasBaseReg =
false;
506 const SCEV *ScaledReg =
nullptr;
511 Immediate UnfoldedOffset = Immediate::getZero();
515 void initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE);
519 void canonicalize(
const Loop &L);
523 bool hasZeroEnd()
const;
525 bool countsDownToZero()
const;
527 size_t getNumRegs()
const;
530 void deleteBaseReg(
const SCEV *&S);
532 bool referencesReg(
const SCEV *S)
const;
533 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
534 const RegUseTracker &RegUses)
const;
536 void print(raw_ostream &OS)
const;
554 for (
const SCEV *S :
Add->operands())
560 const SCEV *Start, *Step;
575 if (
Mul->getOperand(0)->isAllOnesValue()) {
584 for (
const SCEV *S : MyGood)
586 for (
const SCEV *S : MyBad)
598void Formula::initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE) {
605 BaseRegs.push_back(Sum);
611 BaseRegs.push_back(Sum);
626bool Formula::isCanonical(
const Loop &L)
const {
627 assert((Scale == 0 || ScaledReg) &&
628 "ScaledReg must be non-null if Scale is non-zero");
631 return BaseRegs.size() <= 1;
636 if (Scale == 1 && BaseRegs.empty())
645 return none_of(BaseRegs, [&L](
const SCEV *S) {
656void Formula::canonicalize(
const Loop &L) {
660 if (BaseRegs.empty()) {
662 assert(ScaledReg &&
"Expected 1*reg => reg");
663 assert(Scale == 1 &&
"Expected 1*reg => reg");
664 BaseRegs.push_back(ScaledReg);
672 ScaledReg = BaseRegs.pop_back_val();
680 auto I =
find_if(BaseRegs, [&L](
const SCEV *S) {
683 if (
I != BaseRegs.end())
693bool Formula::unscale() {
697 BaseRegs.push_back(ScaledReg);
702bool Formula::hasZeroEnd()
const {
703 if (UnfoldedOffset || BaseOffset)
705 if (BaseRegs.size() != 1 || ScaledReg)
710bool Formula::countsDownToZero()
const {
713 assert(BaseRegs.size() == 1 &&
"hasZeroEnd should mean one BaseReg");
714 const APInt *StepInt;
722size_t Formula::getNumRegs()
const {
723 return !!ScaledReg + BaseRegs.size();
728Type *Formula::getType()
const {
729 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
730 ScaledReg ? ScaledReg->
getType() :
736void Formula::deleteBaseReg(
const SCEV *&S) {
737 if (&S != &BaseRegs.back())
743bool Formula::referencesReg(
const SCEV *S)
const {
749bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
750 const RegUseTracker &RegUses)
const {
752 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
754 for (
const SCEV *BaseReg : BaseRegs)
755 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
761void Formula::print(raw_ostream &OS)
const {
762 ListSeparator
Plus(
" + ");
767 if (BaseOffset.isNonZero())
768 OS <<
Plus << BaseOffset;
770 for (
const SCEV *BaseReg : BaseRegs)
773 if (HasBaseReg && BaseRegs.empty())
774 OS <<
Plus <<
"**error: HasBaseReg**";
775 else if (!HasBaseReg && !BaseRegs.empty())
776 OS <<
Plus <<
"**error: !HasBaseReg**";
779 OS <<
Plus << Scale <<
"*reg(";
786 if (UnfoldedOffset.isNonZero())
787 OS <<
Plus <<
"imm(" << UnfoldedOffset <<
')';
827 bool IgnoreSignificantBits =
false) {
838 if (
RA.isAllOnes()) {
839 if (
LHS->getType()->isPointerTy())
852 const APInt &LA =
C->getAPInt();
861 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
863 IgnoreSignificantBits);
864 if (!Step)
return nullptr;
866 IgnoreSignificantBits);
867 if (!Start)
return nullptr;
880 for (
const SCEV *S :
Add->operands()) {
882 if (!
Op)
return nullptr;
910 for (
const SCEV *S :
Mul->operands()) {
913 IgnoreSignificantBits)) {
933 if (
C->getSignificantBits() <= 64) {
935 return Immediate::getFixed(
C->getSExtValue());
940 if (Result.isNonZero())
946 if (Result.isNonZero())
954 return Immediate::getScalable(
C->getSExtValue());
956 return Immediate::getZero();
991 if (
SI->getPointerOperand() == OperandVal)
996 switch (
II->getIntrinsicID()) {
997 case Intrinsic::memset:
998 case Intrinsic::prefetch:
999 case Intrinsic::masked_load:
1000 if (
II->getArgOperand(0) == OperandVal)
1003 case Intrinsic::masked_store:
1004 if (
II->getArgOperand(1) == OperandVal)
1007 case Intrinsic::memmove:
1008 case Intrinsic::memcpy:
1009 if (
II->getArgOperand(0) == OperandVal ||
1010 II->getArgOperand(1) == OperandVal)
1015 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo)) {
1016 if (IntrInfo.
PtrVal == OperandVal)
1022 if (RMW->getPointerOperand() == OperandVal)
1025 if (CmpX->getPointerOperand() == OperandVal)
1034 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1038 AccessTy.MemTy = Ty;
1042 AccessTy.AddrSpace =
SI->getPointerAddressSpace();
1044 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1046 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1048 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1050 switch (
II->getIntrinsicID()) {
1051 case Intrinsic::prefetch:
1052 case Intrinsic::memset:
1053 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1054 AccessTy.MemTy = OperandVal->
getType();
1056 case Intrinsic::memmove:
1057 case Intrinsic::memcpy:
1059 AccessTy.MemTy = OperandVal->
getType();
1061 case Intrinsic::masked_load:
1062 AccessTy.AddrSpace =
1063 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1065 case Intrinsic::masked_store:
1066 AccessTy.AddrSpace =
1067 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1071 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo) && IntrInfo.
PtrVal) {
1127 if (!Processed.
insert(S).second)
1131 for (
const SCEV *S :
Add->operands()) {
1138 const SCEV *Op0, *Op1;
1147 Value *UVal = U->getValue();
1151 if (UI && UI->
getOpcode() == Instruction::Mul &&
1184 const LSRUse &LU,
const Formula &
F);
1188 const LSRUse &LU,
const Formula &
F,
1195 const Loop *
L =
nullptr;
1196 ScalarEvolution *SE =
nullptr;
1197 const TargetTransformInfo *
TTI =
nullptr;
1198 TargetTransformInfo::LSRCost
C;
1203 Cost(
const Loop *L, ScalarEvolution &SE,
const TargetTransformInfo &
TTI,
1205 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1223 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1224 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1225 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1226 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0
u);
1232 return C.NumRegs == ~0
u;
1235 void RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1236 const DenseSet<const SCEV *> &VisitedRegs,
const LSRUse &LU,
1237 bool HardwareLoopProfitable,
1238 SmallPtrSetImpl<const SCEV *> *LoserRegs =
nullptr);
1240 void print(raw_ostream &OS)
const;
1244 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1245 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1246 bool HardwareLoopProfitable);
1247 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1248 SmallPtrSetImpl<const SCEV *> &Regs,
1249 const LSRUse &LU,
bool HardwareLoopProfitable,
1250 SmallPtrSetImpl<const SCEV *> *LoserRegs);
1261 Value *OperandValToReplace =
nullptr;
1271 Immediate
Offset = Immediate::getZero();
1273 LSRFixup() =
default;
1275 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1277 void print(raw_ostream &OS)
const;
1287 DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;
1300 using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1303 MemAccessTy AccessTy;
1309 Immediate MinOffset = Immediate::getFixedMax();
1310 Immediate MaxOffset = Immediate::getFixedMin();
1314 bool AllFixupsOutsideLoop =
true;
1319 bool AllFixupsUnconditional =
true;
1326 bool RigidFormula =
false;
1332 Type *WidestFixupType =
nullptr;
1340 SmallPtrSet<const SCEV *, 4> Regs;
1342 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1344 LSRFixup &getNewFixup() {
1345 Fixups.push_back(LSRFixup());
1349 void pushFixup(LSRFixup &f) {
1351 if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1352 MaxOffset =
f.Offset;
1353 if (Immediate::isKnownLT(
f.Offset, MinOffset))
1354 MinOffset =
f.Offset;
1357 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1358 float getNotSelectedProbability(
const SCEV *
Reg)
const;
1359 bool InsertFormula(
const Formula &
F,
const Loop &L);
1360 void DeleteFormula(Formula &
F);
1361 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1363 void print(raw_ostream &OS)
const;
1370 LSRUse::KindType Kind, MemAccessTy AccessTy,
1371 GlobalValue *BaseGV, Immediate BaseOffset,
1372 bool HasBaseReg, int64_t Scale,
1373 Instruction *
Fixup =
nullptr);
1386 [&](
unsigned i,
const SCEV *
Reg) {
1387 return i + getSetupCost(Reg, Depth - 1);
1396void Cost::RateRegister(
const Formula &
F,
const SCEV *
Reg,
1397 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1398 bool HardwareLoopProfitable) {
1403 if (AR->getLoop() != L) {
1410 if (!AR->getLoop()->contains(L)) {
1420 unsigned LoopCost = 1;
1429 F.BaseOffset.isFixed() &&
1430 *Step ==
F.BaseOffset.getFixedValue();
1435 if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
1442 if (LU.Kind == LSRUse::ICmpZero &&
F.countsDownToZero() &&
1443 HardwareLoopProfitable)
1445 C.AddRecCost += LoopCost;
1450 if (!Regs.
count(AR->getOperand(1))) {
1451 RateRegister(
F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);
1463 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1472void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1473 SmallPtrSetImpl<const SCEV *> &Regs,
1474 const LSRUse &LU,
bool HardwareLoopProfitable,
1475 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1476 if (LoserRegs && LoserRegs->
count(
Reg)) {
1481 RateRegister(
F,
Reg, Regs, LU, HardwareLoopProfitable);
1482 if (LoserRegs && isLoser())
1487void Cost::RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1488 const DenseSet<const SCEV *> &VisitedRegs,
1489 const LSRUse &LU,
bool HardwareLoopProfitable,
1490 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1493 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1495 unsigned PrevAddRecCost =
C.AddRecCost;
1496 unsigned PrevNumRegs =
C.NumRegs;
1497 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1498 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1499 if (VisitedRegs.
count(ScaledReg)) {
1503 RatePrimaryRegister(
F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1508 for (
const SCEV *BaseReg :
F.BaseRegs) {
1509 if (VisitedRegs.
count(BaseReg)) {
1513 RatePrimaryRegister(
F, BaseReg, Regs, LU, HardwareLoopProfitable,
1520 size_t NumBaseParts =
F.getNumRegs();
1521 if (NumBaseParts > 1)
1526 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1532 for (
const LSRFixup &
Fixup : LU.Fixups) {
1533 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1534 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1538 else if (
Offset.isNonZero())
1540 APInt(64,
Offset.getKnownMinValue(),
true).getSignificantBits();
1544 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1565 if (
C.NumRegs > TTIRegNum) {
1568 if (PrevNumRegs > TTIRegNum)
1569 C.Insns += (
C.NumRegs - PrevNumRegs);
1571 C.Insns += (
C.NumRegs - TTIRegNum);
1584 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1588 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1591 if (LU.Kind != LSRUse::ICmpZero)
1592 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1598 C.Insns = std::numeric_limits<unsigned>::max();
1599 C.NumRegs = std::numeric_limits<unsigned>::max();
1600 C.AddRecCost = std::numeric_limits<unsigned>::max();
1601 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1602 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1603 C.ImmCost = std::numeric_limits<unsigned>::max();
1604 C.SetupCost = std::numeric_limits<unsigned>::max();
1605 C.ScaleCost = std::numeric_limits<unsigned>::max();
1609bool Cost::isLess(
const Cost &
Other)
const {
1611 C.Insns !=
Other.C.Insns)
1612 return C.Insns <
Other.C.Insns;
1616#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1619 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1620 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1621 if (
C.AddRecCost != 0)
1622 OS <<
", with addrec cost " <<
C.AddRecCost;
1623 if (
C.NumIVMuls != 0)
1624 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1625 << (
C.NumIVMuls == 1 ?
"" :
"s");
1626 if (
C.NumBaseAdds != 0)
1627 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1628 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1629 if (
C.ScaleCost != 0)
1630 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1632 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1633 if (
C.SetupCost != 0)
1634 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1643bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1646 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1647 if (PN->getIncomingValue(i) == OperandValToReplace &&
1648 L->contains(PN->getIncomingBlock(i)))
1653 return !
L->contains(UserInst);
1656#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1657void LSRFixup::print(raw_ostream &OS)
const {
1662 Store->getOperand(0)->printAsOperand(OS,
false);
1668 OS <<
", OperandValToReplace=";
1671 for (
const Loop *PIL : PostIncLoops) {
1672 OS <<
", PostIncLoop=";
1673 PIL->getHeader()->printAsOperand(OS,
false);
1677 OS <<
", Offset=" <<
Offset;
1687bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1689 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1696float LSRUse::getNotSelectedProbability(
const SCEV *
Reg)
const {
1698 for (
const Formula &
F : Formulae)
1699 if (
F.referencesReg(
Reg))
1701 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1706bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1707 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1709 if (!Formulae.empty() && RigidFormula)
1713 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1721 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1722 "Zero allocated in a scaled register!");
1724 for (
const SCEV *BaseReg :
F.BaseRegs)
1725 assert(!
BaseReg->isZero() &&
"Zero allocated in a base register!");
1729 Formulae.push_back(
F);
1740void LSRUse::DeleteFormula(Formula &
F) {
1741 if (&
F != &Formulae.back())
1743 Formulae.pop_back();
1747void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1749 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1751 for (
const Formula &
F : Formulae) {
1752 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1757 for (
const SCEV *S : OldRegs)
1759 RegUses.dropRegister(S, LUIdx);
1762#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1763void LSRUse::print(raw_ostream &OS)
const {
1764 OS <<
"LSR Use: Kind=";
1766 case Basic: OS <<
"Basic";
break;
1767 case Special: OS <<
"Special";
break;
1768 case ICmpZero: OS <<
"ICmpZero";
break;
1770 OS <<
"Address of ";
1774 OS << *AccessTy.MemTy;
1777 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1780 OS <<
", Offsets={";
1781 bool NeedComma =
false;
1782 for (
const LSRFixup &
Fixup : Fixups) {
1783 if (NeedComma) OS <<
',';
1789 if (AllFixupsOutsideLoop)
1790 OS <<
", all-fixups-outside-loop";
1792 if (AllFixupsUnconditional)
1793 OS <<
", all-fixups-unconditional";
1795 if (WidestFixupType)
1796 OS <<
", widest fixup type: " << *WidestFixupType;
1805 LSRUse::KindType Kind, MemAccessTy AccessTy,
1807 bool HasBaseReg, int64_t Scale,
1810 case LSRUse::Address: {
1811 int64_t FixedOffset =
1812 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1813 int64_t ScalableOffset =
1814 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1815 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
1816 HasBaseReg, Scale, AccessTy.AddrSpace,
1817 Fixup, ScalableOffset);
1819 case LSRUse::ICmpZero:
1826 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1831 if (Scale != 0 && Scale != -1)
1836 if (BaseOffset.isNonZero()) {
1839 if (BaseOffset.isScalable())
1849 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1850 return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
1858 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1860 case LSRUse::Special:
1862 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1869 Immediate MinOffset, Immediate MaxOffset,
1870 LSRUse::KindType Kind, MemAccessTy AccessTy,
1872 bool HasBaseReg, int64_t Scale) {
1873 if (BaseOffset.isNonZero() &&
1874 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1875 BaseOffset.isScalable() != MaxOffset.isScalable()))
1878 int64_t
Base = BaseOffset.getKnownMinValue();
1879 int64_t Min = MinOffset.getKnownMinValue();
1880 int64_t Max = MaxOffset.getKnownMinValue();
1883 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1886 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1889 HasBaseReg, Scale) &&
1895 Immediate MinOffset, Immediate MaxOffset,
1896 LSRUse::KindType Kind, MemAccessTy AccessTy,
1897 const Formula &
F,
const Loop &L) {
1905 assert((
F.isCanonical(L) ||
F.Scale != 0));
1907 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1912 Immediate MaxOffset, LSRUse::KindType Kind,
1914 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1917 BaseOffset, HasBaseReg, Scale) ||
1922 BaseGV, BaseOffset,
true, 0));
1926 Immediate MaxOffset, LSRUse::KindType Kind,
1927 MemAccessTy AccessTy,
const Formula &
F) {
1928 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1929 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1935 return TTI.isLegalAddScalableImmediate(
Offset.getKnownMinValue());
1937 return TTI.isLegalAddImmediate(
Offset.getFixedValue());
1941 const LSRUse &LU,
const Formula &
F) {
1943 if (LU.Kind == LSRUse::Address &&
TTI.LSRWithInstrQueries()) {
1944 for (
const LSRFixup &
Fixup : LU.Fixups)
1946 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1947 F.Scale,
Fixup.UserInst))
1953 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1958 const LSRUse &LU,
const Formula &
F,
1967 return F.Scale != 1;
1970 case LSRUse::Address: {
1972 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1973 if (
F.BaseOffset.isScalable()) {
1974 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1975 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1977 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1978 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1982 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1985 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1988 "Legal addressing mode has an illegal cost!");
1989 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1991 case LSRUse::ICmpZero:
1993 case LSRUse::Special:
2003 LSRUse::KindType Kind, MemAccessTy AccessTy,
2007 if (BaseOffset.isZero() && !BaseGV)
2012 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2016 if (!HasBaseReg && Scale == 1) {
2026 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2036 Immediate MaxOffset, LSRUse::KindType Kind,
2037 MemAccessTy AccessTy,
const SCEV *S,
2040 if (S->
isZero())
return true;
2053 if (BaseOffset.isZero() && !BaseGV)
2056 if (BaseOffset.isScalable())
2061 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2064 BaseOffset, HasBaseReg, Scale);
2081 const SCEV *IncExpr;
2083 IVInc(Instruction *U,
Value *O,
const SCEV *
E)
2084 : UserInst(
U), IVOperand(
O), IncExpr(
E) {}
2091 const SCEV *ExprBase =
nullptr;
2093 IVChain() =
default;
2094 IVChain(
const IVInc &Head,
const SCEV *
Base)
2095 : Incs(1, Head), ExprBase(
Base) {}
2100 const_iterator
begin()
const {
2102 return std::next(Incs.
begin());
2104 const_iterator
end()
const {
2109 bool hasIncs()
const {
return Incs.
size() >= 2; }
2118 bool isProfitableIncrement(
const SCEV *OperExpr,
2119 const SCEV *IncExpr,
2127 SmallPtrSet<Instruction*, 4> FarUsers;
2128 SmallPtrSet<Instruction*, 4> NearUsers;
2134 ScalarEvolution &SE;
2137 AssumptionCache &AC;
2138 TargetLibraryInfo &TLI;
2139 const TargetTransformInfo &
TTI;
2141 MemorySSAUpdater *MSSAU;
2145 bool HardwareLoopProfitable =
false;
2159 SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
2166 SmallSetVector<Type *, 4> Types;
2172 RegUseTracker RegUses;
2177 static const unsigned MaxChains = 8;
2183 SmallPtrSet<Use*, MaxChains> IVIncSet;
2186 SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
2192 SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2194 void OptimizeShadowIV();
2195 bool FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse);
2197 void OptimizeLoopTermCond();
2199 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2200 SmallVectorImpl<ChainUsers> &ChainUsersVec);
2201 void FinalizeChain(IVChain &Chain);
2202 void CollectChains();
2203 void GenerateIVChain(
const IVChain &Chain,
2204 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2206 void CollectInterestingTypesAndFactors();
2207 void CollectFixupsAndInitialFormulae();
2210 using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2213 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2214 LSRUse::KindType Kind, MemAccessTy AccessTy);
2216 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2217 MemAccessTy AccessTy);
2219 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2221 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2223 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2224 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2225 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2226 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2227 bool IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const;
2229 void CollectLoopInvariantFixupsAndFormulae();
2231 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2232 unsigned Depth = 0);
2234 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2236 size_t Idx,
bool IsScaledReg =
false);
2237 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2238 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2239 const Formula &
Base,
size_t Idx,
2240 bool IsScaledReg =
false);
2241 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2242 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2243 const Formula &
Base,
2244 const SmallVectorImpl<Immediate> &Worklist,
2245 size_t Idx,
bool IsScaledReg =
false);
2246 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2247 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2248 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2249 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2250 void GenerateCrossUseConstantOffsets();
2251 void GenerateAllReuseFormulae();
2253 void FilterOutUndesirableDedicatedRegisters();
2255 size_t EstimateSearchSpaceComplexity()
const;
2256 void NarrowSearchSpaceByDetectingSupersets();
2257 void NarrowSearchSpaceByCollapsingUnrolledCode();
2258 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2259 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2260 void NarrowSearchSpaceByFilterPostInc();
2261 void NarrowSearchSpaceByDeletingCostlyFormulas();
2262 void NarrowSearchSpaceByPickingWinnerRegs();
2263 void NarrowSearchSpaceUsingHeuristics();
2265 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2267 SmallVectorImpl<const Formula *> &Workspace,
2268 const Cost &CurCost,
2269 const SmallPtrSet<const SCEV *, 16> &CurRegs,
2270 DenseSet<const SCEV *> &VisitedRegs)
const;
2271 void Solve(SmallVectorImpl<const Formula *> &Solution)
const;
2275 const SmallVectorImpl<Instruction *> &Inputs)
const;
2278 const LSRUse &LU)
const;
2280 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2282 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const;
2283 void RewriteForPHI(PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2285 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2286 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2287 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2288 void ImplementSolution(
const SmallVectorImpl<const Formula *> &Solution);
2291 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2292 LoopInfo &LI,
const TargetTransformInfo &
TTI, AssumptionCache &AC,
2293 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2295 bool getChanged()
const {
return Changed; }
2296 const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs()
const {
2297 return ScalarEvolutionIVs;
2300 void print_factors_and_types(raw_ostream &OS)
const;
2301 void print_fixups(raw_ostream &OS)
const;
2302 void print_uses(raw_ostream &OS)
const;
2303 void print(raw_ostream &OS)
const;
2311void LSRInstance::OptimizeShadowIV() {
2321 Type *DestTy =
nullptr;
2322 bool IsSigned =
false;
2338 DestTy = UCast->getDestTy();
2342 DestTy = SCast->getDestTy();
2344 if (!DestTy)
continue;
2364 if (Mantissa == -1)
continue;
2368 unsigned Entry, Latch;
2378 if (!Init)
continue;
2379 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2383 BinaryOperator *Incr =
2385 if (!Incr)
continue;
2386 if (Incr->
getOpcode() != Instruction::Add
2387 && Incr->
getOpcode() != Instruction::Sub)
2391 ConstantInt *
C =
nullptr;
2403 if (!
C->getValue().isStrictlyPositive())
2411 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2413 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2414 : Instruction::FSub,
2431bool LSRInstance::FindIVUserForCond(Instruction *
Cond, IVStrideUse *&CondUse) {
2432 for (IVStrideUse &U : IU)
2433 if (
U.getUser() ==
Cond) {
2491Instruction *LSRInstance::OptimizeMax(ICmpInst *
Cond, IVStrideUse *&CondUse) {
2506 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2507 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2513 const SCEVNAryExpr *
Max =
nullptr;
2515 Pred = ICmpInst::ICMP_SLE;
2518 Pred = ICmpInst::ICMP_SLT;
2521 Pred = ICmpInst::ICMP_ULT;
2530 if (
Max->getNumOperands() != 2)
2533 const SCEV *MaxLHS =
Max->getOperand(0);
2534 const SCEV *MaxRHS =
Max->getOperand(1);
2539 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2550 "Loop condition operand is an addrec in a different loop!");
2554 Value *NewRHS =
nullptr;
2555 if (ICmpInst::isTrueWhenEqual(Pred)) {
2559 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2560 NewRHS = BO->getOperand(0);
2563 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2564 NewRHS = BO->getOperand(0);
2572 NewRHS = SU->getValue();
2584 ICmpInst *NewCond =
new ICmpInst(
Cond->getIterator(), Pred,
2585 Cond->getOperand(0), NewRHS,
"scmp");
2589 Cond->replaceAllUsesWith(NewCond);
2592 Cond->eraseFromParent();
2594 if (
Cmp->use_empty()) {
2596 Cmp->eraseFromParent();
2603LSRInstance::OptimizeLoopTermCond() {
2604 SmallPtrSet<Instruction *, 4> PostIncs;
2619 SmallVector<BasicBlock*, 8> ExitingBlocks;
2620 L->getExitingBlocks(ExitingBlocks);
2628 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2650 IVStrideUse *CondUse =
nullptr;
2651 if (!FindIVUserForCond(
Cond, CondUse))
2661 Cond = OptimizeMax(Cmp, CondUse);
2666 if (!DT.
dominates(ExitingBlock, LatchBlock))
2671 if (LatchBlock != ExitingBlock)
2672 for (
const IVStrideUse &UI : IU)
2675 if (&UI != CondUse &&
2679 const SCEV *
A = IU.getStride(*CondUse, L);
2680 const SCEV *
B = IU.getStride(UI, L);
2681 if (!
A || !
B)
continue;
2690 if (
const SCEVConstant *
D =
2692 const ConstantInt *
C =
D->getValue();
2694 if (
C->isOne() ||
C->isMinusOne())
2695 goto decline_post_inc;
2697 if (
C->getValue().getSignificantBits() >= 64 ||
2698 C->getValue().isMinSignedValue())
2699 goto decline_post_inc;
2702 MemAccessTy AccessTy =
2704 int64_t Scale =
C->getSExtValue();
2708 AccessTy.AddrSpace))
2709 goto decline_post_inc;
2714 AccessTy.AddrSpace))
2715 goto decline_post_inc;
2720 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2728 if (
Cond->hasOneUse()) {
2734 Cond->setName(
L->getHeader()->getName() +
".termcond");
2756 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2757 for (Instruction *Inst : PostIncs)
2763bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2764 bool HasBaseReg, LSRUse::KindType Kind,
2765 MemAccessTy AccessTy) {
2766 Immediate NewMinOffset = LU.MinOffset;
2767 Immediate NewMaxOffset = LU.MaxOffset;
2768 MemAccessTy NewAccessTy = AccessTy;
2773 if (LU.Kind != Kind)
2779 if (Kind == LSRUse::Address) {
2780 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2781 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->
getContext(),
2782 AccessTy.AddrSpace);
2787 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2789 LU.MaxOffset - NewOffset, HasBaseReg))
2791 NewMinOffset = NewOffset;
2792 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2794 NewOffset - LU.MinOffset, HasBaseReg))
2796 NewMaxOffset = NewOffset;
2802 if (NewAccessTy.MemTy && NewAccessTy.MemTy->
isVoidTy() &&
2803 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2807 LU.MinOffset = NewMinOffset;
2808 LU.MaxOffset = NewMaxOffset;
2809 LU.AccessTy = NewAccessTy;
2816std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2817 LSRUse::KindType Kind,
2818 MemAccessTy AccessTy) {
2819 const SCEV *
Copy = Expr;
2820 SCEVUse ExprUse = Expr;
2828 Offset = Immediate::getFixed(0);
2831 std::pair<UseMapTy::iterator, bool>
P =
2832 UseMap.
try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
2835 size_t LUIdx =
P.first->second;
2836 LSRUse &LU =
Uses[LUIdx];
2837 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2839 return std::make_pair(LUIdx,
Offset);
2843 size_t LUIdx =
Uses.size();
2844 P.first->second = LUIdx;
2845 Uses.push_back(LSRUse(Kind, AccessTy));
2846 LSRUse &LU =
Uses[LUIdx];
2850 return std::make_pair(LUIdx,
Offset);
2854void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2855 if (&LU != &
Uses.back())
2860 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2866LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2867 const LSRUse &OrigLU) {
2869 for (LSRUse &LU :
Uses) {
2875 if (&LU != &OrigLU &&
2876 LU.Kind != LSRUse::ICmpZero &&
2877 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2878 LU.WidestFixupType == OrigLU.WidestFixupType &&
2879 LU.HasFormulaWithSameRegs(OrigF)) {
2881 for (
const Formula &
F : LU.Formulae) {
2884 if (
F.BaseRegs == OrigF.BaseRegs &&
2885 F.ScaledReg == OrigF.ScaledReg &&
2886 F.BaseGV == OrigF.BaseGV &&
2887 F.Scale == OrigF.Scale &&
2888 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2889 if (
F.BaseOffset.isZero())
2904void LSRInstance::CollectInterestingTypesAndFactors() {
2905 SmallSetVector<const SCEV *, 4> Strides;
2909 for (
const IVStrideUse &U : IU) {
2910 const SCEV *Expr = IU.getExpr(U);
2928 }
while (!Worklist.
empty());
2932 for (SmallSetVector<const SCEV *, 4>::const_iterator
2934 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2935 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2936 const SCEV *OldStride = *
I;
2937 const SCEV *NewStride = *NewStrideIter;
2947 if (
const SCEVConstant *Factor =
2950 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2951 Factors.insert(Factor->getAPInt().getSExtValue());
2952 }
else if (
const SCEVConstant *Factor =
2956 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2957 Factors.insert(Factor->getAPInt().getSExtValue());
2963 if (Types.size() == 1)
2975 for(; OI != OE; ++OI) {
2994 return Trunc->getOperand(0);
3027 if (SubExpr->getSCEVType() ==
scAddExpr)
3030 if (SubExpr->getSCEVType() !=
scMulExpr)
3046bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3047 const SCEV *IncExpr,
3048 ScalarEvolution &SE) {
3061 SmallPtrSet<const SCEV*, 8> Processed;
3082 if (!Chain.hasIncs())
3085 if (!
Users.empty()) {
3086 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3088 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3091 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3100 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3103 const SCEV *LastIncExpr =
nullptr;
3104 unsigned NumConstIncrements = 0;
3105 unsigned NumVarIncrements = 0;
3106 unsigned NumReusedIncrements = 0;
3108 if (
TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
3111 for (
const IVInc &Inc : Chain) {
3112 if (
TTI.isProfitableLSRChainElement(Inc.UserInst))
3114 if (Inc.IncExpr->isZero())
3120 ++NumConstIncrements;
3124 if (Inc.IncExpr == LastIncExpr)
3125 ++NumReusedIncrements;
3129 LastIncExpr = Inc.IncExpr;
3134 if (NumConstIncrements > 1)
3141 cost += NumVarIncrements;
3145 cost -= NumReusedIncrements;
3147 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3154void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
3155 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
3159 const SCEV *
const OperExpr = SE.
getSCEV(NextIV);
3160 const SCEV *
const OperExprBase =
getExprBase(OperExpr);
3164 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3165 const SCEV *LastIncExpr =
nullptr;
3166 for (; ChainIdx < NChains; ++ChainIdx) {
3167 IVChain &Chain = IVChainVec[ChainIdx];
3185 const SCEV *PrevExpr = SE.
getSCEV(PrevIV);
3186 const SCEV *IncExpr = SE.
getMinusSCEV(OperExpr, PrevExpr);
3190 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3191 LastIncExpr = IncExpr;
3197 if (ChainIdx == NChains) {
3204 LastIncExpr = OperExpr;
3211 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3213 ChainUsersVec.
resize(NChains);
3214 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3215 <<
") IV=" << *LastIncExpr <<
"\n");
3217 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3218 <<
") IV+" << *LastIncExpr <<
"\n");
3220 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3222 IVChain &Chain = IVChainVec[ChainIdx];
3224 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
3226 if (!LastIncExpr->
isZero()) {
3227 ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);
3236 for (User *U : IVOper->
users()) {
3242 IVChain::const_iterator IncIter = Chain.Incs.begin();
3243 IVChain::const_iterator IncEnd = Chain.Incs.end();
3244 for( ; IncIter != IncEnd; ++IncIter) {
3245 if (IncIter->UserInst == OtherUse)
3248 if (IncIter != IncEnd)
3253 && IU.isIVUserOrOperand(OtherUse)) {
3256 NearUsers.
insert(OtherUse);
3261 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3286void LSRInstance::CollectChains() {
3290 SmallVector<BasicBlock *,8> LatchPath;
3293 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3299 for (BasicBlock *BB :
reverse(LatchPath)) {
3300 for (Instruction &
I : *BB) {
3312 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3313 ChainIdx < NChains; ++ChainIdx) {
3314 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3317 SmallPtrSet<Instruction*, 4> UniqueOperands;
3320 while (IVOpIter != IVOpEnd) {
3322 if (UniqueOperands.
insert(IVOpInst).second)
3323 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3324 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3329 for (PHINode &PN :
L->getHeader()->phis()) {
3336 ChainInstruction(&PN, IncV, ChainUsersVec);
3339 unsigned ChainIdx = 0;
3340 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3341 UsersIdx < NChains; ++UsersIdx) {
3343 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3346 if (ChainIdx != UsersIdx)
3347 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3348 FinalizeChain(IVChainVec[ChainIdx]);
3351 IVChainVec.resize(ChainIdx);
3354void LSRInstance::FinalizeChain(IVChain &Chain) {
3355 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3356 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3358 for (
const IVInc &Inc : Chain) {
3360 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3361 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3362 IVIncSet.insert(UseI);
3370 Immediate IncOffset = Immediate::getZero();
3379 C->getSignificantBits() > 64)
3381 IncOffset = Immediate::getScalable(
C->getSExtValue());
3397void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3398 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3401 const IVInc &Head = Chain.Incs[0];
3406 Value *IVSrc =
nullptr;
3407 while (IVOpIter != IVOpEnd) {
3418 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3419 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3422 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3424 if (IVOpIter == IVOpEnd) {
3426 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3429 assert(IVSrc &&
"Failed to find IV chain source");
3434 const SCEV *LeftOverExpr =
nullptr;
3435 const SCEV *Accum = SE.
getZero(IntTy);
3439 for (
const IVInc &Inc : Chain) {
3442 InsertPt =
L->getLoopLatch()->getTerminator();
3446 Value *IVOper = IVSrc;
3447 if (!Inc.IncExpr->isZero()) {
3452 LeftOverExpr = LeftOverExpr ?
3453 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3457 bool FoundBase =
false;
3458 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3459 const SCEV *Remainder = SE.
getMinusSCEV(Accum, MapScev);
3461 if (!Remainder->
isZero()) {
3463 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3464 const SCEV *IVOperExpr =
3466 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3475 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3478 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3481 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3485 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3488 LeftOverExpr =
nullptr;
3492 if (IVTy != OperTy) {
3494 "cannot extend a chained IV");
3496 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3498 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3505 for (PHINode &Phi :
L->getHeader()->phis()) {
3509 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3512 Value *IVOper = IVSrc;
3514 if (IVTy != PostIncTy) {
3516 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3517 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3518 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3520 Phi.replaceUsesOfWith(PostIncV, IVOper);
3526void LSRInstance::CollectFixupsAndInitialFormulae() {
3527 CondBrInst *ExitBranch =
nullptr;
3528 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3531 SmallPtrSet<const SCEV *, 16> Regs;
3532 DenseSet<const SCEV *> VisitedRegs;
3533 DenseSet<size_t> VisitedLSRUse;
3535 for (
const IVStrideUse &U : IU) {
3540 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3541 if (IVIncSet.count(UseI)) {
3542 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3546 LSRUse::KindType
Kind = LSRUse::Basic;
3547 MemAccessTy AccessTy;
3549 Kind = LSRUse::Address;
3553 const SCEV *S = IU.getExpr(U);
3569 if (CI->isEquality()) {
3572 Value *
NV = CI->getOperand(1);
3573 if (NV ==
U.getOperandValToReplace()) {
3574 CI->setOperand(1, CI->getOperand(0));
3575 CI->setOperand(0, NV);
3576 NV = CI->getOperand(1);
3583 (!
NV->getType()->isPointerTy() ||
3590 Kind = LSRUse::ICmpZero;
3592 }
else if (
L->isLoopInvariant(NV) &&
3595 !
NV->getType()->isPointerTy()) {
3606 Kind = LSRUse::ICmpZero;
3613 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3614 if (Factors[i] != -1)
3615 Factors.insert(-(uint64_t)Factors[i]);
3621 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3622 size_t LUIdx =
P.first;
3624 LSRUse &LU =
Uses[LUIdx];
3627 LSRFixup &LF = LU.getNewFixup();
3628 LF.UserInst = UserInst;
3629 LF.OperandValToReplace =
U.getOperandValToReplace();
3630 LF.PostIncLoops = TmpPostIncLoops;
3632 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3633 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3636 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3638 F.initialMatch(S, L, SE);
3639 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU,
3640 HardwareLoopProfitable);
3641 VisitedLSRUse.
insert(LUIdx);
3644 if (!LU.WidestFixupType ||
3647 LU.WidestFixupType = LF.OperandValToReplace->
getType();
3650 if (LU.Formulae.empty()) {
3651 InsertInitialFormula(S, LU, LUIdx);
3652 CountRegisters(LU.Formulae.back(), LUIdx);
3661void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3665 LU.RigidFormula =
true;
3668 F.initialMatch(S, L, SE);
3669 bool Inserted = InsertFormula(LU, LUIdx,
F);
3670 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3676LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3677 LSRUse &LU,
size_t LUIdx) {
3679 F.BaseRegs.push_back(S);
3680 F.HasBaseReg =
true;
3681 bool Inserted = InsertFormula(LU, LUIdx,
F);
3682 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3686void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3688 RegUses.countRegister(
F.ScaledReg, LUIdx);
3689 for (
const SCEV *BaseReg :
F.BaseRegs)
3690 RegUses.countRegister(BaseReg, LUIdx);
3695bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3698 "Formula is illegal");
3700 if (!LU.InsertFormula(
F, *L))
3703 CountRegisters(
F, LUIdx);
3709bool LSRInstance::IsFixupExecutedEachIncrement(
const LSRFixup &LF)
const {
3721LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3723 SmallPtrSet<const SCEV *, 32> Visited;
3730 while (!Worklist.
empty()) {
3734 if (!Visited.
insert(S).second)
3745 const Value *
V = US->getValue();
3748 if (
L->contains(Inst))
continue;
3752 for (
const Use &U :
V->uses()) {
3762 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3784 bool HasIncompatibleEHPTerminatedBlock =
false;
3786 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3787 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3788 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3789 HasIncompatibleEHPTerminatedBlock =
true;
3794 if (HasIncompatibleEHPTerminatedBlock) {
3817 unsigned OtherIdx = !
U.getOperandNo();
3818 Value *OtherOp = ICI->getOperand(OtherIdx);
3828 std::pair<size_t, Immediate>
P =
3829 getUse(S, LSRUse::Basic, MemAccessTy());
3830 size_t LUIdx =
P.first;
3832 LSRUse &LU =
Uses[LUIdx];
3833 LSRFixup &LF = LU.getNewFixup();
3834 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3835 LF.OperandValToReplace =
U;
3837 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3838 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3839 if (!LU.WidestFixupType ||
3842 LU.WidestFixupType = LF.OperandValToReplace->
getType();
3843 InsertSupplementalFormula(US, LU, LUIdx);
3844 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3860 unsigned Depth = 0) {
3867 for (
const SCEV *S :
Add->operands()) {
3874 const SCEV *Start, *Step;
3879 if (Start->isZero())
3888 Remainder =
nullptr;
3890 if (Remainder != Start) {
3912 LSRUse &LU,
const SCEV *S,
const Loop *L,
3914 if (LU.Kind != LSRUse::Address ||
3915 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3921 if (
TTI.isIndexedLoadLegal(
TTI.MIM_PostInc, S->
getType()) ||
3930void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3931 const Formula &
Base,
3932 unsigned Depth,
size_t Idx,
3934 const SCEV *
BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
3942 const SCEV *Remainder =
CollectSubexprs(BaseReg,
nullptr, AddOps, L, SE);
3946 if (AddOps.
size() == 1)
3960 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3965 InnerAddOps.append(std::next(J), std::as_const(AddOps).
end());
3969 if (InnerAddOps.size() == 1 &&
3971 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3974 const SCEV *InnerSum = SE.
getAddExpr(InnerAddOps);
3979 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3988 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
3991 F.ScaledReg =
nullptr;
3994 F.BaseRegs.erase(
F.BaseRegs.begin() + Idx);
3995 }
else if (IsScaledReg)
3996 F.ScaledReg = InnerSum;
3998 F.BaseRegs[Idx] = InnerSum;
4006 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
4009 F.BaseRegs.push_back(*J);
4014 if (InsertFormula(LU, LUIdx,
F))
4021 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4027void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4029 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4034 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4035 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4037 if (
Base.Scale == 1)
4038 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4044void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4047 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4048 (
Base.UnfoldedOffset.isNonZero()) <=
4056 Formula NewBase =
Base;
4057 NewBase.BaseRegs.clear();
4058 Type *CombinedIntegerType =
nullptr;
4059 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4062 if (!CombinedIntegerType)
4064 Ops.push_back(BaseReg);
4067 NewBase.BaseRegs.push_back(BaseReg);
4071 if (
Ops.size() == 0)
4076 auto GenerateFormula = [&](
const SCEV *Sum) {
4077 Formula
F = NewBase;
4085 F.BaseRegs.push_back(Sum);
4087 (void)InsertFormula(LU, LUIdx,
F);
4091 if (
Ops.size() > 1) {
4098 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4099 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4101 NewBase.UnfoldedOffset.getFixedValue(),
true));
4102 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4108void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4109 const Formula &
Base,
size_t Idx,
4111 SCEVUse
G = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
4113 if (
G->isZero() || !GV)
4117 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4122 F.BaseRegs[Idx] =
G;
4123 (void)InsertFormula(LU, LUIdx,
F);
4127void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4130 if (
Base.BaseGV)
return;
4132 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4133 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4134 if (
Base.Scale == 1)
4135 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4140void LSRInstance::GenerateConstantOffsetsImpl(
4141 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4142 const SmallVectorImpl<Immediate> &Worklist,
size_t Idx,
bool IsScaledReg) {
4144 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4146 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4148 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4150 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4152 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4158 F.ScaledReg =
nullptr;
4160 F.deleteBaseReg(
F.BaseRegs[Idx]);
4162 }
else if (IsScaledReg)
4165 F.BaseRegs[Idx] = NewG;
4167 (void)InsertFormula(LU, LUIdx,
F);
4171 SCEVUse
G = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
4182 const APInt *StepInt;
4187 for (Immediate
Offset : Worklist) {
4189 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4195 for (Immediate
Offset : Worklist)
4199 if (
G->isZero() ||
Imm.isZero() ||
4200 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4203 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4204 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4209 F.BaseRegs[Idx] =
G;
4214 (void)InsertFormula(LU, LUIdx,
F);
4218void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4224 if (LU.MaxOffset != LU.MinOffset)
4227 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4228 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4229 if (
Base.Scale == 1)
4230 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4236void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4238 if (LU.Kind != LSRUse::ICmpZero)
return;
4246 if (LU.MinOffset != LU.MaxOffset)
return;
4249 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4251 for (
const SCEV *BaseReg :
Base.BaseRegs)
4252 if (
BaseReg->getType()->isPointerTy())
4254 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4257 for (int64_t Factor : Factors) {
4262 if (
Base.BaseOffset.isMin() && Factor == -1)
4265 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4267 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4268 assert(Factor != 0 &&
"Zero factor not expected!");
4269 if (NewBaseOffset.getFixedValue() / Factor !=
4270 Base.BaseOffset.getFixedValue())
4278 Immediate
Offset = LU.MinOffset;
4279 if (
Offset.isMin() && Factor == -1)
4282 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4290 F.BaseOffset = NewBaseOffset;
4297 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4299 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4302 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4316 if (
F.UnfoldedOffset.isNonZero()) {
4317 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4319 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4320 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4321 Base.UnfoldedOffset.getFixedValue())
4325 IntTy,
F.UnfoldedOffset.getFixedValue()))
4330 (void)InsertFormula(LU, LUIdx,
F);
4337void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4344 if (
Base.Scale != 0 && !
Base.unscale())
4347 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4350 for (int64_t Factor : Factors) {
4351 Base.Scale = Factor;
4352 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4354 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4358 if (LU.Kind == LSRUse::Basic &&
4359 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4360 LU.AccessTy,
Base) &&
4361 LU.AllFixupsOutsideLoop)
4362 LU.Kind = LSRUse::Special;
4368 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4369 Base.BaseOffset.isZero() && !
Base.BaseGV)
4372 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4374 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4375 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4380 if (
const SCEV *Quotient =
getExactSDiv(AR, FactorS, SE,
true))
4381 if (!Quotient->isZero()) {
4384 F.ScaledReg = Quotient;
4385 F.deleteBaseReg(
F.BaseRegs[i]);
4389 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4390 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4394 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4396 (void)InsertFormula(LU, LUIdx,
F);
4412 const SCEV *Result =
nullptr;
4413 for (
auto &L :
Loops) {
4417 if (!New || (Result && New != Result))
4422 assert(Result &&
"failed to create expression");
4427void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4429 if (
Base.BaseGV)
return;
4439 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4442 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4446 for (
auto &LF : LU.Fixups)
4447 Loops.push_back(LF.PostIncLoops);
4449 for (
Type *SrcTy : Types) {
4458 const SCEV *NewScaledReg =
4460 if (!NewScaledReg || NewScaledReg->
isZero())
4462 F.ScaledReg = NewScaledReg;
4464 bool HasZeroBaseReg =
false;
4465 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4466 const SCEV *NewBaseReg =
4468 if (!NewBaseReg || NewBaseReg->
isZero()) {
4469 HasZeroBaseReg =
true;
4479 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4483 (void)InsertFormula(LU, LUIdx,
F);
4496 const SCEV *OrigReg;
4498 WorkItem(
size_t LI, Immediate
I,
const SCEV *R)
4499 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4501 void print(raw_ostream &OS)
const;
4507#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4508void WorkItem::print(raw_ostream &OS)
const {
4509 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4510 <<
" , add offset " <<
Imm;
4520void LSRInstance::GenerateCrossUseConstantOffsets() {
4522 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4524 DenseMap<const SCEV *, ImmMapTy>
Map;
4525 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4527 for (
const SCEV *Use : RegUses) {
4530 auto Pair =
Map.try_emplace(
Reg);
4533 Pair.first->second.insert(std::make_pair(Imm, Use));
4534 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(Use);
4541 SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
4543 for (
const SCEV *
Reg : Sequence) {
4544 const ImmMapTy &Imms =
Map.find(
Reg)->second;
4547 if (Imms.size() == 1)
4551 for (
const auto &Entry
4553 <<
' ' <<
Entry.first;
4557 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4559 const SCEV *OrigReg = J->second;
4561 Immediate JImm = J->first;
4562 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4565 UsedByIndicesMap[
Reg].
count() == 1) {
4573 Immediate
First = Imms.begin()->first;
4574 Immediate
Last = std::prev(Imms.end())->first;
4575 if (!
First.isCompatibleImmediate(
Last)) {
4582 bool Scalable =
First.isScalable() ||
Last.isScalable();
4583 int64_t FI =
First.getKnownMinValue();
4584 int64_t LI =
Last.getKnownMinValue();
4587 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4590 Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
4591 ImmMapTy::const_iterator OtherImms[] = {
4592 Imms.begin(), std::prev(Imms.end()),
4593 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4594 for (
const auto &M : OtherImms) {
4595 if (M == J || M == JE)
continue;
4596 if (!JImm.isCompatibleImmediate(
M->first))
4600 Immediate
Imm = JImm.subUnsigned(
M->first);
4601 for (
unsigned LUIdx : UsedByIndices.
set_bits())
4603 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4604 WorkItems.
push_back(WorkItem(LUIdx, Imm, OrigReg));
4611 UsedByIndicesMap.
clear();
4612 UniqueItems.
clear();
4615 for (
const WorkItem &WI : WorkItems) {
4616 size_t LUIdx = WI.LUIdx;
4617 LSRUse &LU =
Uses[LUIdx];
4618 Immediate
Imm = WI.Imm;
4619 const SCEV *OrigReg = WI.OrigReg;
4622 const SCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4626 for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4627 Formula
F = LU.Formulae[
L];
4634 if (
F.ScaledReg == OrigReg) {
4635 if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4637 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4639 const SCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4640 if (
F.referencesReg(S))
4643 NewF.BaseOffset =
Offset;
4644 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4647 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4656 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4658 if (
C->getValue()->isNegative() !=
4659 (NewF.BaseOffset.isLessThanZero()) &&
4660 (
C->getAPInt().abs() * APInt(
BitWidth,
F.Scale))
4661 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4666 NewF.canonicalize(*this->L);
4667 (void)InsertFormula(LU, LUIdx, NewF);
4670 for (
size_t N = 0, NE =
F.BaseRegs.size();
N != NE; ++
N) {
4672 if (BaseReg != OrigReg)
4675 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4676 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4677 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4679 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4681 LU.Kind, LU.AccessTy, NewF)) {
4685 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4689 NewF.UnfoldedOffset = NewUnfoldedOffset;
4691 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4696 for (
const SCEV *NewReg : NewF.BaseRegs)
4698 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4700 if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4702 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4703 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4706 NewF.BaseOffset.getFixedValue()))
4711 NewF.canonicalize(*this->L);
4712 (void)InsertFormula(LU, LUIdx, NewF);
4723LSRInstance::GenerateAllReuseFormulae() {
4726 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4727 LSRUse &LU =
Uses[LUIdx];
4728 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4729 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4730 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4731 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4733 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4734 LSRUse &LU =
Uses[LUIdx];
4735 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4736 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4737 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4738 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4739 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4740 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4741 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4742 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4744 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4745 LSRUse &LU =
Uses[LUIdx];
4746 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4747 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4750 GenerateCrossUseConstantOffsets();
4753 "After generating reuse formulae:\n";
4754 print_uses(
dbgs()));
4759void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4760 DenseSet<const SCEV *> VisitedRegs;
4761 SmallPtrSet<const SCEV *, 16> Regs;
4762 SmallPtrSet<const SCEV *, 16> LoserRegs;
4764 bool ChangedFormulae =
false;
4769 using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>,
size_t>;
4771 BestFormulaeTy BestFormulae;
4773 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4774 LSRUse &LU =
Uses[LUIdx];
4779 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4780 FIdx != NumForms; ++FIdx) {
4781 Formula &
F = LU.Formulae[FIdx];
4792 CostF.RateFormula(
F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4794 if (CostF.isLoser()) {
4806 for (
const SCEV *
Reg :
F.BaseRegs) {
4807 if (RegUses.isRegUsedByUsesOtherThan(
Reg, LUIdx))
4811 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4812 Key.push_back(
F.ScaledReg);
4817 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4818 BestFormulae.insert(std::make_pair(
Key, FIdx));
4822 Formula &Best = LU.Formulae[
P.first->second];
4824 Cost CostBest(L, SE,
TTI, AMK);
4826 CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
4827 HardwareLoopProfitable);
4828 if (CostF.isLess(CostBest))
4832 " in favor of formula ";
4833 Best.print(
dbgs());
dbgs() <<
'\n');
4836 ChangedFormulae =
true;
4838 LU.DeleteFormula(
F);
4846 LU.RecomputeRegs(LUIdx, RegUses);
4849 BestFormulae.clear();
4854 "After filtering out undesirable candidates:\n";
4862size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4864 for (
const LSRUse &LU :
Uses) {
4865 size_t FSize = LU.Formulae.size();
4880void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4884 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4885 "which use a superset of registers used by other "
4888 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4889 LSRUse &LU =
Uses[LUIdx];
4891 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4892 Formula &
F = LU.Formulae[i];
4893 if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4899 I =
F.BaseRegs.begin(),
E =
F.BaseRegs.end();
I !=
E; ++
I) {
4905 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4906 (uint64_t)
C->getValue()->getSExtValue());
4907 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4908 (
I -
F.BaseRegs.begin()));
4909 if (LU.HasFormulaWithSameRegs(NewF)) {
4912 LU.DeleteFormula(
F);
4923 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4924 (
I -
F.BaseRegs.begin()));
4925 if (LU.HasFormulaWithSameRegs(NewF)) {
4928 LU.DeleteFormula(
F);
4939 LU.RecomputeRegs(LUIdx, RegUses);
4948void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4953 dbgs() <<
"The search space is too complex.\n"
4954 "Narrowing the search space by assuming that uses separated "
4955 "by a constant offset will use the same registers.\n");
4959 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4960 LSRUse &LU =
Uses[LUIdx];
4961 for (
const Formula &
F : LU.Formulae) {
4962 if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
4965 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4969 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4970 LU.Kind, LU.AccessTy))
4975 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4976 LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
4979 for (LSRFixup &
Fixup : LU.Fixups) {
4980 Fixup.Offset +=
F.BaseOffset;
4981 LUThatHas->pushFixup(
Fixup);
4987 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4988 Formula &
F = LUThatHas->Formulae[i];
4989 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4990 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4992 LUThatHas->DeleteFormula(
F);
5000 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
5003 DeleteUse(LU, LUIdx);
5016void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5020 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
5021 "undesirable dedicated registers.\n");
5023 FilterOutUndesirableDedicatedRegisters();
5038void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5043 dbgs() <<
"The search space is too complex.\n"
5044 "Narrowing the search space by choosing the best Formula "
5045 "from the Formulae with the same Scale and ScaledReg.\n");
5048 using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>,
size_t>;
5050 BestFormulaeTy BestFormulae;
5052 bool ChangedFormulae =
false;
5054 DenseSet<const SCEV *> VisitedRegs;
5055 SmallPtrSet<const SCEV *, 16> Regs;
5057 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5058 LSRUse &LU =
Uses[LUIdx];
5063 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5068 size_t FARegNum = 0;
5069 for (
const SCEV *
Reg : FA.BaseRegs) {
5070 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5071 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5073 size_t FBRegNum = 0;
5074 for (
const SCEV *
Reg : FB.BaseRegs) {
5075 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(
Reg);
5076 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5078 if (FARegNum != FBRegNum)
5079 return FARegNum < FBRegNum;
5086 CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5088 CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5089 return CostFA.isLess(CostFB);
5093 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5095 Formula &
F = LU.Formulae[FIdx];
5098 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5102 Formula &Best = LU.Formulae[
P.first->second];
5103 if (IsBetterThan(
F, Best))
5107 " in favor of formula ";
5108 Best.print(
dbgs());
dbgs() <<
'\n');
5110 ChangedFormulae =
true;
5112 LU.DeleteFormula(
F);
5118 LU.RecomputeRegs(LUIdx, RegUses);
5121 BestFormulae.clear();
5126 "After filtering out undesirable candidates:\n";
5133void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5140 "Narrowing the search space by choosing the lowest "
5141 "register Formula for PostInc Uses.\n");
5143 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5144 LSRUse &LU =
Uses[LUIdx];
5146 if (LU.Kind != LSRUse::Address)
5152 size_t MinRegs = std::numeric_limits<size_t>::max();
5153 for (
const Formula &
F : LU.Formulae)
5154 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5157 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5159 Formula &
F = LU.Formulae[FIdx];
5160 if (
F.getNumRegs() > MinRegs) {
5163 LU.DeleteFormula(
F);
5170 LU.RecomputeRegs(LUIdx, RegUses);
5221void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5230 SmallPtrSet<const SCEV *, 4> UniqRegs;
5234 DenseMap <const SCEV *, float> RegNumMap;
5235 for (
const SCEV *
Reg : RegUses) {
5239 for (
const LSRUse &LU :
Uses) {
5240 if (!LU.Regs.count(
Reg))
5242 float P = LU.getNotSelectedProbability(
Reg);
5248 RegNumMap.
insert(std::make_pair(
Reg, PNotSel));
5252 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5255 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5256 LSRUse &LU =
Uses[LUIdx];
5258 if (LU.Formulae.size() < 2)
5263 float FMinRegNum = LU.Formulae[0].getNumRegs();
5264 float FMinARegNum = LU.Formulae[0].getNumRegs();
5266 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5267 Formula &
F = LU.Formulae[i];
5270 for (
const SCEV *BaseReg :
F.BaseRegs) {
5271 if (UniqRegs.
count(BaseReg))
5273 FRegNum += RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5276 RegNumMap[
BaseReg] / LU.getNotSelectedProbability(BaseReg);
5278 if (
const SCEV *ScaledReg =
F.ScaledReg) {
5279 if (!UniqRegs.
count(ScaledReg)) {
5281 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5284 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5287 if (FMinRegNum > FRegNum ||
5288 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5289 FMinRegNum = FRegNum;
5290 FMinARegNum = FARegNum;
5295 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5297 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5298 while (LU.Formulae.size() != 1) {
5301 LU.Formulae.pop_back();
5303 LU.RecomputeRegs(LUIdx, RegUses);
5304 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5305 Formula &
F = LU.Formulae[0];
5321 MemAccessTy AccessType) {
5331 return TTI.isLegalAddressingMode(
5332 AccessType.MemTy,
nullptr,
5333 Diff->getSExtValue(),
5334 true, 0, AccessType.AddrSpace) &&
5335 !
TTI.isLegalAddressingMode(
5336 AccessType.MemTy,
nullptr,
5337 -Diff->getSExtValue(),
5338 true, 0, AccessType.AddrSpace);
5344void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5347 SmallPtrSet<const SCEV *, 4> Taken;
5355 const SCEV *Best =
nullptr;
5356 unsigned BestNum = 0;
5357 for (
const SCEV *
Reg : RegUses) {
5362 BestNum = RegUses.getUsedByIndices(
Reg).count();
5364 unsigned Count = RegUses.getUsedByIndices(
Reg).count();
5365 if (
Count > BestNum) {
5373 if (
Count == BestNum) {
5374 int LUIdx = RegUses.getUsedByIndices(
Reg).find_first();
5375 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5377 Uses[LUIdx].AccessTy)) {
5384 assert(Best &&
"Failed to find best LSRUse candidate");
5386 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5387 <<
" will yield profitable reuse.\n");
5392 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5393 LSRUse &LU =
Uses[LUIdx];
5394 if (!LU.Regs.count(Best))
continue;
5397 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5398 Formula &
F = LU.Formulae[i];
5399 if (!
F.referencesReg(Best)) {
5401 LU.DeleteFormula(
F);
5405 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5411 LU.RecomputeRegs(LUIdx, RegUses);
5422void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5423 NarrowSearchSpaceByDetectingSupersets();
5424 NarrowSearchSpaceByCollapsingUnrolledCode();
5425 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5427 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5428 NarrowSearchSpaceByFilterPostInc();
5430 NarrowSearchSpaceByDeletingCostlyFormulas();
5432 NarrowSearchSpaceByPickingWinnerRegs();
5436void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5438 SmallVectorImpl<const Formula *> &Workspace,
5439 const Cost &CurCost,
5440 const SmallPtrSet<const SCEV *, 16> &CurRegs,
5441 DenseSet<const SCEV *> &VisitedRegs)
const {
5452 const LSRUse &LU =
Uses[Workspace.
size()];
5458 SmallSetVector<const SCEV *, 4> ReqRegs;
5459 for (
const SCEV *S : CurRegs)
5460 if (LU.Regs.count(S))
5463 SmallPtrSet<const SCEV *, 16> NewRegs;
5464 Cost NewCost(L, SE,
TTI, AMK);
5465 for (
const Formula &
F : LU.Formulae) {
5473 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5474 for (
const SCEV *
Reg : ReqRegs) {
5475 if ((
F.ScaledReg &&
F.ScaledReg ==
Reg) ||
5478 if (NumReqRegsToFind == 0)
5482 if (NumReqRegsToFind != 0) {
5493 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5494 if (NewCost.isLess(SolutionCost)) {
5496 if (Workspace.
size() !=
Uses.size()) {
5497 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5498 NewRegs, VisitedRegs);
5499 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5500 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5503 dbgs() <<
".\nRegs:\n";
5504 for (
const SCEV *S : NewRegs)
dbgs()
5505 <<
"- " << *S <<
"\n";
5508 SolutionCost = NewCost;
5509 Solution = Workspace;
5518void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution)
const {
5520 Cost SolutionCost(L, SE,
TTI, AMK);
5521 SolutionCost.Lose();
5522 Cost CurCost(L, SE,
TTI, AMK);
5523 SmallPtrSet<const SCEV *, 16> CurRegs;
5524 DenseSet<const SCEV *> VisitedRegs;
5528 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5529 CurRegs, VisitedRegs);
5530 if (Solution.
empty()) {
5537 "The chosen solution requires ";
5538 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5539 for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5544 Solution[i]->print(
dbgs());
5550 const bool EnableDropUnprofitableSolution = [&] {
5562 if (BaselineCost.isLess(SolutionCost)) {
5563 if (!EnableDropUnprofitableSolution)
5565 dbgs() <<
"Baseline is more profitable than chosen solution, "
5566 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5569 "solution, dropping LSR solution.\n";);
5580 const SmallVectorImpl<Instruction *> &Inputs)
5584 bool AllDominate =
true;
5591 for (Instruction *Inst : Inputs) {
5592 if (Inst == Tentative || !DT.
dominates(Inst, Tentative)) {
5593 AllDominate =
false;
5598 if (Tentative->
getParent() == Inst->getParent() &&
5599 (!BetterPos || !DT.
dominates(Inst, BetterPos)))
5609 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5610 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5614 if (!Rung)
return IP;
5615 Rung = Rung->getIDom();
5616 if (!Rung)
return IP;
5617 IDom = Rung->getBlock();
5620 const Loop *IDomLoop = LI.getLoopFor(IDom);
5621 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5622 if (IDomDepth <= IPLoopDepth &&
5623 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5640 SmallVector<Instruction *, 4> Inputs;
5643 if (LU.Kind == LSRUse::ICmpZero)
5644 if (Instruction *
I =
5647 if (LF.PostIncLoops.
count(L)) {
5648 if (LF.isUseFullyOutsideLoop(L))
5649 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5655 for (
const Loop *PIL : LF.PostIncLoops) {
5656 if (PIL == L)
continue;
5661 if (!ExitingBlocks.
empty()) {
5663 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5670 "Insertion point must be a normal instruction");
5680 while (IP->isEHPad()) ++IP;
5685 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5693Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5695 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const {
5696 if (LU.RigidFormula)
5697 return LF.OperandValToReplace;
5701 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5706 Rewriter.setPostInc(LF.PostIncLoops);
5711 Type *Ty =
F.getType();
5725 for (
const SCEV *
Reg :
F.BaseRegs) {
5726 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5734 Value *ICmpScaledV =
nullptr;
5736 const SCEV *ScaledS =
F.ScaledReg;
5742 if (LU.Kind == LSRUse::ICmpZero) {
5752 "The only scale supported by ICmpZero uses is -1!");
5753 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5761 if (!
Ops.empty() && LU.Kind == LSRUse::Address &&
5771 Ops.push_back(ScaledS);
5797 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5798 "Expanding mismatched offsets\n");
5800 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5801 if (
Offset.isNonZero()) {
5802 if (LU.Kind == LSRUse::ICmpZero) {
5809 IntTy, -(uint64_t)
Offset.getFixedValue(),
true);
5818 Ops.push_back(
Offset.getUnknownSCEV(SE, IntTy));
5823 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5824 if (UnfoldedOffset.isNonZero()) {
5826 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5830 const SCEV *FullS =
Ops.empty() ?
5841 if (LU.Kind == LSRUse::ICmpZero) {
5845 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5846 "a scale at the same time!");
5847 if (
F.Scale == -1) {
5848 if (ICmpScaledV->
getType() != OpTy) {
5858 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5859 "ICmp does not support folding a global value and "
5860 "a scale at the same time!");
5864 -(uint64_t)
Offset.getFixedValue(),
5866 if (
C->getType() != OpTy) {
5870 assert(
C &&
"Cast of ConstantInt should have folded");
5883void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRUse &LU,
5884 const LSRFixup &LF,
const Formula &
F,
5885 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5886 DenseMap<BasicBlock *, Value *>
Inserted;
5890 bool needUpdateFixups =
false;
5901 Loop *PNLoop = LI.getLoopFor(Parent);
5902 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5908 CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
5909 .setMergeIdenticalEdges()
5910 .setKeepOneInputPHIs());
5913 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5924 if (
L->contains(BB) && !
L->contains(PN))
5932 needUpdateFixups =
true;
5937 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5950 LF.OperandValToReplace->
getType(),
"tmp",
5957 if (
L->contains(
I) && !
L->contains(BB))
5958 InsertedNonLCSSAInsts.insert(
I);
5961 Pair.first->second = FullV;
5968 if (needUpdateFixups) {
5969 for (LSRUse &LU :
Uses)
5970 for (LSRFixup &
Fixup : LU.Fixups)
5974 if (
Fixup.UserInst == PN) {
5977 bool foundInOriginalPHI =
false;
5979 if (val ==
Fixup.OperandValToReplace) {
5980 foundInOriginalPHI =
true;
5985 if (foundInOriginalPHI)
5996 if (val ==
Fixup.OperandValToReplace)
5997 Fixup.UserInst = NewPN;
6007void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
6009 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6013 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6019 if (FullV->
getType() != OpTy) {
6031 if (LU.Kind == LSRUse::ICmpZero)
6047 const LSRFixup &
Fixup,
const LSRUse &LU,
6051 if (LU.Kind != LSRUse::Address)
6052 return IVIncInsertPos;
6056 Type *Ty =
I->getType();
6059 return IVIncInsertPos;
6066 return IVIncInsertPos;
6073void LSRInstance::ImplementSolution(
6074 const SmallVectorImpl<const Formula *> &Solution) {
6080 for (
const IVChain &Chain : IVChainVec) {
6086 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6087 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6090 Rewriter.setIVIncInsertPos(L, InsertPos);
6091 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6095 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6098 for (
const IVChain &Chain : IVChainVec) {
6099 GenerateIVChain(Chain, DeadInsts);
6103 for (
const WeakVH &
IV :
Rewriter.getInsertedIVs())
6121 for (PHINode &PN :
L->getHeader()->phis()) {
6122 BinaryOperator *BO =
nullptr;
6128 case Instruction::Sub:
6133 case Instruction::Add:
6150 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6159LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6160 DominatorTree &DT, LoopInfo &LI,
6161 const TargetTransformInfo &
TTI, AssumptionCache &AC,
6162 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6163 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6166 :
TTI.getPreferredAddressingMode(
L, &SE)),
6169 if (!
L->isLoopSimplifyForm())
6177 unsigned NumUsers = 0;
6181 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6189 auto FirstNonPHI = PN->
getParent()->getFirstNonPHIIt();
6199 L->getHeader()->printAsOperand(
dbgs(),
false);
6205 HardwareLoopProfitable =
6206 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6210#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6213 Rewriter.disableCanonicalMode();
6214 Rewriter.enableLSRMode();
6218 OptimizeLoopTermCond();
6221 if (IU.empty())
return;
6224 if (!
L->isInnermost()) {
6237 CollectInterestingTypesAndFactors();
6238 CollectFixupsAndInitialFormulae();
6239 CollectLoopInvariantFixupsAndFormulae();
6245 print_uses(
dbgs()));
6247 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6251 GenerateAllReuseFormulae();
6253 FilterOutUndesirableDedicatedRegisters();
6254 NarrowSearchSpaceUsingHeuristics();
6264 if (Solution.
empty())
6269 for (
const LSRUse &LU :
Uses) {
6270 for (
const Formula &
F : LU.Formulae)
6272 F) &&
"Illegal formula generated!");
6277 ImplementSolution(Solution);
6280#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6281void LSRInstance::print_factors_and_types(
raw_ostream &OS)
const {
6282 if (Factors.empty() &&
Types.empty())
return;
6284 OS <<
"LSR has identified the following interesting factors and types: ";
6287 for (int64_t Factor : Factors)
6288 OS <<
LS <<
'*' << Factor;
6290 for (
Type *Ty : Types)
6291 OS <<
LS <<
'(' << *Ty <<
')';
6295void LSRInstance::print_fixups(raw_ostream &OS)
const {
6296 OS <<
"LSR is examining the following fixup sites:\n";
6297 for (
const LSRUse &LU :
Uses)
6298 for (
const LSRFixup &LF : LU.Fixups) {
6305void LSRInstance::print_uses(raw_ostream &OS)
const {
6306 OS <<
"LSR is examining the following uses:\n";
6307 for (
const LSRUse &LU :
Uses) {
6311 for (
const Formula &
F : LU.Formulae) {
6319void LSRInstance::print(raw_ostream &OS)
const {
6320 print_factors_and_types(OS);
6332class LoopStrengthReduce :
public LoopPass {
6336 LoopStrengthReduce();
6339 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
6340 void getAnalysisUsage(AnalysisUsage &AU)
const override;
6345LoopStrengthReduce::LoopStrengthReduce() : LoopPass(
ID) {
6349void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6376ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6377 llvm::DIExpression::expr_op_iterator Begin =
6378 llvm::DIExpression::expr_op_iterator(Expr.
begin());
6379 llvm::DIExpression::expr_op_iterator End =
6380 llvm::DIExpression::expr_op_iterator(Expr.
end());
6381 return {Begin, End};
6384struct SCEVDbgValueBuilder {
6385 SCEVDbgValueBuilder() =
default;
6386 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6388 void clone(
const SCEVDbgValueBuilder &
Base) {
6389 LocationOps =
Base.LocationOps;
6394 LocationOps.
clear();
6401 SmallVector<Value *, 2> LocationOps;
6404 void pushUInt(uint64_t Operand) { Expr.
push_back(Operand); }
6411 unsigned ArgIndex = 0;
6412 if (It != LocationOps.
end()) {
6413 ArgIndex = std::distance(LocationOps.
begin(), It);
6415 ArgIndex = LocationOps.
size();
6421 void pushValue(
const SCEVUnknown *U) {
6426 bool pushConst(
const SCEVConstant *
C) {
6427 if (
C->getAPInt().getSignificantBits() > 64)
6429 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6430 Expr.
push_back(
C->getAPInt().getSExtValue());
6437 return ToDwarfOpIter(Expr);
6442 bool pushArithmeticExpr(
const llvm::SCEVCommutativeExpr *CommExpr,
6445 "Expected arithmetic SCEV type");
6447 unsigned EmitOperator = 0;
6448 for (
const auto &
Op : CommExpr->
operands()) {
6451 if (EmitOperator >= 1)
6452 pushOperator(DwarfOp);
6459 bool pushCast(
const llvm::SCEVCastExpr *
C,
bool IsSigned) {
6460 const llvm::SCEV *Inner =
C->getOperand(0);
6461 const llvm::Type *
Type =
C->getType();
6462 uint64_t ToWidth =
Type->getIntegerBitWidth();
6463 bool Success = pushSCEV(Inner);
6465 IsSigned ? llvm::dwarf::DW_ATE_signed
6466 : llvm::dwarf::DW_ATE_unsigned};
6467 for (
const auto &
Op : CastOps)
6473 bool pushSCEV(
const llvm::SCEV *S) {
6476 Success &= pushConst(StartInt);
6481 pushLocation(
U->getValue());
6484 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6487 Success &= pushSCEV(UDiv->getLHS());
6488 Success &= pushSCEV(UDiv->getRHS());
6489 pushOperator(llvm::dwarf::DW_OP_div);
6496 "Unexpected cast type in SCEV.");
6500 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6515 bool isIdentityFunction(uint64_t
Op,
const SCEV *S) {
6517 if (
C->getAPInt().getSignificantBits() > 64)
6519 int64_t
I =
C->getAPInt().getSExtValue();
6521 case llvm::dwarf::DW_OP_plus:
6522 case llvm::dwarf::DW_OP_minus:
6524 case llvm::dwarf::DW_OP_mul:
6525 case llvm::dwarf::DW_OP_div:
6538 bool SCEVToValueExpr(
const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6544 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6545 if (!pushSCEV(Stride))
6547 pushOperator(llvm::dwarf::DW_OP_mul);
6549 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6550 if (!pushSCEV(Start))
6552 pushOperator(llvm::dwarf::DW_OP_plus);
6558 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6559 pushLocation(OffsetValue);
6562 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6563 << std::to_string(
Offset) <<
"\n");
6569 bool createIterCountExpr(
const SCEV *S,
6570 const SCEVDbgValueBuilder &IterationCount,
6571 ScalarEvolution &SE) {
6580 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6584 if (!Rec->isAffine())
6592 clone(IterationCount);
6593 if (!SCEVToValueExpr(*Rec, SE))
6604 bool SCEVToIterCountExpr(
const llvm::SCEVAddRecExpr &SAR,
6605 ScalarEvolution &SE) {
6611 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6612 if (!pushSCEV(Start))
6614 pushOperator(llvm::dwarf::DW_OP_minus);
6616 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6617 if (!pushSCEV(Stride))
6619 pushOperator(llvm::dwarf::DW_OP_div);
6627 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6628 SmallVectorImpl<Value *> &DestLocations) {
6630 "Expected the locations vector to contain the IV");
6635 "Expected the location ops to contain the IV.");
6639 for (
const auto &
Op : LocationOps) {
6640 auto It =
find(DestLocations,
Op);
6641 if (It != DestLocations.
end()) {
6643 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6651 for (
const auto &
Op : expr_ops()) {
6653 Op.appendToVector(DestExpr);
6660 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6668struct DVIRecoveryRec {
6669 DVIRecoveryRec(DbgVariableRecord *DVR)
6670 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6672 DbgVariableRecord *DbgRef;
6674 bool HadLocationArgList;
6680 for (
auto &RE : RecoveryExprs)
6682 RecoveryExprs.clear();
6685 ~DVIRecoveryRec() { clear(); }
6693 auto expr_ops = ToDwarfOpIter(Expr);
6695 for (
auto Op : expr_ops)
6704template <
typename T>
6708 "contain any DW_OP_llvm_arg operands.");
6715template <
typename T>
6720 "Expected expression that references DIArglist locations using "
6721 "DW_OP_llvm_arg operands.");
6723 for (
Value *V : Locations)
6740 if (NumLLVMArgs == 0) {
6747 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6777 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6778 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6779 assert(DVIRec.Expr &&
"Expected an expression");
6784 if (!DVIRec.HadLocationArgList) {
6785 assert(DVIRec.LocationOps.size() == 1 &&
6786 "Unexpected number of location ops.");
6790 Value *CachedValue =
6795 for (
WeakVH VH : DVIRec.LocationOps) {
6803 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6808 const SCEV *SCEVInductionVar,
6809 SCEVDbgValueBuilder IterCountExpr) {
6823 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6825 NewLocationOps.
push_back(LSRInductionVar);
6827 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6828 WeakVH VH = DVIRec.LocationOps[i];
6834 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6836 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6844 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6845 <<
" refers to a location that is now undef or erased. "
6846 "Salvage abandoned.\n");
6850 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6851 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6853 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6854 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6858 if (std::optional<APInt>
Offset =
6860 if (
Offset->getSignificantBits() <= 64)
6861 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6864 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6873 assert(DVIRec.RecoveryExprs.size() == 1 &&
6874 "Expected only a single recovery expression for an empty "
6876 assert(DVIRec.RecoveryExprs[0] &&
6877 "Expected a SCEVDbgSalvageBuilder for location 0");
6878 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6879 B->appendToVectors(
NewExpr, NewLocationOps);
6881 for (
const auto &
Op : DVIRec.Expr->
expr_ops()) {
6889 SCEVDbgValueBuilder *DbgBuilder =
6890 DVIRec.RecoveryExprs[LocationArgIndex].get();
6896 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6897 "Expected a positive index for the location-op position.");
6898 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6902 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6906 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Updated DVI: " << *DVIRec.DbgRef <<
"\n");
6914 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6915 if (DVIToUpdate.empty())
6919 assert(SCEVInductionVar &&
6920 "Anticipated a SCEV for the post-LSR induction variable");
6924 if (!IVAddRec->isAffine())
6932 SCEVDbgValueBuilder IterCountExpr;
6933 IterCountExpr.pushLocation(LSRInductionVar);
6934 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6937 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6940 for (
auto &DVIRec : DVIToUpdate) {
6941 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6952 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
6953 for (
const auto &
B : L->getBlocks()) {
6954 for (
auto &
I : *
B) {
6956 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
6961 if (DbgVal.isKillLocation())
6966 const auto &HasTranslatableLocationOps =
6968 for (
const auto LocOp : DbgValToTranslate.location_ops()) {
6982 if (!HasTranslatableLocationOps(DbgVal))
6985 std::unique_ptr<DVIRecoveryRec> NewRec =
6986 std::make_unique<DVIRecoveryRec>(&DbgVal);
6990 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
6991 for (
const auto LocOp : DbgVal.location_ops()) {
6992 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
6993 NewRec->LocationOps.push_back(LocOp);
6994 NewRec->HadLocationArgList = DbgVal.hasArgList();
6996 SalvageableDVISCEVs.push_back(std::move(NewRec));
7006 const LSRInstance &LSR) {
7008 auto IsSuitableIV = [&](
PHINode *
P) {
7019 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7026 if (IsSuitableIV(
P))
7030 for (
PHINode &
P : L.getHeader()->phis()) {
7031 if (IsSuitableIV(&
P))
7049 std::unique_ptr<MemorySSAUpdater> MSSAU;
7051 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7054 const LSRInstance &Reducer =
7055 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7056 Changed |= Reducer.getChanged();
7063#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7066 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7080 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7094 if (SalvageableDVIRecords.
empty())
7100 for (
const auto &L : LI) {
7104 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7105 "could not be identified.\n");
7109 for (
auto &Rec : SalvageableDVIRecords)
7111 SalvageableDVIRecords.
clear();
7115bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & ) {
7119 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7120 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7121 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7122 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7123 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7124 *
L->getHeader()->getParent());
7125 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7126 *
L->getHeader()->getParent());
7127 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7128 *
L->getHeader()->getParent());
7129 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7132 MSSA = &MSSAAnalysis->getMSSA();
7149char LoopStrengthReduce::ID = 0;
7152 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
early cse Early CSE w MemorySSA
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static Immediate ExtractImmediate(SCEVUse &S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< SCEVUse > &Good, SmallVectorImpl< SCEVUse > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static Instruction * getFixupInsertPos(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, DominatorTree &DT)
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from the cache.
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static GlobalValue * ExtractSymbol(SCEVUse &S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a new SCEV with that value excluded.
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number required for the individual IV users.
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero, or null otherwise.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/def graph.
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives in, right before MovePos.
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
bool isUnconditional() const
Value * getCondition() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Value * getCondition() const
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the type of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple register location.
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced before I will be available at both I1 and I2.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUse is representing.
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos lives in, right before MovePos.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
ArrayRef< SCEVUse > operands() const
bool hasNoSignedWrap() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM Value.
This class represents an analyzed expression in the program.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI ArrayRef< SCEVUse > operands() const
Return operands of this SCEV expression.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAddRecExpr(SCEVUse Start, SCEVUse Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the given type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the given type.
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a single pointer operand.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn't.
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
void setOperand(unsigned i, Value *Val)
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
iterator_range< use_iterator > uses()
A Value handle that is allowed to be null: it tracks a Value and becomes null when that Value is deleted or RAUW'ed.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
class_match< const SCEVVScale > m_SCEVVScale()
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bind_ty< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
bool match(const SCEV *S, const Pattern &P)
class_match< const Loop > m_Loop()
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actually needed.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing the effect of MI in a DIExpression.
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool isa_and_nonnull(const Y &Val)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
FunctionAddr VTableAddr Count
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.