#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to a LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode"),

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to drop solution if it is less profitable"));

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

    cl::desc("Stress test LSR IV chains"));
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);

  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};
  }
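  // Added note: addUnsigned/subUnsigned/mulUnsigned above intentionally cast
  // Quantity to uint64_t so that overflow wraps instead of being undefined
  // behavior on the signed ScalarTy, and the result remains scalable if
  // either operand was scalable.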
  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {

  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);

  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
    if (!LHS.isScalable() && RHS.isScalable())
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();

struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "[NumUses=" << UsedByIndices.count() << ']';
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);
  RegSortData &RSD = Pair.first->second;
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  RSD.UsedByIndices.reset(LUIdx);

RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));

RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;

void RegUseTracker::clear() {
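// Added note: RegUseTracker maps each register (a SCEV) to the bitvector of
// LSR use indices that reference it, so the solver can later tell which
// registers are shared across uses and which are private to a single one.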
  GlobalValue *BaseGV = nullptr;

  Immediate BaseOffset = Immediate::getZero();

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  Immediate UnfoldedOffset = Immediate::getZero();

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  bool countsDownToZero() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
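// Added note: taken together, the members above describe one candidate
// addressing formula of the general shape
//   BaseGV + BaseOffset + sum(BaseRegs) + Scale * ScaledReg,
// with UnfoldedOffset holding an immediate that could not be folded into the
// addressing mode itself.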
  for (const SCEV *S : Add->operands())

  const SCEV *Start, *Step;

  if (Mul->getOperand(0)->isAllOnesValue()) {

  for (const SCEV *S : MyGood)
  for (const SCEV *S : MyBad)

void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
    BaseRegs.push_back(Sum);

    BaseRegs.push_back(Sum);

bool Formula::isCanonical(const Loop &L) const {
  assert((Scale == 0 || ScaledReg) &&
         "ScaledReg must be non-null if Scale is non-zero");
    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())

  return none_of(BaseRegs, [&L](const SCEV *S) {

void Formula::canonicalize(const Loop &L) {
  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);

  ScaledReg = BaseRegs.pop_back_val();

  auto I = find_if(BaseRegs, [&L](const SCEV *S) {
  if (I != BaseRegs.end())
bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
  if (BaseRegs.size() != 1 || ScaledReg)

bool Formula::countsDownToZero() const {
  assert(BaseRegs.size() == 1 && "hasZeroEnd should mean one BaseReg");
  const APInt *StepInt;

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :

void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
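// Added note: getNumRegs above counts the scaled register via the !!ScaledReg
// idiom (1 if present, 0 if null) and adds the number of base registers.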
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  ListSeparator Plus(" + ");
  if (BaseOffset.isNonZero())
    OS << Plus << BaseOffset;
  for (const SCEV *BaseReg : BaseRegs)
  if (HasBaseReg && BaseRegs.empty())
    OS << Plus << "**error: HasBaseReg**";
  else if (!HasBaseReg && !BaseRegs.empty())
    OS << Plus << "**error: !HasBaseReg**";
    OS << Plus << Scale << "*reg(";
  if (UnfoldedOffset.isNonZero())
    OS << Plus << "imm(" << UnfoldedOffset << ')';
                            bool IgnoreSignificantBits = false) {
    if (RA.isAllOnes()) {
      if (LHS->getType()->isPointerTy())
    const APInt &LA = C->getAPInt();
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;
    for (const SCEV *S : Add->operands()) {
      if (!Op) return nullptr;
    for (const SCEV *S : Mul->operands()) {
                                    IgnoreSignificantBits)) {

    if (C->getSignificantBits() <= 64) {
      return Immediate::getFixed(C->getSExtValue());
    if (Result.isNonZero())
    if (Result.isNonZero())
        return Immediate::getScalable(C->getSExtValue());
  return Immediate::getZero();
    if (SI->getPointerOperand() == OperandVal)
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
        if (IntrInfo.PtrVal == OperandVal)
    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
    AccessTy.MemTy = Ty;

    AccessTy.AddrSpace = SI->getPointerAddressSpace();
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::masked_load:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
    case Intrinsic::masked_store:
      AccessTy.AddrSpace =
          II->getArgOperand(1)->getType()->getPointerAddressSpace();
  if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
  if (!Processed.insert(S).second)
    for (const SCEV *S : Add->operands()) {
    const SCEV *Op0, *Op1;
    Value *UVal = U->getValue();
    if (UI && UI->getOpcode() == Instruction::Mul &&

                                 const LSRUse &LU, const Formula &F);
                                 const LSRUse &LU, const Formula &F,

  const Loop *L = nullptr;
  ScalarEvolution *SE = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  TargetTransformInfo::LSRCost C;
  Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
      L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;

  void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
                   const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,
                   bool HardwareLoopProfitable,
                   SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);

  void print(raw_ostream &OS) const;
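  // Added note: Cost wraps a TargetTransformInfo::LSRCost (the C member).
  // isValid treats the all-~0u state as the deliberate "lose" value rather
  // than a bookkeeping error, which is why isLoser only needs to test
  // NumRegs for that sentinel.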
  void RateRegister(const Formula &F, const SCEV *Reg,
                    SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
                    bool HardwareLoopProfitable);
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                           SmallPtrSetImpl<const SCEV *> &Regs,
                           const LSRUse &LU, bool HardwareLoopProfitable,
                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
  Value *OperandValToReplace = nullptr;

  Immediate Offset = Immediate::getZero();

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

  void print(raw_ostream &OS) const;
  DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;

  using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;

  MemAccessTy AccessTy;

  Immediate MinOffset = Immediate::getFixedMax();
  Immediate MaxOffset = Immediate::getFixedMin();

  bool AllFixupsOutsideLoop = true;

  bool AllFixupsUnconditional = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  SmallPtrSet<const SCEV *, 4> Regs;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (Immediate::isKnownGT(f.Offset, MaxOffset))
      MaxOffset = f.Offset;
    if (Immediate::isKnownLT(f.Offset, MinOffset))
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

  void print(raw_ostream &OS) const;
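  // Added note: pushFixup above widens the use's [MinOffset, MaxOffset] range
  // so it always covers the offset of every fixup attached to this LSRUse.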
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, Immediate BaseOffset,
                                 bool HasBaseReg, int64_t Scale,
                                 Instruction *Fixup = nullptr);

                         [&](unsigned i, const SCEV *Reg) {
                           return i + getSetupCost(Reg, Depth - 1);
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
                        SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
                        bool HardwareLoopProfitable) {
    if (AR->getLoop() != L) {
      if (!AR->getLoop()->contains(L)) {

    unsigned LoopCost = 1;
                          F.BaseOffset.isFixed() &&
                          *Step == F.BaseOffset.getFixedValue();
      if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
      if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
          HardwareLoopProfitable)
    C.AddRecCost += LoopCost;

    if (!Regs.count(AR->getOperand(1))) {
      RateRegister(F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                               SmallPtrSetImpl<const SCEV *> &Regs,
                               const LSRUse &LU, bool HardwareLoopProfitable,
                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  if (LoserRegs && LoserRegs->count(Reg)) {
    RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);
    if (LoserRegs && isLoser())
void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
                       const DenseSet<const SCEV *> &VisitedRegs,
                       const LSRUse &LU, bool HardwareLoopProfitable,
                       SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(F, ScaledReg, Regs, LU, HardwareLoopProfitable,
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(F, BaseReg, Regs, LU, HardwareLoopProfitable,

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());

  for (const LSRFixup &Fixup : LU.Fixups) {
    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
      else if (Offset.isNonZero())
            APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();

      if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&

  if (C.NumRegs > TTIRegNum) {
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
      C.Insns += (C.NumRegs - TTIRegNum);

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&

  C.Insns += (C.AddRecCost - PrevAddRecCost);

  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
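// Added note: the block of assignments above poisons every cost field to its
// maximum value, which is the sentinel isLoser() checks for, so a formula
// rated this way can never be chosen over a valid one.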
bool Cost::isLess(const Cost &Other) const {
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))
  return !L->contains(UserInst);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::print(raw_ostream &OS) const {
      Store->getOperand(0)->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);

    OS << ", Offset=" << Offset;
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();

bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (AllFixupsUnconditional)
    OS << ", all-fixups-unconditional";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
  case LSRUse::Address: {
    int64_t FixedOffset =
        BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
    int64_t ScalableOffset =
        BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
    return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup, ScalableOffset);

  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
    if (Scale != 0 && Scale != -1)
    if (BaseOffset.isNonZero()) {
      if (BaseOffset.isScalable())
      BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
      return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());

    return !BaseGV && Scale == 0 && BaseOffset.isZero();

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  if (BaseOffset.isNonZero() &&
      (BaseOffset.isScalable() != MinOffset.isScalable() ||
       BaseOffset.isScalable() != MaxOffset.isScalable()))

  int64_t Base = BaseOffset.getKnownMinValue();
  int64_t Min = MinOffset.getKnownMinValue();
  int64_t Max = MaxOffset.getKnownMinValue();
  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());

                              HasBaseReg, Scale) &&

                       Immediate MinOffset, Immediate MaxOffset,
                       LSRUse::KindType Kind, MemAccessTy AccessTy,
                       const Formula &F, const Loop &L) {
  assert((F.isCanonical(L) || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                       Immediate MaxOffset, LSRUse::KindType Kind,
                       Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
                              BaseOffset, HasBaseReg, Scale) ||
                               BaseGV, BaseOffset, true, 0));

                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);

    return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());

  return TTI.isLegalAddImmediate(Offset.getFixedValue());
                                 const LSRUse &LU, const Formula &F) {
  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
    for (const LSRFixup &Fixup : LU.Fixups)
                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                                F.Scale, Fixup.UserInst))
                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F,
    return F.Scale != 1;

  case LSRUse::Address: {
    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
    if (F.BaseOffset.isScalable()) {
      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
                                   F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
                                   F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);

  case LSRUse::ICmpZero:
  case LSRUse::Special:

                             LSRUse::KindType Kind, MemAccessTy AccessTy,
  if (BaseOffset.isZero() && !BaseGV)

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  if (!HasBaseReg && Scale == 1) {

  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&

                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
  if (S->isZero()) return true;
  if (!S->isZero()) return false;

  if (BaseOffset.isZero() && !BaseGV)

  if (BaseOffset.isScalable())

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
                              BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

  IVInc(Instruction *U, Value *O, const SCEV *E)
      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  const_iterator begin() const {
    return std::next(Incs.begin());
  const_iterator end() const {

  bool hasIncs() const { return Incs.size() >= 2; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,

  SmallPtrSet<Instruction*, 4> FarUsers;
  SmallPtrSet<Instruction*, 4> NearUsers;

  ScalarEvolution &SE;
  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  MemorySSAUpdater *MSSAU;
  bool HardwareLoopProfitable = false;

  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

  SmallSetVector<Type *, 4> Types;

  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  SmallPtrSet<Use*, MaxChains> IVIncSet;

  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

  SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
  void OptimizeShadowIV();
  bool FindIVUserForCond(Instruction *Cond, IVStrideUse *&CondUse);
  void OptimizeLoopTermCond();

  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                        SmallVectorImpl<ChainUsers> &ChainUsersVec);
  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,
                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
  bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   const SmallVectorImpl<Immediate> &Worklist,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                    SmallVectorImpl<const Formula *> &Workspace,
                    const Cost &CurCost,
                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
                    DenseSet<const SCEV *> &VisitedRegs) const;
  void Solve(SmallVectorImpl<const Formula *> &Solution) const;

                          const SmallVectorImpl<Instruction *> &Inputs) const;
                                 const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
               SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

  LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
              LoopInfo &LI, const TargetTransformInfo &TTI,
              AssumptionCache &AC, TargetLibraryInfo &TLI,
              MemorySSAUpdater *MSSAU);

  bool getChanged() const { return Changed; }
  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
    return ScalarEvolutionIVs;

  void print_factors_and_types(raw_ostream &OS) const;
  void print_fixups(raw_ostream &OS) const;
  void print_uses(raw_ostream &OS) const;
  void print(raw_ostream &OS) const;
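  // Added note: the declarations above spell out the LSR pipeline: collect
  // fixups and initial formulae, generate reuse formulae (reassociations,
  // combinations, symbolic/constant offsets, scales, truncates), narrow the
  // search space with heuristics, solve for one formula per use, and finally
  // rewrite the IR with the chosen solution.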
void LSRInstance::OptimizeShadowIV() {
    Type *DestTy = nullptr;
    bool IsSigned = false;
      DestTy = UCast->getDestTy();
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;
    if (Mantissa == -1) continue;

    unsigned Entry, Latch;
    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?

    BinaryOperator *Incr =
    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)

    ConstantInt *C = nullptr;
    if (!C->getValue().isStrictlyPositive())

    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,
bool LSRInstance::FindIVUserForCond(Instruction *Cond, IVStrideUse *&CondUse) {
  for (IVStrideUse &U : IU)
    if (U.getUser() == Cond) {

Instruction *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse *&CondUse) {
  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  const SCEVNAryExpr *Max = nullptr;
    Pred = ICmpInst::ICMP_SLE;
    Pred = ICmpInst::ICMP_SLT;
    Pred = ICmpInst::ICMP_ULT;

  if (Max->getNumOperands() != 2)

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);
      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
    NewRHS = SU->getValue();

  ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
                                   Cond->getOperand(0), NewRHS, "scmp");

  Cond->replaceAllUsesWith(NewCond);
  Cond->eraseFromParent();
  if (Cmp->use_empty()) {
    Cmp->eraseFromParent();
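// Added note: OptimizeMax above rewrites an exit condition whose trip count
// was formed with a umax/smax guard (the common "max(n, 1)" pattern) into a
// direct compare against the un-maxed value, choosing the SLE/SLT/ULT
// predicate that preserves the original semantics.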
LSRInstance::OptimizeLoopTermCond() {
  SmallPtrSet<Instruction *, 4> PostIncs;

  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    IVStrideUse *CondUse = nullptr;
    if (!FindIVUserForCond(Cond, CondUse))
      Cond = OptimizeMax(Cmp, CondUse);
    if (!DT.dominates(ExitingBlock, LatchBlock))
      if (LatchBlock != ExitingBlock)
        for (const IVStrideUse &UI : IU)
          if (&UI != CondUse &&
            const SCEV *A = IU.getStride(*CondUse, L);
            const SCEV *B = IU.getStride(UI, L);
            if (!A || !B) continue;
            if (const SCEVConstant *D =
              const ConstantInt *C = D->getValue();
              if (C->isOne() || C->isMinusOne())
                goto decline_post_inc;
              if (C->getValue().getSignificantBits() >= 64 ||
                  C->getValue().isMinSignedValue())
                goto decline_post_inc;
              MemAccessTy AccessTy =
              int64_t Scale = C->getSExtValue();
                                            AccessTy.AddrSpace))
                goto decline_post_inc;
                                            AccessTy.AddrSpace))
                goto decline_post_inc;

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

      if (Cond->hasOneUse()) {
        Cond->setName(L->getHeader()->getName() + ".termcond");

    IVIncInsertPos = L->getLoopLatch()->getTerminator();
  for (Instruction *Inst : PostIncs)
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)

  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);

  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
                          LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
                          NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;

std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
  const SCEV *Copy = Expr;
    Offset = Immediate::getFixed(0);

  std::pair<UseMapTy::iterator, bool> P =
      UseMap.try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())
  RegUses.swapAndDropUse(LUIdx, Uses.size());

LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (LSRUse &LU : Uses) {
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())
void LSRInstance::CollectInterestingTypesAndFactors() {
  SmallSetVector<const SCEV *, 4> Strides;

  for (const IVStrideUse &U : IU) {
    const SCEV *Expr = IU.getExpr(U);
  } while (!Worklist.empty());

  for (SmallSetVector<const SCEV *, 4>::const_iterator
    for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
             std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;
        if (const SCEVConstant *Factor =
          if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
            Factors.insert(Factor->getAPInt().getSExtValue());
        } else if (const SCEVConstant *Factor =
          if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
            Factors.insert(Factor->getAPInt().getSExtValue());

  if (Types.size() == 1)

  for(; OI != OE; ++OI) {
      return Trunc->getOperand(0);
    if (SubExpr->getSCEVType() == scAddExpr)
      if (SubExpr->getSCEVType() != scMulExpr)
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
                                    ScalarEvolution &SE) {
  SmallPtrSet<const SCEV*, 8> Processed;

  if (!Chain.hasIncs())
  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst : Users) {
                 dbgs() << "  " << *Inst << "\n";
               });
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))

  for (const IVInc &Inc : Chain) {
    if (TTI.isProfitableLSRChainElement(Inc.UserInst))
    if (Inc.IncExpr->isZero())
      ++NumConstIncrements;
    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;
    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)
  cost += NumVarIncrements;
  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                                   SmallVectorImpl<ChainUsers> &ChainUsersVec) {
  const SCEV *const OperExpr = SE.getSCEV(NextIV);
  const SCEV *const OperExprBase = getExprBase(OperExpr);

  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    const SCEV *PrevExpr = SE.getSCEV(PrevIV);
    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    LastIncExpr = OperExpr;
    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  SmallPtrSet<Instruction*, 4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);

  for (User *U : IVOper->users()) {
    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)
        && IU.isIVUserOrOperand(OtherUse)) {
    NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
void LSRInstance::CollectChains() {
  SmallVector<BasicBlock *, 8> LatchPath;
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
  for (BasicBlock *BB : reverse(LatchPath)) {
    for (Instruction &I : *BB) {
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      SmallPtrSet<Instruction*, 4> UniqueOperands;
      while (IVOpIter != IVOpEnd) {
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  for (PHINode &PN : L->getHeader()->phis()) {
      ChainInstruction(&PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
  IVChainVec.resize(ChainIdx);

void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
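// Added note: an IV chain links users whose induction-variable operands differ
// by simple increments. FinalizeChain records each chained operand in IVIncSet
// so CollectFixupsAndInitialFormulae can skip uses already covered by a chain.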
  Immediate IncOffset = Immediate::getZero();
        C->getSignificantBits() > 64)
      IncOffset = Immediate::getScalable(C->getSExtValue());

void LSRInstance::GenerateIVChain(const IVChain &Chain,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  const IVInc &Head = Chain.Incs[0];
  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
  assert(IVSrc && "Failed to find IV chain source");

  const SCEV *LeftOverExpr = nullptr;
  const SCEV *Accum = SE.getZero(IntTy);
  for (const IVInc &Inc : Chain) {
      InsertPt = L->getLoopLatch()->getTerminator();
    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

      bool FoundBase = false;
      for (auto [MapScev, MapIVOper] : reverse(Bases)) {
        const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);
        if (!Remainder->isZero()) {
          Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
          const SCEV *IVOperExpr =
          IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
        Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
        IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
      LeftOverExpr = nullptr;

    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);

  for (PHINode &Phi : L->getHeader()->phis()) {
        Phi.getIncomingValueForBlock(L->getLoopLatch()));
    Value *IVOper = IVSrc;
    if (IVTy != PostIncTy) {
      IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
      Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
      IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
    Phi.replaceUsesOfWith(PostIncV, IVOper);
void LSRInstance::CollectFixupsAndInitialFormulae() {
  BranchInst *ExitBranch = nullptr;
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

  SmallPtrSet<const SCEV *, 16> Regs;
  DenseSet<const SCEV *> VisitedRegs;
  DenseSet<size_t> VisitedLSRUse;

  for (const IVStrideUse &U : IU) {
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
      Kind = LSRUse::Address;

    const SCEV *S = IU.getExpr(U);
      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
            (!NV->getType()->isPointerTy() ||
          Kind = LSRUse::ICmpZero;
        } else if (L->isLoopInvariant(NV) &&
                   !NV->getType()->isPointerTy()) {
          Kind = LSRUse::ICmpZero;

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
    LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);

    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,
                               HardwareLoopProfitable);
      VisitedLSRUse.insert(LUIdx);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
    LU.RigidFormula = true;

  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
         "Formula is illegal");
  if (!LU.InsertFormula(F, *L))

  CountRegisters(F, LUIdx);

bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  SmallPtrSet<const SCEV *, 32> Visited;
  while (!Worklist.empty()) {
    if (!Visited.insert(S).second)

      const Value *V = US->getValue();
        if (L->contains(Inst)) continue;
      for (const Use &U : V->uses()) {
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())

          bool HasIncompatibleEHPTerminatedBlock = false;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
          if (HasIncompatibleEHPTerminatedBlock) {

          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = ICI->getOperand(OtherIdx);

        std::pair<size_t, Immediate> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                                   unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {
    const SCEV *Start, *Step;
    if (Start->isZero())
      Remainder = nullptr;
    if (Remainder != Start) {

                             LSRUse &LU, const SCEV *S, const Loop *L,
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())
  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, S->getType()) ||
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
  if (AddOps.size() == 1)
                                       LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J), std::as_const(AddOps).end());
    if (InnerAddOps.size() == 1 &&
                                 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))

    const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
      if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
        F.ScaledReg = nullptr;
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
      F.BaseRegs[Idx] = InnerSum;

          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
        F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,

void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
      (Base.UnfoldedOffset.isNonZero()) <=

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
      if (!CombinedIntegerType)
      Ops.push_back(BaseReg);
      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
        NewBase.UnfoldedOffset.getFixedValue(), true));
    NewBase.UnfoldedOffset = Immediate::getFixed(0);
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  if (G->isZero() || !GV)
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {

  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);
    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
        F.ScaledReg = nullptr;
        F.deleteBaseReg(F.BaseRegs[Idx]);
    } else if (IsScaledReg)
      F.BaseRegs[Idx] = NewG;

    (void)InsertFormula(LU, LUIdx, F);

  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
    const APInt *StepInt;
    for (Immediate Offset : Worklist) {
        Offset = Immediate::getFixed(Offset.getFixedValue() - Step);

  for (Immediate Offset : Worklist)
  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))
  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;

  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  if (LU.MinOffset != LU.MaxOffset) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)
    if (BaseReg->getType()->isPointerTy())
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset.isMin() && Factor == -1)
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())

    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())

    F.BaseOffset = NewBaseOffset;

    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);
    const SCEV *FactorS = SE.getConstant(IntTy, Factor);

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())
              IntTy, F.UnfoldedOffset.getFixedValue()))

    (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not did its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;
    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        const SCEV *FactorS = SE.getConstant(IntTy, Factor);
        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
          if (!Quotient->isZero()) {
            F.ScaledReg = Quotient;
            F.deleteBaseReg(F.BaseRegs[i]);
            if (F.Scale == 1 && (F.BaseRegs.empty() ||
                                 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
            if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
            (void)InsertFormula(LU, LUIdx, F);
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    if (!New || (Result && New != Result))
  assert(Result && "failed to create expression");

void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
             [](const SCEV *S) { return S->getType()->isPointerTy(); }))

  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
      const SCEV *NewScaledReg =
      if (!NewScaledReg || NewScaledReg->isZero())
      F.ScaledReg = NewScaledReg;

      bool HasZeroBaseReg = false;
      for (const SCEV *&BaseReg : F.BaseRegs) {
        const SCEV *NewBaseReg =
        if (!NewBaseReg || NewBaseReg->isZero()) {
          HasZeroBaseReg = true;

      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

      (void)InsertFormula(LU, LUIdx, F);
4493   const SCEV *OrigReg;
4495   WorkItem(size_t LI, Immediate I, const SCEV *R)
4496       : LUIdx(LI), Imm(I), OrigReg(R) {}
4498   void print(raw_ostream &OS) const;
4504 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4505 void WorkItem::print(raw_ostream &OS) const {
4506   OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
4507      << " , add offset " << Imm;
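// Look for registers used by different uses that differ only by a constant
// offset; for each such pair a WorkItem records the use index, the offset
// delta, and the original register, so the rewrite loop below can make the
// uses share a single register plus an immediate.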
4517 void LSRInstance::GenerateCrossUseConstantOffsets() {
4519   using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4521   DenseMap<const SCEV *, ImmMapTy> Map;
4522   DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4524   for (const SCEV *Use : RegUses) {
4527     auto Pair = Map.try_emplace(Reg);
4530     Pair.first->second.insert(std::make_pair(Imm, Use));
4531     UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
4538   SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
4540   for (const SCEV *Reg : Sequence) {
4541     const ImmMapTy &Imms = Map.find(Reg)->second;
4544     if (Imms.size() == 1)
4548     for (const auto &Entry
4550                       << ' ' << Entry.first;
4554     for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4556       const SCEV *OrigReg = J->second;
4558       Immediate JImm = J->first;
4559       const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4562           UsedByIndicesMap[Reg].count() == 1) {
4570       Immediate First = Imms.begin()->first;
4571       Immediate Last = std::prev(Imms.end())->first;
4572       if (!First.isCompatibleImmediate(Last)) {
4579       bool Scalable = First.isScalable() || Last.isScalable();
4580       int64_t FI = First.getKnownMinValue();
4581       int64_t LI = Last.getKnownMinValue();
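      // The probe points below are the smallest offset, the largest offset,
      // and one near the middle of the sorted offset map. The midpoint
      // (FI & LI) + ((FI ^ LI) >> 1) is the classic overflow-free average of
      // two signed 64-bit values (shared bits plus half of the differing
      // bits); e.g. for FI = 6 and LI = 10 it yields (6 & 10) + ((6 ^ 10) >> 1)
      // = 2 + 6 = 8. The follow-up adjustment nudges a negative result with an
      // odd sum toward zero so the rounding matches ordinary signed division.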
4584       int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4587       Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
4588       ImmMapTy::const_iterator OtherImms[] = {
4589           Imms.begin(), std::prev(Imms.end()),
4590           Imms.lower_bound(Immediate::get(Avg, Scalable))};
4591       for (const auto &M : OtherImms) {
4592         if (M == J || M == JE)
           continue;
4593         if (!JImm.isCompatibleImmediate(M->first))
4597         Immediate Imm = JImm.subUnsigned(M->first);
4598         for (unsigned LUIdx : UsedByIndices.set_bits())
4600           if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
4601             WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
4608   UsedByIndicesMap.clear();
4609   UniqueItems.clear();
4612   for (const WorkItem &WI : WorkItems) {
4613     size_t LUIdx = WI.LUIdx;
4614     LSRUse &LU = Uses[LUIdx];
4615     Immediate Imm = WI.Imm;
4616     const SCEV *OrigReg = WI.OrigReg;
4619     const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
4623     for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4624       Formula F = LU.Formulae[L];
4631       if (F.ScaledReg == OrigReg) {
4632         if (!F.BaseOffset.isCompatibleImmediate(Imm))
4634         Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
4636         const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
4637         if (F.referencesReg(S))
4640         NewF.BaseOffset = Offset;
4641         if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4644         NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
4653         if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4655           if (C->getValue()->isNegative() !=
4656                   (NewF.BaseOffset.isLessThanZero()) &&
4657               (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
4658                   .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4663         NewF.canonicalize(*this->L);
4664         (void)InsertFormula(LU, LUIdx, NewF);
4667         for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
4669           if (BaseReg != OrigReg)
4672           if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4673               !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4674               !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4676           NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4678                             LU.Kind, LU.AccessTy, NewF)) {
4682             Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4686             NewF.UnfoldedOffset = NewUnfoldedOffset;
4688           NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
4693           for (const SCEV *NewReg : NewF.BaseRegs)
4695             if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4697             if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
4699                     .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4700                 (C->getAPInt() + NewF.BaseOffset.getFixedValue())
4703                     NewF.BaseOffset.getFixedValue()))
4708           NewF.canonicalize(*this->L);
4709           (void)InsertFormula(LU, LUIdx, NewF);
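// Driver for formula generation: run each Generate* transform over every
// existing formula of every use, in rounds, and finish with the cross-use
// constant-offset pass above.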
4720 LSRInstance::GenerateAllReuseFormulae() {
4723   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4724     LSRUse &LU = Uses[LUIdx];
4725     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4726       GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4727     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4728       GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4730   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4731     LSRUse &LU = Uses[LUIdx];
4732     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4733       GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4734     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4735       GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4736     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4737       GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4738     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4739       GenerateScales(LU, LUIdx, LU.Formulae[i]);
4741   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4742     LSRUse &LU = Uses[LUIdx];
4743     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4744       GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4747   GenerateCrossUseConstantOffsets();
4750              "After generating reuse formulae:\n";
4751              print_uses(dbgs()));
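// Prune formulae whose registers are private to a single use: formulae that
// rate as outright losers are dropped, and among formulae that share the same
// key of cross-use registers only the cheapest one is kept.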
4756 void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4757   DenseSet<const SCEV *> VisitedRegs;
4758   SmallPtrSet<const SCEV *, 16> Regs;
4759   SmallPtrSet<const SCEV *, 16> LoserRegs;
4761   bool ChangedFormulae = false;
4766   using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>, size_t>;
4768   BestFormulaeTy BestFormulae;
4770   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4771     LSRUse &LU = Uses[LUIdx];
4776     for (size_t FIdx = 0, NumForms = LU.Formulae.size();
4777          FIdx != NumForms; ++FIdx) {
4778       Formula &F = LU.Formulae[FIdx];
4789       CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4791       if (CostF.isLoser()) {
4803         for (const SCEV *Reg : F.BaseRegs) {
4804           if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4808             RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
4809           Key.push_back(F.ScaledReg);
4814       std::pair<BestFormulaeTy::const_iterator, bool> P =
4815           BestFormulae.insert(std::make_pair(Key, FIdx));
4819         Formula &Best = LU.Formulae[P.first->second];
4821         Cost CostBest(L, SE, TTI, AMK);
4823         CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
4824                              HardwareLoopProfitable);
4825         if (CostF.isLess(CostBest))
4829                           " in favor of formula ";
4830                           Best.print(dbgs()); dbgs() << '\n');
4833         ChangedFormulae = true;
4835         LU.DeleteFormula(F);
4843       LU.RecomputeRegs(LUIdx, RegUses);
4846     BestFormulae.clear();
4851              "After filtering out undesirable candidates:\n";
4859 size_t LSRInstance::EstimateSearchSpaceComplexity() const {
4861   for (const LSRUse &LU : Uses) {
4862     size_t FSize = LU.Formulae.size();
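// The narrowing heuristics below only run when the estimated number of formula
// combinations exceeds ComplexityLimit; each trades potential solution quality
// for a tractable Solve() step. This first one drops any formula whose
// registers are a strict superset of another formula's registers.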
4877 void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4881   LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
4882                        "which use a superset of registers used by other "
4885   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4886     LSRUse &LU = Uses[LUIdx];
4888     for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4889       Formula &F = LU.Formulae[i];
4890       if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
4896            I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4902               Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4903                                   (uint64_t)C->getValue()->getSExtValue());
4904           NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4905                               (I - F.BaseRegs.begin()));
4906           if (LU.HasFormulaWithSameRegs(NewF)) {
4909             LU.DeleteFormula(F);
4920             NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4921                                 (I - F.BaseRegs.begin()));
4922             if (LU.HasFormulaWithSameRegs(NewF)) {
4925               LU.DeleteFormula(F);
4936       LU.RecomputeRegs(LUIdx, RegUses);
4945 void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4950   dbgs() << "The search space is too complex.\n"
4951              "Narrowing the search space by assuming that uses separated "
4952              "by a constant offset will use the same registers.\n");
4956   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4957     LSRUse &LU = Uses[LUIdx];
4958     for (const Formula &F : LU.Formulae) {
4959       if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
4962       LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
4966       if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
4967                               LU.Kind, LU.AccessTy))
4972       LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4973       LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
4976       for (LSRFixup &Fixup : LU.Fixups) {
4977         Fixup.Offset += F.BaseOffset;
4978         LUThatHas->pushFixup(Fixup);
4984       for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4985         Formula &F = LUThatHas->Formulae[i];
4986         if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4987                         LUThatHas->Kind, LUThatHas->AccessTy, F)) {
4989           LUThatHas->DeleteFormula(F);
4997       LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
5000       DeleteUse(LU, LUIdx);
5013 void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5017   LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
5018                        "undesirable dedicated registers.\n");
5020   FilterOutUndesirableDedicatedRegisters();
5035 void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5040   dbgs() << "The search space is too complex.\n"
5041              "Narrowing the search space by choosing the best Formula "
5042              "from the Formulae with the same Scale and ScaledReg.\n");
5045   using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
5047   BestFormulaeTy BestFormulae;
5049   bool ChangedFormulae = false;
5051   DenseSet<const SCEV *> VisitedRegs;
5052   SmallPtrSet<const SCEV *, 16> Regs;
5054   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5055     LSRUse &LU = Uses[LUIdx];
5060     auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5065       size_t FARegNum = 0;
5066       for (const SCEV *Reg : FA.BaseRegs) {
5067         const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5068         FARegNum += (NumUses - UsedByIndices.count() + 1);
5070       size_t FBRegNum = 0;
5071       for (const SCEV *Reg : FB.BaseRegs) {
5072         const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5073         FBRegNum += (NumUses - UsedByIndices.count() + 1);
5075       if (FARegNum != FBRegNum)
5076         return FARegNum < FBRegNum;
5083       CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5085       CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5086       return CostFA.isLess(CostFB);
5090     for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5092       Formula &F = LU.Formulae[FIdx];
5095       auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
5099       Formula &Best = LU.Formulae[P.first->second];
5100       if (IsBetterThan(F, Best))
5104                         " in favor of formula ";
5105                         Best.print(dbgs()); dbgs() << '\n');
5107       ChangedFormulae = true;
5109       LU.DeleteFormula(F);
5115       LU.RecomputeRegs(LUIdx, RegUses);
5118     BestFormulae.clear();
5123              "After filtering out undesirable candidates:\n";
5130 void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5137              "Narrowing the search space by choosing the lowest "
5138              "register Formula for PostInc Uses.\n");
5140   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5141     LSRUse &LU = Uses[LUIdx];
5143     if (LU.Kind != LSRUse::Address)
5149     size_t MinRegs = std::numeric_limits<size_t>::max();
5150     for (const Formula &F : LU.Formulae)
5151       MinRegs = std::min(F.getNumRegs(), MinRegs);
5154     for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5156       Formula &F = LU.Formulae[FIdx];
5157       if (F.getNumRegs() > MinRegs) {
5160         LU.DeleteFormula(F);
5167       LU.RecomputeRegs(LUIdx, RegUses);
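// Aggressive fallback: estimate, for every register, how likely it is to end
// up unselected across all uses (RegNumMap), then keep for each use only the
// single formula with the lowest expected register count and delete the rest.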
5218 void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5227   SmallPtrSet<const SCEV *, 4> UniqRegs;
5231   DenseMap<const SCEV *, float> RegNumMap;
5232   for (const SCEV *Reg : RegUses) {
5236     for (const LSRUse &LU : Uses) {
5237       if (!LU.Regs.count(Reg))
5239       float P = LU.getNotSelectedProbability(Reg);
5245     RegNumMap.insert(std::make_pair(Reg, PNotSel));
5249   dbgs() << "Narrowing the search space by deleting costly formulas\n");
5252   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5253     LSRUse &LU = Uses[LUIdx];
5255     if (LU.Formulae.size() < 2)
5260     float FMinRegNum = LU.Formulae[0].getNumRegs();
5261     float FMinARegNum = LU.Formulae[0].getNumRegs();
5263     for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5264       Formula &F = LU.Formulae[i];
5267       for (const SCEV *BaseReg : F.BaseRegs) {
5268         if (UniqRegs.count(BaseReg))
5270         FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5273             RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5275       if (const SCEV *ScaledReg = F.ScaledReg) {
5276         if (!UniqRegs.count(ScaledReg)) {
5278               RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5281               RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5284       if (FMinRegNum > FRegNum ||
5285           (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5286         FMinRegNum = FRegNum;
5287         FMinARegNum = FARegNum;
5292     dbgs() << " with min reg num " << FMinRegNum << '\n');
5294     std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5295     while (LU.Formulae.size() != 1) {
5298       LU.Formulae.pop_back();
5300     LU.RecomputeRegs(LUIdx, RegUses);
5301     assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
5302     Formula &F = LU.Formulae[0];
5318                                       MemAccessTy AccessType) {
5328   return TTI.isLegalAddressingMode(AccessType.MemTy, nullptr,
5329                                    Diff->getSExtValue(),
5330                                    true, 0, AccessType.AddrSpace) &&
5331          !TTI.isLegalAddressingMode(AccessType.MemTy, nullptr,
5332                                     -Diff->getSExtValue(),
5333                                     true, 0, AccessType.AddrSpace);
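// Pick the register shared by the most uses, assume it will be part of the
// final solution, and delete every formula that does not reference it; this is
// repeated until the search space becomes small enough.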
5341 void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5344   SmallPtrSet<const SCEV *, 4> Taken;
5352     const SCEV *Best = nullptr;
5353     unsigned BestNum = 0;
5354     for (const SCEV *Reg : RegUses) {
5359         BestNum = RegUses.getUsedByIndices(Reg).count();
5361         unsigned Count = RegUses.getUsedByIndices(Reg).count();
5362         if (Count > BestNum) {
5370         if (Count == BestNum) {
5371           int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5372           if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
5374                                         Uses[LUIdx].AccessTy)) {
5381     assert(Best && "Failed to find best LSRUse candidate");
5383     LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
5384                       << " will yield profitable reuse.\n");
5389     for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5390       LSRUse &LU = Uses[LUIdx];
5391       if (!LU.Regs.count(Best))
         continue;
5394       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5395         Formula &F = LU.Formulae[i];
5396         if (!F.referencesReg(Best)) {
5398           LU.DeleteFormula(F);
5402       assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
5408       LU.RecomputeRegs(LUIdx, RegUses);
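// Apply the narrowing heuristics above roughly in order of increasing
// aggressiveness; the later, lossier ones only run while the estimated search
// space is still above the complexity limit.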
5419 void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5420 NarrowSearchSpaceByDetectingSupersets();
5421 NarrowSearchSpaceByCollapsingUnrolledCode();
5422 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5424 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5425 NarrowSearchSpaceByFilterPostInc();
5427 NarrowSearchSpaceByDeletingCostlyFormulas();
5429 NarrowSearchSpaceByPickingWinnerRegs();
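// Recursive branch-and-bound solver: uses are filled in Workspace order, each
// candidate formula is rated incrementally, and a branch is abandoned as soon
// as its cost can no longer beat the best complete solution found so far.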
5433 void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5435                                SmallVectorImpl<const Formula *> &Workspace,
5436                                const Cost &CurCost,
5437                                const SmallPtrSet<const SCEV *, 16> &CurRegs,
5438                                DenseSet<const SCEV *> &VisitedRegs) const {
5449   const LSRUse &LU = Uses[Workspace.size()];
5455   SmallSetVector<const SCEV *, 4> ReqRegs;
5456   for (const SCEV *S : CurRegs)
5457     if (LU.Regs.count(S))
5460   SmallPtrSet<const SCEV *, 16> NewRegs;
5461   Cost NewCost(L, SE, TTI, AMK);
5462   for (const Formula &F : LU.Formulae) {
5470     int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
5471     for (const SCEV *Reg : ReqRegs) {
5472       if ((F.ScaledReg && F.ScaledReg == Reg) ||
5475         if (NumReqRegsToFind == 0)
5479     if (NumReqRegsToFind != 0) {
5490     NewCost.RateFormula(F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5491     if (NewCost.isLess(SolutionCost)) {
5493       if (Workspace.size() != Uses.size()) {
5494         SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5495                      NewRegs, VisitedRegs);
5496         if (F.getNumRegs() == 1 && Workspace.size() == 1)
5497           VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
5500         dbgs() << ".\nRegs:\n";
5501         for (const SCEV *S : NewRegs) dbgs()
5502                    << "- " << *S << "\n";
5505         SolutionCost = NewCost;
5506         Solution = Workspace;
5515 void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
5517   Cost SolutionCost(L, SE, TTI, AMK);
5518   SolutionCost.Lose();
5519   Cost CurCost(L, SE, TTI, AMK);
5520   SmallPtrSet<const SCEV *, 16> CurRegs;
5521   DenseSet<const SCEV *> VisitedRegs;
5525   SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5526                CurRegs, VisitedRegs);
5527   if (Solution.empty()) {
5534              "The chosen solution requires ";
5535              SolutionCost.print(dbgs()); dbgs() << ":\n";
5536              for (size_t i = 0, e = Uses.size(); i != e; ++i) {
5541                Solution[i]->print(dbgs());
5547   const bool EnableDropUnprofitableSolution = [&] {
5559   if (BaselineCost.isLess(SolutionCost)) {
5560     if (!EnableDropUnprofitableSolution)
5562       dbgs() << "Baseline is more profitable than chosen solution, "
5563                  "add option 'lsr-drop-solution' to drop LSR solution.\n");
5566                  "solution, dropping LSR solution.\n";);
5577                       const SmallVectorImpl<Instruction *> &Inputs)
5581   bool AllDominate = true;
5588   for (Instruction *Inst : Inputs) {
5589     if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
5590       AllDominate = false;
5595     if (Tentative->getParent() == Inst->getParent() &&
5596         (!BetterPos || !DT.dominates(Inst, BetterPos)))
5606   const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5607   unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
5611     if (!Rung)
       return IP;
5612     Rung = Rung->getIDom();
5613     if (!Rung)
       return IP;
5614     IDom = Rung->getBlock();
5617     const Loop *IDomLoop = LI.getLoopFor(IDom);
5618     unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
5619     if (IDomDepth <= IPLoopDepth &&
5620         (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5637   SmallVector<Instruction *, 4> Inputs;
5640   if (LU.Kind == LSRUse::ICmpZero)
5641     if (Instruction *I =
5644   if (LF.PostIncLoops.count(L)) {
5645     if (LF.isUseFullyOutsideLoop(L))
5646       Inputs.push_back(L->getLoopLatch()->getTerminator());
5652   for (const Loop *PIL : LF.PostIncLoops) {
5653     if (PIL == L)
       continue;
5658     if (!ExitingBlocks.empty()) {
5660       for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
5667          "Insertion point must be a normal instruction");
5677   while (IP->isEHPad()) ++IP;
5682   while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
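// Expand a formula for one fixup into IR at the chosen insertion point:
// materialize the base registers, the optional scaled register, and any
// immediate or unfolded offsets, then hand the resulting sum to the SCEV
// expander.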
5690 Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
5692                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5693   if (LU.RigidFormula)
5694     return LF.OperandValToReplace;
5698   IP = AdjustInsertPositionForExpand(IP, LF, LU);
5703   Rewriter.setPostInc(LF.PostIncLoops);
5708   Type *Ty = F.getType();
5722   for (const SCEV *Reg : F.BaseRegs) {
5723     assert(!Reg->isZero() && "Zero allocated in a base register!");
5731   Value *ICmpScaledV = nullptr;
5733   const SCEV *ScaledS = F.ScaledReg;
5739     if (LU.Kind == LSRUse::ICmpZero) {
5749              "The only scale supported by ICmpZero uses is -1!");
5750       ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
5758       if (!Ops.empty() && LU.Kind == LSRUse::Address &&
5768       Ops.push_back(ScaledS);
5794   assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5795          "Expanding mismatched offsets\n");
5797   Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
5798   if (Offset.isNonZero()) {
5799     if (LU.Kind == LSRUse::ICmpZero) {
5806           IntTy, -(uint64_t)Offset.getFixedValue(), true);
5815       Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
5820   Immediate UnfoldedOffset = F.UnfoldedOffset;
5821   if (UnfoldedOffset.isNonZero()) {
5823     Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5827   const SCEV *FullS = Ops.empty() ?
5838   if (LU.Kind == LSRUse::ICmpZero) {
5842     assert(!F.BaseGV && "ICmp does not support folding a global value and "
5843                         "a scale at the same time!");
5844     if (F.Scale == -1) {
5845       if (ICmpScaledV->getType() != OpTy) {
5855       assert((F.Scale == 0 || F.Scale == 1) &&
5856              "ICmp does not support folding a global value and "
5857              "a scale at the same time!");
5861           -(uint64_t)Offset.getFixedValue(),
5863       if (C->getType() != OpTy) {
5867       assert(C && "Cast of ConstantInt should have folded");
5880 void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
5881                                 const LSRFixup &LF, const Formula &F,
5882                                 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5883   DenseMap<BasicBlock *, Value *> Inserted;
5887   bool needUpdateFixups = false;
5898       Loop *PNLoop = LI.getLoopFor(Parent);
5899       if (!PNLoop || Parent != PNLoop->getHeader()) {
5905               CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
5906                   .setMergeIdenticalEdges()
5907                   .setKeepOneInputPHIs());
5910             DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5921           if (L->contains(BB) && !L->contains(PN))
5929           needUpdateFixups = true;
5934       std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
5947                                LF.OperandValToReplace->getType(), "tmp",
5954         if (L->contains(I) && !L->contains(BB))
5955           InsertedNonLCSSAInsts.insert(I);
5958         Pair.first->second = FullV;
5965   if (needUpdateFixups) {
5966     for (LSRUse &LU : Uses)
5967       for (LSRFixup &Fixup : LU.Fixups)
5971         if (Fixup.UserInst == PN) {
5974           bool foundInOriginalPHI = false;
5976             if (val == Fixup.OperandValToReplace) {
5977               foundInOriginalPHI = true;
5982           if (foundInOriginalPHI)
5993             if (val == Fixup.OperandValToReplace)
5994               Fixup.UserInst = NewPN;
6004 void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
6006                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6010     RewriteForPHI(PN, LU, LF, F, DeadInsts);
6016     if (FullV->getType() != OpTy) {
6028   if (LU.Kind == LSRUse::ICmpZero)
6044                                        const LSRFixup &Fixup, const LSRUse &LU,
6048   if (LU.Kind != LSRUse::Address)
6049     return IVIncInsertPos;
6053   Type *Ty = I->getType();
6056     return IVIncInsertPos;
6063   return IVIncInsertPos;
6070 void LSRInstance::ImplementSolution(
6071     const SmallVectorImpl<const Formula *> &Solution) {
6077   for (const IVChain &Chain : IVChainVec) {
6083   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
6084     for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
6087       Rewriter.setIVIncInsertPos(L, InsertPos);
6088       Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
6092   auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6095   for (const IVChain &Chain : IVChainVec) {
6096     GenerateIVChain(Chain, DeadInsts);
6100   for (const WeakVH &IV : Rewriter.getInsertedIVs())
6118   for (PHINode &PN : L->getHeader()->phis()) {
6119     BinaryOperator *BO = nullptr;
6125     case Instruction::Sub:
6130     case Instruction::Add:
6147           [&](Use &U) { return DT.dominates(IVIncInsertPos, U); }))
6156 LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6157                          DominatorTree &DT, LoopInfo &LI,
6158                          const TargetTransformInfo &TTI, AssumptionCache &AC,
6159                          TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6160     : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
6163           : TTI.getPreferredAddressingMode(L, &SE)),
6166   if (!L->isLoopSimplifyForm())
6174   unsigned NumUsers = 0;
6178       LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
6186       auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();
6196   L->getHeader()->printAsOperand(dbgs(), false);
6202   HardwareLoopProfitable =
6203       TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6207 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
6210   Rewriter.disableCanonicalMode();
6211   Rewriter.enableLSRMode();
6215   OptimizeLoopTermCond();
6218   if (IU.empty())
       return;
6221   if (!L->isInnermost()) {
6234   CollectInterestingTypesAndFactors();
6235   CollectFixupsAndInitialFormulae();
6236   CollectLoopInvariantFixupsAndFormulae();
6242              print_uses(dbgs()));
6244              BaselineCost.print(dbgs()); dbgs() << "\n");
6248   GenerateAllReuseFormulae();
6250   FilterOutUndesirableDedicatedRegisters();
6251   NarrowSearchSpaceUsingHeuristics();
6261   if (Solution.empty())
6266   for (const LSRUse &LU : Uses) {
6267     for (const Formula &F : LU.Formulae)
6269                         F) && "Illegal formula generated!");
6274   ImplementSolution(Solution);
6277 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6278 void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
6279   if (Factors.empty() && Types.empty())
       return;
6281   OS << "LSR has identified the following interesting factors and types: ";
6284   for (int64_t Factor : Factors)
6285     OS << LS << '*' << Factor;
6287   for (Type *Ty : Types)
6288     OS << LS << '(' << *Ty << ')';
6292 void LSRInstance::print_fixups(raw_ostream &OS) const {
6293   OS << "LSR is examining the following fixup sites:\n";
6294   for (const LSRUse &LU : Uses)
6295     for (const LSRFixup &LF : LU.Fixups) {
6302 void LSRInstance::print_uses(raw_ostream &OS) const {
6303   OS << "LSR is examining the following uses:\n";
6304   for (const LSRUse &LU : Uses) {
6308     for (const Formula &F : LU.Formulae) {
6316 void LSRInstance::print(raw_ostream &OS) const {
6317   print_factors_and_types(OS);
6329 class LoopStrengthReduce : public LoopPass {
6333   LoopStrengthReduce();
6336   bool runOnLoop(Loop *L, LPPassManager &LPM) override;
6337   void getAnalysisUsage(AnalysisUsage &AU) const override;
6342 LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
6346 void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
6373 ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6374   llvm::DIExpression::expr_op_iterator Begin =
6375       llvm::DIExpression::expr_op_iterator(Expr.begin());
6376   llvm::DIExpression::expr_op_iterator End =
6377       llvm::DIExpression::expr_op_iterator(Expr.end());
6378   return {Begin, End};
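// Helper used by debug-value salvaging: translates SCEV expressions for an
// optimized-away IV into an equivalent DIExpression stack program plus the
// list of location operands it refers to.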
6381 struct SCEVDbgValueBuilder {
6382   SCEVDbgValueBuilder() = default;
6383   SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }
6385   void clone(const SCEVDbgValueBuilder &Base) {
6386     LocationOps = Base.LocationOps;
6391     LocationOps.clear();
6398   SmallVector<Value *, 2> LocationOps;
6401   void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
6408     unsigned ArgIndex = 0;
6409     if (It != LocationOps.end()) {
6410       ArgIndex = std::distance(LocationOps.begin(), It);
6412       ArgIndex = LocationOps.size();
6418   void pushValue(const SCEVUnknown *U) {
6423   bool pushConst(const SCEVConstant *C) {
6424     if (C->getAPInt().getSignificantBits() > 64)
6426     Expr.push_back(llvm::dwarf::DW_OP_consts);
6427     Expr.push_back(C->getAPInt().getSExtValue());
6434     return ToDwarfOpIter(Expr);
6439   bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
6442            "Expected arithmetic SCEV type");
6444     unsigned EmitOperator = 0;
6445     for (const auto &Op : CommExpr->operands()) {
6448       if (EmitOperator >= 1)
6449         pushOperator(DwarfOp);
6456   bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
6457     const llvm::SCEV *Inner = C->getOperand(0);
6458     const llvm::Type *Type = C->getType();
6459     uint64_t ToWidth = Type->getIntegerBitWidth();
6460     bool Success = pushSCEV(Inner);
6462         IsSigned ? llvm::dwarf::DW_ATE_signed
6463                  : llvm::dwarf::DW_ATE_unsigned};
6464     for (const auto &Op : CastOps)
6470   bool pushSCEV(const llvm::SCEV *S) {
6473       Success &= pushConst(StartInt);
6478       pushLocation(U->getValue());
6481       Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6484       Success &= pushSCEV(UDiv->getLHS());
6485       Success &= pushSCEV(UDiv->getRHS());
6486       pushOperator(llvm::dwarf::DW_OP_div);
6492              "Unexpected cast type in SCEV.");
6496       Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6511   bool isIdentityFunction(uint64_t Op, const SCEV *S) {
6513       if (C->getAPInt().getSignificantBits() > 64)
6515       int64_t I = C->getAPInt().getSExtValue();
6517       case llvm::dwarf::DW_OP_plus:
6518       case llvm::dwarf::DW_OP_minus:
6520       case llvm::dwarf::DW_OP_mul:
6521       case llvm::dwarf::DW_OP_div:
6534   bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6540     if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6541       if (!pushSCEV(Stride))
6543       pushOperator(llvm::dwarf::DW_OP_mul);
6545     if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6546       if (!pushSCEV(Start))
6548       pushOperator(llvm::dwarf::DW_OP_plus);
6554   void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
6555     pushLocation(OffsetValue);
6558     dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
6559            << std::to_string(Offset) << "\n");
6565   bool createIterCountExpr(const SCEV *S,
6566                            const SCEVDbgValueBuilder &IterationCount,
6567                            ScalarEvolution &SE) {
6576     LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
6580     if (!Rec->isAffine())
6588     clone(IterationCount);
6589     if (!SCEVToValueExpr(*Rec, SE))
6600   bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
6601                            ScalarEvolution &SE) {
6607     if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6608       if (!pushSCEV(Start))
6610       pushOperator(llvm::dwarf::DW_OP_minus);
6612     if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6613       if (!pushSCEV(Stride))
6615       pushOperator(llvm::dwarf::DW_OP_div);
6623   void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6624                        SmallVectorImpl<Value *> &DestLocations) {
6626            "Expected the locations vector to contain the IV");
6631            "Expected the location ops to contain the IV.");
6635     for (const auto &Op : LocationOps) {
6636       auto It = find(DestLocations, Op);
6637       if (It != DestLocations.end()) {
6639         DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
6647     for (const auto &Op : expr_ops()) {
6649       Op.appendToVector(DestExpr);
6656       uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
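// Per-dbg.value record kept across LSR: the original expression and location
// operands, their SCEVs, and the recovery expressions built during salvaging,
// so the debug record can be restored or rewritten after the transform.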
6664 struct DVIRecoveryRec {
6665   DVIRecoveryRec(DbgVariableRecord *DVR)
6666       : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}
6668   DbgVariableRecord *DbgRef;
6670   bool HadLocationArgList;
6676     for (auto &RE : RecoveryExprs)
6678     RecoveryExprs.clear();
6681   ~DVIRecoveryRec() { clear(); }
6689   auto expr_ops = ToDwarfOpIter(Expr);
6691   for (auto Op : expr_ops)
6700 template <typename T>
6704          "contain any DW_OP_llvm_arg operands.");
6711 template <typename T>
6716          "Expected expression that references DIArglist locations using "
6717          "DW_OP_llvm_arg operands.");
6719   for (Value *V : Locations)
6736   if (NumLLVMArgs == 0) {
6743            "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6773   LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
6774                     << "scev-salvage: post-LSR: " << *DbgVal << '\n');
6775   assert(DVIRec.Expr && "Expected an expression");
6780   if (!DVIRec.HadLocationArgList) {
6781     assert(DVIRec.LocationOps.size() == 1 &&
6782            "Unexpected number of location ops.");
6786     Value *CachedValue =
6791     for (WeakVH VH : DVIRec.LocationOps) {
6799   LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
6804                        const SCEV *SCEVInductionVar,
6805                        SCEVDbgValueBuilder IterCountExpr) {
6819   LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
6821   NewLocationOps.push_back(LSRInductionVar);
6823   for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6824     WeakVH VH = DVIRec.LocationOps[i];
6830       LocationOpIndexMap[i] = NewLocationOps.size() - 1;
6832                         << " now at index " << LocationOpIndexMap[i] << "\n");
6840       LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
6841                         << " refers to a location that is now undef or erased. "
6842                            "Salvage abandoned.\n");
6846     LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
6847                       << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
6849     DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6850     SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6854     if (std::optional<APInt> Offset =
6856       if (Offset->getSignificantBits() <= 64)
6857         SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
6860     } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6869     assert(DVIRec.RecoveryExprs.size() == 1 &&
6870            "Expected only a single recovery expression for an empty "
6872     assert(DVIRec.RecoveryExprs[0] &&
6873            "Expected a SCEVDbgSalvageBuilder for location 0");
6874     SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
6875     B->appendToVectors(NewExpr, NewLocationOps);
6877     for (const auto &Op : DVIRec.Expr->expr_ops()) {
6885         SCEVDbgValueBuilder *DbgBuilder =
6886             DVIRec.RecoveryExprs[LocationArgIndex].get();
6892         assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
6893                "Expected a positive index for the location-op position.");
6894         NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
6898         DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
6902   LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");
6910     SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6911   if (DVIToUpdate.empty())
6915   assert(SCEVInductionVar &&
6916          "Anticipated a SCEV for the post-LSR induction variable");
6920     if (!IVAddRec->isAffine())
6928     SCEVDbgValueBuilder IterCountExpr;
6929     IterCountExpr.pushLocation(LSRInductionVar);
6930     if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6933     LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
6936     for (auto &DVIRec : DVIToUpdate) {
6937       SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6948     SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
6949   for (const auto &B : L->getBlocks()) {
6950     for (auto &I : *B) {
6952         if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
6957         if (DbgVal.isKillLocation())
6962         const auto &HasTranslatableLocationOps =
6964               for (const auto LocOp : DbgValToTranslate.location_ops()) {
6978         if (!HasTranslatableLocationOps(DbgVal))
6981         std::unique_ptr<DVIRecoveryRec> NewRec =
6982             std::make_unique<DVIRecoveryRec>(&DbgVal);
6986         NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
6987         for (const auto LocOp : DbgVal.location_ops()) {
6988           NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
6989           NewRec->LocationOps.push_back(LocOp);
6990           NewRec->HadLocationArgList = DbgVal.hasArgList();
6992         SalvageableDVISCEVs.push_back(std::move(NewRec));
7002                                               const LSRInstance &LSR) {
7004   auto IsSuitableIV = [&](PHINode *P) {
7015   for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
7022     if (IsSuitableIV(P))
7026   for (PHINode &P : L.getHeader()->phis()) {
7027     if (IsSuitableIV(&P))
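// Entry point shared by the legacy and new pass managers: run LSRInstance on
// the loop, optionally clean up redundant IVs and now-dead instructions, and
// finish by salvaging the dbg.value records gathered before the rewrite.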
7045   std::unique_ptr<MemorySSAUpdater> MSSAU;
7047     MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7050   const LSRInstance &Reducer =
7051       LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
7052   Changed |= Reducer.getChanged();
7059 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
7062     unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
7076   if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7090   if (SalvageableDVIRecords.empty())
7096     for (const auto &L : LI) {
7100       LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
7101                            "could not be identified.\n");
7105   for (auto &Rec : SalvageableDVIRecords)
7107   SalvageableDVIRecords.clear();
7111 bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &) {
7115   auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7116   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7117   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7118   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7119   const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7120       *L->getHeader()->getParent());
7121   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7122       *L->getHeader()->getParent());
7123   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7124       *L->getHeader()->getParent());
7125   auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7128     MSSA = &MSSAAnalysis->getMSSA();
7145 char LoopStrengthReduce::ID = 0;
7148                       "Loop Strength Reduction", false, false)
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static Instruction * getFixupInsertPos(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, DominatorTree &DT)
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
bool isUnconditional() const
Value * getCondition() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
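A minimal sketch using the Loop queries above (illustrative only; L is an analyzed llvm::Loop):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

// Hypothetical dump of the blocks through which control can leave L.
static void printExits(const Loop *L) {
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  for (BasicBlock *BB : ExitingBlocks)
    dbgs() << "depth " << L->getLoopDepth() << ": exiting block "
           << BB->getName() << "\n";
}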
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
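A minimal sketch of the PHINode interface listed above (illustrative only; the block and value parameters are hypothetical placeholders):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Build an induction-variable phi: Start flows in from the preheader, Next
// from the latch; incoming slots can later be rewritten by index.
static PHINode *makeIVPhi(Type *Ty, BasicBlock *Header, BasicBlock *Preheader,
                          BasicBlock *Latch, Value *Start, Value *Next) {
  PHINode *IV = PHINode::Create(Ty, /*NumReservedValues=*/2, "iv",
                                Header->begin());
  IV->addIncoming(Start, Preheader);
  IV->addIncoming(Next, Latch);
  int LatchIdx = IV->getBasicBlockIndex(Latch);
  if (LatchIdx >= 0)
    IV->setIncomingValue(LatchIdx, Next);
  return IV;
}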
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
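A minimal sketch of decomposing an add-recurrence with the accessors above (illustrative only; S and SE are an analyzed SCEV and the ScalarEvolution it came from):

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// Split an affine {Start,+,Step}<L> recurrence into its components.
static void splitAffineAddRec(const SCEV *S, ScalarEvolution &SE) {
  if (const auto *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (!AR->isAffine())
      return;
    const SCEV *Start = AR->getStart();
    const SCEV *Step = AR->getStepRecurrence(SE);
    const Loop *L = AR->getLoop();
    (void)Start; (void)Step; (void)L;
  }
}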
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if the elements that make up the given SCEV properly dominate the specified basic block.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
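A minimal sketch combining the ScalarEvolution queries above (illustrative only; L, StartV, and StepV are hypothetical inputs):

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Form Start + Step*i as an add-recurrence on L, provided the inputs are
// SCEV-analyzable, loop-invariant, and the loop has a computable trip count.
static const SCEV *buildRecurrence(ScalarEvolution &SE, const Loop *L,
                                   Value *StartV, Value *StepV) {
  if (!SE.isSCEVable(StartV->getType()))
    return nullptr;
  const SCEV *Start = SE.getSCEV(StartV);
  const SCEV *Step = SE.getSCEV(StepV);
  if (!SE.isLoopInvariant(Start, L) || !SE.isLoopInvariant(Step, L))
    return nullptr;
  if (isa<SCEVCouldNotCompute>(SE.getBackedgeTakenCount(L)))
    return nullptr;
  return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
}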
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
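A minimal sketch of the bit-vector scanning idiom above (illustrative only; the use-mask name is hypothetical):

#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

// Walk every set bit of a use mask with find_first/find_next, then report how
// many of the bits are set.
static void dumpUsedIndices(const SmallBitVector &UsedByIndices) {
  for (int I = UsedByIndices.find_first(); I != -1;
       I = UsedByIndices.find_next(I))
    dbgs() << "use #" << I << "\n";
  dbgs() << UsedByIndices.count() << " of " << UsedByIndices.size()
         << " bits set\n";
}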
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
void setOperand(unsigned i, Value *Val)
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
A nullable Value handle that nulls itself out if the value it points to is deleted.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
class_match< const SCEVVScale > m_SCEVVScale()
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bind_ty< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
bool match(const SCEV *S, const Pattern &P)
class_match< const Loop > m_Loop()
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
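A minimal sketch of the SCEV pattern-match helpers above (illustrative only; the header path and the SCEVPatternMatch namespace are assumed):

#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
using namespace llvm;
using namespace llvm::SCEVPatternMatch;

// Recognize an affine add-recurrence whose step is a compile-time constant
// and bind that constant to StepC.
static bool hasConstantStep(const SCEV *S, const APInt *&StepC) {
  return match(S, m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StepC)));
}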
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool isa_and_nonnull(const Y &Val)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count the number of 0's from the least significant bit upward, stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
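A minimal sketch of the cleanup idiom built on the helper above (illustrative only; MaybeDead is a hypothetical list of values that rewriting may have orphaned):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Queue candidates as WeakTrackingVHs, then let the permissive helper delete
// whatever is actually trivially dead; entries that are null or still used
// are simply skipped.
static bool cleanupDeadValues(ArrayRef<Value *> MaybeDead,
                              const TargetLibraryInfo *TLI) {
  SmallVector<WeakTrackingVH, 16> DeadInsts(MaybeDead.begin(), MaybeDead.end());
  return RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, TLI);
}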
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.