84#include "llvm/Config/llvm-config.h"
133#define DEBUG_TYPE "loop-reduce"
150 cl::desc(
"Enable LSR phi elimination"));
155 cl::desc(
"Add instruction count to a LSR cost model"));
160 cl::desc(
"Narrow LSR complex solution using"
161 " expectation of registers number"));
167 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
168 " with the same ScaledReg and Scale"));
172 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
175 "Don't prefer any addressing mode"),
178 "Prefer pre-indexed addressing mode"),
181 "Prefer post-indexed addressing mode")));
185 cl::init(std::numeric_limits<uint16_t>::max()),
186 cl::desc(
"LSR search space complexity limit"));
190 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
194 cl::desc(
"Attempt to replace primary IV with other IV."));
198 cl::desc(
"Attempt to drop solution if it is less profitable"));
202 cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
206 cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
209 "Number of terminating condition fold recognized and performed");
215 cl::desc(
"Stress test LSR IV chains"));
224 static const unsigned UnknownAddressSpace =
225 std::numeric_limits<unsigned>::max();
227 Type *MemTy =
nullptr;
228 unsigned AddrSpace = UnknownAddressSpace;
230 MemAccessTy() =
default;
231 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
234 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
240 unsigned AS = UnknownAddressSpace) {
261 constexpr Immediate(ScalarTy MinVal,
bool Scalable)
262 : FixedOrScalableQuantity(MinVal, Scalable) {}
264 constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
265 : FixedOrScalableQuantity(
V) {}
268 constexpr Immediate() =
delete;
270 static constexpr Immediate getFixed(ScalarTy MinVal) {
271 return {MinVal,
false};
273 static constexpr Immediate getScalable(ScalarTy MinVal) {
274 return {MinVal,
true};
276 static constexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
277 return {MinVal, Scalable};
279 static constexpr Immediate getZero() {
return {0,
false}; }
280 static constexpr Immediate getFixedMin() {
281 return {std::numeric_limits<int64_t>::min(),
false};
283 static constexpr Immediate getFixedMax() {
284 return {std::numeric_limits<int64_t>::max(),
false};
286 static constexpr Immediate getScalableMin() {
287 return {std::numeric_limits<int64_t>::min(),
true};
289 static constexpr Immediate getScalableMax() {
290 return {std::numeric_limits<int64_t>::max(),
true};
293 constexpr bool isLessThanZero()
const {
return Quantity < 0; }
295 constexpr bool isGreaterThanZero()
const {
return Quantity > 0; }
297 constexpr bool isCompatibleImmediate(
const Immediate &Imm)
const {
298 return isZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
301 constexpr bool isMin()
const {
302 return Quantity == std::numeric_limits<ScalarTy>::min();
305 constexpr bool isMax()
const {
306 return Quantity == std::numeric_limits<ScalarTy>::max();
310 constexpr Immediate addUnsigned(
const Immediate &RHS)
const {
311 assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
313 return {
Value, Scalable ||
RHS.isScalable()};
316 constexpr Immediate subUnsigned(
const Immediate &RHS)
const {
317 assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
319 return {
Value, Scalable ||
RHS.isScalable()};
323 constexpr Immediate mulUnsigned(
const ScalarTy RHS)
const {
325 return {
Value, Scalable};
// Strict-weak-ordering comparator for Immediate keys: fixed-width
// immediates order before scalable ones, and within the same
// scalability class ordering is by the known-minimum value.
// NOTE(review): this excerpt elides interior lines of the underlying
// file (the bodies of the two scalability guards and the closing
// braces are not visible here) — confirm against the original source.
355struct KeyOrderTargetImmediate {
356 bool operator()(
const Immediate &LHS,
const Immediate &RHS)
const {
// Mixed scalability: these two guards separate fixed from scalable
// operands (their result lines are elided in this excerpt).
357 if (
LHS.isScalable() && !
RHS.isScalable())
359 if (!
LHS.isScalable() &&
RHS.isScalable())
// Same scalability on both sides: compare the known minimum quantity.
361 return LHS.getKnownMinValue() <
RHS.getKnownMinValue();
// Comparator for (size_t, Immediate) pairs: orders primarily by the
// size_t component, and breaks ties on the Immediate component using
// KeyOrderTargetImmediate above.
// NOTE(review): the guard between the size comparison and the
// tie-break (an elided line of the underlying file) is not visible in
// this excerpt — confirm against the original source.
368struct KeyOrderSizeTAndImmediate {
369 bool operator()(
const std::pair<size_t, Immediate> &LHS,
370 const std::pair<size_t, Immediate> &RHS)
const {
371 size_t LSize =
LHS.first;
372 size_t RSize =
RHS.first;
// Primary key: the size_t component.
374 return LSize < RSize;
// Secondary key: delegate to the Immediate comparator.
375 return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
380#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
382 OS <<
"[NumUses=" << UsedByIndices.count() <<
']';
396 RegUsesTy RegUsesMap;
400 void countRegister(
const SCEV *Reg,
size_t LUIdx);
401 void dropRegister(
const SCEV *Reg,
size_t LUIdx);
402 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
404 bool isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const;
// Record that register expression Reg is used by the LSR use at index
// LUIdx: insert Reg into RegUsesMap (default-constructing its
// RegSortData on first sight), then set bit LUIdx in the register's
// used-by-indices bit-vector, growing the vector if needed.
// NOTE(review): original lines 426-427 of the underlying file are not
// visible in this excerpt — confirm nothing else happens between the
// map insert and the bit update.
422RegUseTracker::countRegister(
const SCEV *Reg,
size_t LUIdx) {
423 std::pair<RegUsesTy::iterator, bool> Pair =
424 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
425 RegSortData &RSD = Pair.first->second;
// Grow the bit-vector so bit LUIdx exists, then mark this use.
428 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
429 RSD.UsedByIndices.set(LUIdx);
// Forget that register expression Reg is used by the LSR use at index
// LUIdx: look up the register's RegSortData (which must exist) and
// clear bit LUIdx in its used-by-indices bit-vector.
433RegUseTracker::dropRegister(
const SCEV *Reg,
size_t LUIdx) {
434 RegUsesTy::iterator It = RegUsesMap.find(Reg);
// The register must already have been counted for some use.
435 assert(It != RegUsesMap.end());
436 RegSortData &RSD = It->second;
// The bit-vector must be large enough to hold LUIdx before clearing.
437 assert(RSD.UsedByIndices.size() > LUIdx);
438 RSD.UsedByIndices.reset(LUIdx);
// Update every register's used-by-indices bit-vector for the deletion
// of the use at LUIdx, which is replaced by the (formerly last) use at
// LastLUIdx: copy bit LastLUIdx into bit LUIdx, then shrink the vector
// so LastLUIdx itself is dropped.
// NOTE(review): the line binding `UsedByIndices` inside the loop
// (original line 448) is elided in this excerpt — it presumably
// references Pair.second's bit-vector; confirm against the original.
442RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
443 assert(LUIdx <= LastLUIdx);
// Remap the bit for every tracked register.
447 for (
auto &Pair : RegUsesMap) {
449 if (LUIdx < UsedByIndices.
size())
// Move the last use's bit down into the deleted slot; if the vector
// never covered LastLUIdx, the slot becomes false.
450 UsedByIndices[LUIdx] =
451 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
// Trim the vector so the now-stale LastLUIdx bit is discarded.
452 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
// Return true if register expression Reg is used by any LSR use other
// than the one at index LUIdx; an unknown register is not used by
// anything.
// NOTE(review): the declaration of `i` (original lines 460-462, elided
// here) is not visible — it appears to hold a set-bit index of the
// used-by-indices bit-vector, with -1 meaning no bit set; confirm
// against the original source.
457RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const {
458 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
// Reg was never counted: no use references it.
459 if (
I == RegUsesMap.end())
// No set bit at all: not used by any use.
463 if (i == -1)
return false;
// A set bit at an index other than LUIdx means some other use has it.
464 if ((
size_t)i != LUIdx)
return true;
469 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
470 assert(
I != RegUsesMap.end() &&
"Unknown register!");
471 return I->second.UsedByIndices;
474void RegUseTracker::clear() {
488 Immediate BaseOffset = Immediate::getZero();
491 bool HasBaseReg =
false;
514 const SCEV *ScaledReg =
nullptr;
519 Immediate UnfoldedOffset = Immediate::getZero();
527 void canonicalize(
const Loop &L);
531 bool hasZeroEnd()
const;
533 size_t getNumRegs()
const;
536 void deleteBaseReg(
const SCEV *&S);
538 bool referencesReg(
const SCEV *S)
const;
539 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
540 const RegUseTracker &RegUses)
const;
561 for (
const SCEV *S :
Add->operands())
568 if (!AR->getStart()->isZero() && AR->isAffine()) {
571 AR->getStepRecurrence(SE),
587 const SCEV *NegOne = SE.
getSCEV(ConstantInt::getAllOnesValue(
589 for (
const SCEV *S : MyGood)
591 for (
const SCEV *S : MyBad)
610 BaseRegs.push_back(Sum);
616 BaseRegs.push_back(Sum);
624 return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
631bool Formula::isCanonical(
const Loop &L)
const {
633 return BaseRegs.size() <= 1;
638 if (Scale == 1 && BaseRegs.empty())
658void Formula::canonicalize(
const Loop &L) {
662 if (BaseRegs.empty()) {
664 assert(ScaledReg &&
"Expected 1*reg => reg");
665 assert(Scale == 1 &&
"Expected 1*reg => reg");
666 BaseRegs.push_back(ScaledReg);
674 ScaledReg = BaseRegs.pop_back_val();
685 if (
I != BaseRegs.end())
695bool Formula::unscale() {
699 BaseRegs.push_back(ScaledReg);
704bool Formula::hasZeroEnd()
const {
705 if (UnfoldedOffset || BaseOffset)
707 if (BaseRegs.size() != 1 || ScaledReg)
714size_t Formula::getNumRegs()
const {
715 return !!ScaledReg + BaseRegs.size();
720Type *Formula::getType()
const {
721 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
722 ScaledReg ? ScaledReg->getType() :
723 BaseGV ? BaseGV->getType() :
728void Formula::deleteBaseReg(
const SCEV *&S) {
729 if (&S != &BaseRegs.back())
735bool Formula::referencesReg(
const SCEV *S)
const {
741bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
742 const RegUseTracker &RegUses)
const {
744 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
746 for (
const SCEV *BaseReg : BaseRegs)
747 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
752#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
757 BaseGV->printAsOperand(
OS,
false);
759 if (BaseOffset.isNonZero()) {
763 for (
const SCEV *BaseReg : BaseRegs) {
765 OS <<
"reg(" << *BaseReg <<
')';
767 if (HasBaseReg && BaseRegs.empty()) {
769 OS <<
"**error: HasBaseReg**";
770 }
else if (!HasBaseReg && !BaseRegs.empty()) {
772 OS <<
"**error: !HasBaseReg**";
776 OS << Scale <<
"*reg(";
783 if (UnfoldedOffset.isNonZero()) {
785 OS <<
"imm(" << UnfoldedOffset <<
')';
826 bool IgnoreSignificantBits =
false) {
837 if (
RA.isAllOnes()) {
851 const APInt &LA =
C->getAPInt();
860 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
862 IgnoreSignificantBits);
863 if (!Step)
return nullptr;
865 IgnoreSignificantBits);
866 if (!Start)
return nullptr;
879 for (
const SCEV *S :
Add->operands()) {
881 if (!
Op)
return nullptr;
897 dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
912 IgnoreSignificantBits)) {
931 if (
C->getAPInt().getSignificantBits() <= 64) {
933 return Immediate::getFixed(
C->getValue()->getSExtValue());
938 if (Result.isNonZero())
941 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
944 if (Result.isNonZero())
949 }
else if (
const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
951 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(M->getOperand(0)))
952 if (isa<SCEVVScale>(M->getOperand(1))) {
954 return Immediate::getScalable(
C->getValue()->getSExtValue());
958 return Immediate::getZero();
964 if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
965 if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
975 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
991 bool isAddress = isa<LoadInst>(Inst);
992 if (
StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
993 if (SI->getPointerOperand() == OperandVal)
998 switch (
II->getIntrinsicID()) {
999 case Intrinsic::memset:
1000 case Intrinsic::prefetch:
1001 case Intrinsic::masked_load:
1002 if (
II->getArgOperand(0) == OperandVal)
1005 case Intrinsic::masked_store:
1006 if (
II->getArgOperand(1) == OperandVal)
1009 case Intrinsic::memmove:
1010 case Intrinsic::memcpy:
1011 if (
II->getArgOperand(0) == OperandVal ||
1012 II->getArgOperand(1) == OperandVal)
1018 if (IntrInfo.
PtrVal == OperandVal)
1023 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1024 if (RMW->getPointerOperand() == OperandVal)
1027 if (CmpX->getPointerOperand() == OperandVal)
1036 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1040 AccessTy.MemTy = Ty;
1043 if (
const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1044 AccessTy.AddrSpace = SI->getPointerAddressSpace();
1045 }
else if (
const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
1046 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1047 }
else if (
const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1048 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1050 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1052 switch (
II->getIntrinsicID()) {
1053 case Intrinsic::prefetch:
1054 case Intrinsic::memset:
1055 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1056 AccessTy.MemTy = OperandVal->
getType();
1058 case Intrinsic::memmove:
1059 case Intrinsic::memcpy:
1061 AccessTy.MemTy = OperandVal->
getType();
1063 case Intrinsic::masked_load:
1064 AccessTy.AddrSpace =
1065 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1067 case Intrinsic::masked_store:
1068 AccessTy.AddrSpace =
1069 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1129 if (!Processed.
insert(S).second)
1133 for (
const SCEV *S :
Add->operands()) {
1149 Value *UVal = U->getValue();
1153 if (UI && UI->
getOpcode() == Instruction::Mul &&
1187 const LSRUse &LU,
const Formula &
F);
1191 const LSRUse &LU,
const Formula &
F,
1198 const Loop *
L =
nullptr;
1208 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1226 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1227 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1228 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1229 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0u);
1235 return C.NumRegs == ~0
u;
1238 void RateFormula(
const Formula &
F,
1248 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1250 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1263 Value *OperandValToReplace =
nullptr;
1273 Immediate
Offset = Immediate::getZero();
1275 LSRFixup() =
default;
1277 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1285struct UniquifierDenseMapInfo {
1288 V.push_back(
reinterpret_cast<const SCEV *
>(-1));
1294 V.push_back(
reinterpret_cast<const SCEV *
>(-2));
1330 MemAccessTy AccessTy;
1336 Immediate MinOffset = Immediate::getFixedMax();
1337 Immediate MaxOffset = Immediate::getFixedMin();
1341 bool AllFixupsOutsideLoop =
true;
1348 bool RigidFormula =
false;
1354 Type *WidestFixupType =
nullptr;
1364 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1366 LSRFixup &getNewFixup() {
1367 Fixups.push_back(LSRFixup());
1371 void pushFixup(LSRFixup &f) {
1373 if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1374 MaxOffset =
f.Offset;
1375 if (Immediate::isKnownLT(
f.Offset, MinOffset))
1376 MinOffset =
f.Offset;
1379 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1380 float getNotSelectedProbability(
const SCEV *Reg)
const;
1381 bool InsertFormula(
const Formula &
F,
const Loop &L);
1382 void DeleteFormula(Formula &
F);
1383 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1392 LSRUse::KindType Kind, MemAccessTy AccessTy,
1394 bool HasBaseReg, int64_t Scale,
1398 if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1402 if (
const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1404 if (
auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1406 if (
auto S = dyn_cast<SCEVNAryExpr>(Reg))
1408 [&](
unsigned i,
const SCEV *Reg) {
1409 return i + getSetupCost(Reg, Depth - 1);
1411 if (
auto S = dyn_cast<SCEVUDivExpr>(Reg))
1418void Cost::RateRegister(
const Formula &
F,
const SCEV *Reg,
1424 if (AR->getLoop() != L) {
1431 if (!AR->getLoop()->contains(L)) {
1441 unsigned LoopCost = 1;
1448 if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1449 if (Step->getAPInt() ==
F.BaseOffset.getFixedValue())
1452 const SCEV *LoopStep = AR->getStepRecurrence(*SE);
1453 if (isa<SCEVConstant>(LoopStep)) {
1454 const SCEV *LoopStart = AR->getStart();
1455 if (!isa<SCEVConstant>(LoopStart) &&
1461 C.AddRecCost += LoopCost;
1465 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1466 if (!Regs.
count(AR->getOperand(1))) {
1467 RateRegister(
F, AR->getOperand(1), Regs);
1479 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1481 C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1488void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *Reg,
1491 if (LoserRegs && LoserRegs->
count(Reg)) {
1495 if (Regs.
insert(Reg).second) {
1496 RateRegister(
F, Reg, Regs);
1497 if (LoserRegs && isLoser())
1502void Cost::RateFormula(
const Formula &
F,
1509 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1511 unsigned PrevAddRecCost =
C.AddRecCost;
1512 unsigned PrevNumRegs =
C.NumRegs;
1513 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1514 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1515 if (VisitedRegs.
count(ScaledReg)) {
1519 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1523 for (
const SCEV *BaseReg :
F.BaseRegs) {
1524 if (VisitedRegs.
count(BaseReg)) {
1528 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1534 size_t NumBaseParts =
F.getNumRegs();
1535 if (NumBaseParts > 1)
1540 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1546 for (
const LSRFixup &
Fixup : LU.Fixups) {
1547 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1548 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1552 else if (
Offset.isNonZero())
1558 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1579 if (
C.NumRegs > TTIRegNum) {
1582 if (PrevNumRegs > TTIRegNum)
1583 C.Insns += (
C.NumRegs - PrevNumRegs);
1585 C.Insns += (
C.NumRegs - TTIRegNum);
1598 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1602 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1605 if (LU.Kind != LSRUse::ICmpZero)
1606 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1612 C.Insns = std::numeric_limits<unsigned>::max();
1613 C.NumRegs = std::numeric_limits<unsigned>::max();
1614 C.AddRecCost = std::numeric_limits<unsigned>::max();
1615 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1616 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1617 C.ImmCost = std::numeric_limits<unsigned>::max();
1618 C.SetupCost = std::numeric_limits<unsigned>::max();
1619 C.ScaleCost = std::numeric_limits<unsigned>::max();
1623bool Cost::isLess(
const Cost &
Other)
const {
1625 C.Insns !=
Other.C.Insns)
1626 return C.Insns <
Other.C.Insns;
1630#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1633 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1634 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1635 if (
C.AddRecCost != 0)
1636 OS <<
", with addrec cost " <<
C.AddRecCost;
1637 if (
C.NumIVMuls != 0)
1638 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1639 << (
C.NumIVMuls == 1 ?
"" :
"s");
1640 if (
C.NumBaseAdds != 0)
1641 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1642 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1643 if (
C.ScaleCost != 0)
1644 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1646 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1647 if (
C.SetupCost != 0)
1648 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
// Return true if this fixup's user instruction will execute entirely
// outside loop L. A PHI user is examined per incoming edge: if the
// replaced operand arrives along an edge from a block inside L, the
// use is not fully outside (the body of that check is elided in this
// excerpt). Any other user is outside iff L does not contain it.
1657bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1659 if (
const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
// Check each incoming edge that carries the value being replaced.
1660 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1661 if (PN->getIncomingValue(i) == OperandValToReplace &&
1662 L->contains(PN->getIncomingBlock(i)))
// Non-PHI user: outside iff the loop does not contain the instruction.
1667 return !
L->contains(UserInst);
1670#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1674 if (
StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1676 Store->getOperand(0)->printAsOperand(
OS,
false);
1677 }
else if (UserInst->getType()->isVoidTy())
1678 OS << UserInst->getOpcodeName();
1680 UserInst->printAsOperand(
OS,
false);
1682 OS <<
", OperandValToReplace=";
1683 OperandValToReplace->printAsOperand(
OS,
false);
1685 for (
const Loop *PIL : PostIncLoops) {
1686 OS <<
", PostIncLoop=";
1687 PIL->getHeader()->printAsOperand(
OS,
false);
1701bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1703 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1706 return Uniquifier.
count(Key);
// Return the fraction of this use's formulae that do NOT reference
// Reg, i.e. (|Formulae| - FNum) / |Formulae|.
// NOTE(review): the declaration and increment of FNum (elided in this
// excerpt) are not visible — FNum presumably counts the formulae for
// which referencesReg(Reg) holds; confirm against the original source.
1710float LSRUse::getNotSelectedProbability(
const SCEV *Reg)
const {
// Count formulae that reference Reg.
1712 for (
const Formula &
F : Formulae)
1713 if (
F.referencesReg(Reg))
1715 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1720bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1721 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1723 if (!Formulae.empty() && RigidFormula)
1727 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1731 if (!Uniquifier.
insert(Key).second)
1735 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1736 "Zero allocated in a scaled register!");
1738 for (
const SCEV *BaseReg :
F.BaseRegs)
1739 assert(!BaseReg->
isZero() &&
"Zero allocated in a base register!");
1743 Formulae.push_back(
F);
1746 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
// Remove formula F from this use via swap-with-back and pop, avoiding
// a linear erase from the middle of the Formulae vector.
// NOTE(review): the line between the guard and the pop (original line
// 1756, elided here) presumably swaps F with Formulae.back(); confirm
// against the original source.
1754void LSRUse::DeleteFormula(Formula &
F) {
// Only swap when F is not already the last element.
1755 if (&
F != &Formulae.back())
1757 Formulae.pop_back();
1761void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1765 for (
const Formula &
F : Formulae) {
1766 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1767 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1771 for (
const SCEV *S : OldRegs)
1773 RegUses.dropRegister(S, LUIdx);
1776#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1778 OS <<
"LSR Use: Kind=";
1780 case Basic:
OS <<
"Basic";
break;
1781 case Special:
OS <<
"Special";
break;
1782 case ICmpZero:
OS <<
"ICmpZero";
break;
1784 OS <<
"Address of ";
1785 if (AccessTy.MemTy->isPointerTy())
1788 OS << *AccessTy.MemTy;
1791 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1794 OS <<
", Offsets={";
1795 bool NeedComma =
false;
1796 for (
const LSRFixup &
Fixup : Fixups) {
1797 if (NeedComma)
OS <<
',';
1803 if (AllFixupsOutsideLoop)
1804 OS <<
", all-fixups-outside-loop";
1806 if (WidestFixupType)
1807 OS <<
", widest fixup type: " << *WidestFixupType;
1816 LSRUse::KindType Kind, MemAccessTy AccessTy,
1818 bool HasBaseReg, int64_t Scale,
1821 case LSRUse::Address: {
1822 int64_t FixedOffset =
1823 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1824 int64_t ScalableOffset =
1825 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1827 HasBaseReg, Scale, AccessTy.AddrSpace,
1828 Fixup, ScalableOffset);
1830 case LSRUse::ICmpZero:
1837 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1842 if (Scale != 0 && Scale != -1)
1847 if (BaseOffset.isNonZero()) {
1850 if (BaseOffset.isScalable())
1860 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1869 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1871 case LSRUse::Special:
1873 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1880 Immediate MinOffset, Immediate MaxOffset,
1881 LSRUse::KindType Kind, MemAccessTy AccessTy,
1883 bool HasBaseReg, int64_t Scale) {
1884 if (BaseOffset.isNonZero() &&
1885 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1886 BaseOffset.isScalable() != MaxOffset.isScalable()))
1889 int64_t
Base = BaseOffset.getKnownMinValue();
1890 int64_t Min = MinOffset.getKnownMinValue();
1891 int64_t Max = MaxOffset.getKnownMinValue();
1894 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1897 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1900 HasBaseReg, Scale) &&
1906 Immediate MinOffset, Immediate MaxOffset,
1907 LSRUse::KindType Kind, MemAccessTy AccessTy,
1908 const Formula &
F,
const Loop &L) {
1916 assert((
F.isCanonical(L) ||
F.Scale != 0));
1918 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1923 Immediate MaxOffset, LSRUse::KindType Kind,
1925 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1928 BaseOffset, HasBaseReg, Scale) ||
1933 BaseGV, BaseOffset,
true, 0));
1937 Immediate MaxOffset, LSRUse::KindType Kind,
1938 MemAccessTy AccessTy,
const Formula &
F) {
1939 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1940 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1952 const LSRUse &LU,
const Formula &
F) {
1955 for (
const LSRFixup &
Fixup : LU.Fixups)
1957 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1958 F.Scale,
Fixup.UserInst))
1964 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1969 const LSRUse &LU,
const Formula &
F,
1978 return F.Scale != 1;
1981 case LSRUse::Address: {
1983 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1984 if (
F.BaseOffset.isScalable()) {
1985 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1986 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1988 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1989 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1993 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1996 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1999 "Legal addressing mode has an illegal cost!");
2000 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
2002 case LSRUse::ICmpZero:
2004 case LSRUse::Special:
2014 LSRUse::KindType Kind, MemAccessTy AccessTy,
2018 if (BaseOffset.isZero() && !BaseGV)
2023 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2027 if (!HasBaseReg && Scale == 1) {
2037 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2047 Immediate MaxOffset, LSRUse::KindType Kind,
2048 MemAccessTy AccessTy,
const SCEV *S,
2051 if (S->
isZero())
return true;
2059 if (!S->
isZero())
return false;
2062 if (BaseOffset.isZero() && !BaseGV)
2065 if (BaseOffset.isScalable())
2070 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2073 BaseOffset, HasBaseReg, Scale);
2090 const SCEV *IncExpr;
2093 : UserInst(
U), IVOperand(
O), IncExpr(E) {}
2100 const SCEV *ExprBase =
nullptr;
2102 IVChain() =
default;
2103 IVChain(
const IVInc &Head,
const SCEV *
Base)
2104 : Incs(1, Head), ExprBase(
Base) {}
2111 return std::next(Incs.
begin());
2118 bool hasIncs()
const {
return Incs.
size() >= 2; }
2127 bool isProfitableIncrement(
const SCEV *OperExpr,
2128 const SCEV *IncExpr,
2153 bool Changed =
false;
2180 RegUseTracker RegUses;
2185 static const unsigned MaxChains = 8;
2196 void OptimizeShadowIV();
2199 void OptimizeLoopTermCond();
2203 void FinalizeChain(IVChain &Chain);
2204 void CollectChains();
2205 void GenerateIVChain(
const IVChain &Chain,
2208 void CollectInterestingTypesAndFactors();
2209 void CollectFixupsAndInitialFormulae();
2215 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2216 LSRUse::KindType Kind, MemAccessTy AccessTy);
2218 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2219 MemAccessTy AccessTy);
2221 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2223 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2225 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2226 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2227 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2228 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2230 void CollectLoopInvariantFixupsAndFormulae();
2232 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2233 unsigned Depth = 0);
2235 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2237 size_t Idx,
bool IsScaledReg =
false);
2238 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2239 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2240 const Formula &
Base,
size_t Idx,
2241 bool IsScaledReg =
false);
2242 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2243 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2244 const Formula &
Base,
2246 size_t Idx,
bool IsScaledReg =
false);
2247 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2248 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2249 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2250 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2251 void GenerateCrossUseConstantOffsets();
2252 void GenerateAllReuseFormulae();
2254 void FilterOutUndesirableDedicatedRegisters();
2256 size_t EstimateSearchSpaceComplexity()
const;
2257 void NarrowSearchSpaceByDetectingSupersets();
2258 void NarrowSearchSpaceByCollapsingUnrolledCode();
2259 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2260 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2261 void NarrowSearchSpaceByFilterPostInc();
2262 void NarrowSearchSpaceByDeletingCostlyFormulas();
2263 void NarrowSearchSpaceByPickingWinnerRegs();
2264 void NarrowSearchSpaceUsingHeuristics();
2269 const Cost &CurCost,
2279 const LSRUse &LU)
const;
2281 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2284 void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2287 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2296 bool getChanged()
const {
return Changed; }
2298 return ScalarEvolutionIVs;
2312void LSRInstance::OptimizeShadowIV() {
2314 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2322 Type *DestTy =
nullptr;
2323 bool IsSigned =
false;
2337 if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2339 DestTy = UCast->getDestTy();
2341 else if (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2343 DestTy = SCast->getDestTy();
2345 if (!DestTy)
continue;
2365 if (Mantissa == -1)
continue;
2369 unsigned Entry, Latch;
2379 if (!
Init)
continue;
2380 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2381 (
double)
Init->getSExtValue() :
2382 (
double)
Init->getZExtValue());
2386 if (!Incr)
continue;
2387 if (Incr->
getOpcode() != Instruction::Add
2388 && Incr->
getOpcode() != Instruction::Sub)
2404 if (!
C->getValue().isStrictlyPositive())
2412 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2414 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2415 : Instruction::FSub,
2434 if (
U.getUser() ==
Cond) {
2502 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2507 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2508 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2515 if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2516 Pred = ICmpInst::ICMP_SLE;
2518 }
else if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2519 Pred = ICmpInst::ICMP_SLT;
2521 }
else if (
const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2522 Pred = ICmpInst::ICMP_ULT;
2531 if (
Max->getNumOperands() != 2)
2534 const SCEV *MaxLHS =
Max->getOperand(0);
2535 const SCEV *MaxRHS =
Max->getOperand(1);
2540 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2553 "Loop condition operand is an addrec in a different loop!");
2557 Value *NewRHS =
nullptr;
2558 if (ICmpInst::isTrueWhenEqual(Pred)) {
2561 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2562 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2563 NewRHS = BO->getOperand(0);
2565 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2566 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2567 NewRHS = BO->getOperand(0);
2574 else if (
const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2575 NewRHS = SU->getValue();
2588 Cond->getOperand(0), NewRHS,
"scmp");
2592 Cond->replaceAllUsesWith(NewCond);
2595 Cond->eraseFromParent();
2597 if (
Cmp->use_empty())
2598 Cmp->eraseFromParent();
2604LSRInstance::OptimizeLoopTermCond() {
2621 L->getExitingBlocks(ExitingBlocks);
2629 for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2635 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2645 if (!FindIVUserForCond(
Cond, CondUse))
2659 if (!DT.dominates(ExitingBlock, LatchBlock))
2664 if (LatchBlock != ExitingBlock)
2668 if (&*UI != CondUse &&
2669 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2672 const SCEV *
A = IU.getStride(*CondUse, L);
2673 const SCEV *
B = IU.getStride(*UI, L);
2674 if (!
A || !
B)
continue;
2687 if (
C->isOne() ||
C->isMinusOne())
2688 goto decline_post_inc;
2690 if (
C->getValue().getSignificantBits() >= 64 ||
2691 C->getValue().isMinSignedValue())
2692 goto decline_post_inc;
2696 TTI, UI->getUser(), UI->getOperandValToReplace());
2697 int64_t Scale =
C->getSExtValue();
2701 AccessTy.AddrSpace))
2702 goto decline_post_inc;
2707 AccessTy.AddrSpace))
2708 goto decline_post_inc;
2713 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2719 if (
Cond->getNextNonDebugInstruction() != TermBr) {
2720 if (
Cond->hasOneUse()) {
2721 Cond->moveBefore(TermBr);
2725 Cond = cast<ICmpInst>(
Cond->clone());
2726 Cond->setName(
L->getHeader()->getName() +
".termcond");
2748 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2750 IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
// Decide whether a new fixup (with offset NewOffset, base-register
// flag HasBaseReg, kind Kind, and access type AccessTy) can share the
// existing use LU. On success the use's [MinOffset, MaxOffset] range
// and access type are widened in place and the function reports the
// merge; the early-return paths for the failure cases are elided in
// this excerpt — confirm against the original source.
2755bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2756 bool HasBaseReg, LSRUse::KindType Kind,
2757 MemAccessTy AccessTy) {
// Tentative widened state; only committed to LU at the end.
2758 Immediate NewMinOffset = LU.MinOffset;
2759 Immediate NewMaxOffset = LU.MaxOffset;
2760 MemAccessTy NewAccessTy = AccessTy;
// Uses of different kinds can never be merged.
2765 if (LU.Kind != Kind)
// Address uses with differing pointee types degrade to an unknown
// access type in the same address space.
2771 if (Kind == LSRUse::Address) {
2772 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2773 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2774 AccessTy.AddrSpace);
// Widen the offset range downward or upward as needed; each widening
// is gated on a foldability check (call elided in this excerpt).
2779 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2781 LU.MaxOffset - NewOffset, HasBaseReg))
2783 NewMinOffset = NewOffset;
2784 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2786 NewOffset - LU.MinOffset, HasBaseReg))
2788 NewMaxOffset = NewOffset;
// Scalable offsets cannot be paired with a void (unknown-sized)
// memory type.
2794 if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
2795 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
// Commit the widened range and access type to the use.
2799 LU.MinOffset = NewMinOffset;
2800 LU.MaxOffset = NewMaxOffset;
2801 LU.AccessTy = NewAccessTy;
2808std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2809 LSRUse::KindType Kind,
2810 MemAccessTy AccessTy) {
2818 Offset = Immediate::getFixed(0);
2821 std::pair<UseMapTy::iterator, bool>
P =
2825 size_t LUIdx =
P.first->second;
2826 LSRUse &LU =
Uses[LUIdx];
2827 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2829 return std::make_pair(LUIdx,
Offset);
2833 size_t LUIdx =
Uses.size();
2834 P.first->second = LUIdx;
2835 Uses.push_back(LSRUse(Kind, AccessTy));
2836 LSRUse &LU =
Uses[LUIdx];
2840 return std::make_pair(LUIdx,
Offset);
2844void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2845 if (&LU != &
Uses.back())
2850 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2856LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2857 const LSRUse &OrigLU) {
2859 for (LSRUse &LU :
Uses) {
2865 if (&LU != &OrigLU &&
2866 LU.Kind != LSRUse::ICmpZero &&
2867 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2868 LU.WidestFixupType == OrigLU.WidestFixupType &&
2869 LU.HasFormulaWithSameRegs(OrigF)) {
2871 for (
const Formula &
F : LU.Formulae) {
2874 if (
F.BaseRegs == OrigF.BaseRegs &&
2875 F.ScaledReg == OrigF.ScaledReg &&
2876 F.BaseGV == OrigF.BaseGV &&
2877 F.Scale == OrigF.Scale &&
2878 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2879 if (
F.BaseOffset.isZero())
2894void LSRInstance::CollectInterestingTypesAndFactors() {
2900 const SCEV *Expr = IU.getExpr(U);
2918 }
while (!Worklist.
empty());
2923 I = Strides.
begin(), E = Strides.
end();
I != E; ++
I)
2925 std::next(
I); NewStrideIter != E; ++NewStrideIter) {
2926 const SCEV *OldStride = *
I;
2927 const SCEV *NewStride = *NewStrideIter;
2938 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2940 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2941 Factors.insert(Factor->getAPInt().getSExtValue());
2946 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2947 Factors.insert(Factor->getAPInt().getSExtValue());
2953 if (
Types.size() == 1)
2965 for(; OI != OE; ++OI) {
2966 if (
Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2971 dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(Oper))) {
2983 if (
TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2984 return Trunc->getOperand(0);
3006 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
3008 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
3010 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
3017 if (SubExpr->getSCEVType() ==
scAddExpr)
3020 if (SubExpr->getSCEVType() !=
scMulExpr)
3026 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
3036bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3037 const SCEV *IncExpr,
3045 if (!isa<SCEVConstant>(IncExpr)) {
3047 if (isa<SCEVConstant>(SE.
getMinusSCEV(OperExpr, HeadExpr)))
3072 if (!Chain.hasIncs())
3075 if (!
Users.empty()) {
3076 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3078 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3081 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3089 if (isa<PHINode>(Chain.tailUserInst())
3090 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3093 const SCEV *LastIncExpr =
nullptr;
3094 unsigned NumConstIncrements = 0;
3095 unsigned NumVarIncrements = 0;
3096 unsigned NumReusedIncrements = 0;
3101 for (
const IVInc &Inc : Chain) {
3104 if (Inc.IncExpr->isZero())
3109 if (isa<SCEVConstant>(Inc.IncExpr)) {
3110 ++NumConstIncrements;
3114 if (Inc.IncExpr == LastIncExpr)
3115 ++NumReusedIncrements;
3119 LastIncExpr = Inc.IncExpr;
3124 if (NumConstIncrements > 1)
3131 cost += NumVarIncrements;
3135 cost -= NumReusedIncrements;
3137 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3154 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3155 const SCEV *LastIncExpr =
nullptr;
3156 for (; ChainIdx < NChains; ++ChainIdx) {
3157 IVChain &Chain = IVChainVec[ChainIdx];
3171 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
3177 if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.
isLoopInvariant(IncExpr, L))
3180 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3181 LastIncExpr = IncExpr;
3187 if (ChainIdx == NChains) {
3188 if (isa<PHINode>(UserInst))
3194 LastIncExpr = OperExpr;
3198 if (!isa<SCEVAddRecExpr>(LastIncExpr))
3201 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3203 ChainUsersVec.
resize(NChains);
3204 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3205 <<
") IV=" << *LastIncExpr <<
"\n");
3207 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3208 <<
") IV+" << *LastIncExpr <<
"\n");
3210 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3212 IVChain &Chain = IVChainVec[ChainIdx];
3216 if (!LastIncExpr->
isZero()) {
3217 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
3233 IVChain::const_iterator IncIter = Chain.Incs.begin();
3234 IVChain::const_iterator IncEnd = Chain.Incs.end();
3235 for( ; IncIter != IncEnd; ++IncIter) {
3236 if (IncIter->UserInst == OtherUse)
3239 if (IncIter != IncEnd)
3243 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3244 && IU.isIVUserOrOperand(OtherUse)) {
3247 NearUsers.
insert(OtherUse);
3252 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3277void LSRInstance::CollectChains() {
3283 for (
DomTreeNode *Rung = DT.getNode(
L->getLoopLatch());
3284 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3293 if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
3303 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3304 ChainIdx < NChains; ++ChainIdx) {
3305 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3311 while (IVOpIter != IVOpEnd) {
3312 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3313 if (UniqueOperands.
insert(IVOpInst).second)
3314 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3315 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3320 for (
PHINode &PN :
L->getHeader()->phis()) {
3325 dyn_cast<Instruction>(PN.getIncomingValueForBlock(
L->getLoopLatch()));
3327 ChainInstruction(&PN, IncV, ChainUsersVec);
3330 unsigned ChainIdx = 0;
3331 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3332 UsersIdx < NChains; ++UsersIdx) {
3334 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3337 if (ChainIdx != UsersIdx)
3338 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3339 FinalizeChain(IVChainVec[ChainIdx]);
3342 IVChainVec.resize(ChainIdx);
3345void LSRInstance::FinalizeChain(IVChain &Chain) {
3346 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3347 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3349 for (
const IVInc &Inc : Chain) {
3351 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3352 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3353 IVIncSet.insert(UseI);
3360 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3361 Immediate IncOffset = Immediate::getZero();
3368 auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
3369 if (!IncVScale || IncVScale->getNumOperands() != 2 ||
3370 !isa<SCEVVScale>(IncVScale->getOperand(1)))
3372 auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
3373 if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
3375 IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
3391void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3395 const IVInc &Head = Chain.Incs[0];
3400 Value *IVSrc =
nullptr;
3401 while (IVOpIter != IVOpEnd) {
3412 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3413 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3416 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3418 if (IVOpIter == IVOpEnd) {
3420 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3423 assert(IVSrc &&
"Failed to find IV chain source");
3428 const SCEV *LeftOverExpr =
nullptr;
3433 for (
const IVInc &Inc : Chain) {
3435 if (isa<PHINode>(InsertPt))
3436 InsertPt =
L->getLoopLatch()->getTerminator();
3440 Value *IVOper = IVSrc;
3441 if (!Inc.IncExpr->isZero()) {
3446 LeftOverExpr = LeftOverExpr ?
3447 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3451 bool FoundBase =
false;
3452 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3455 if (!Remainder->
isZero()) {
3457 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3458 const SCEV *IVOperExpr =
3460 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3469 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3472 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3475 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3479 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3482 LeftOverExpr =
nullptr;
3485 Type *OperTy = Inc.IVOperand->getType();
3486 if (IVTy != OperTy) {
3488 "cannot extend a chained IV");
3490 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3492 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3493 if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3498 if (isa<PHINode>(Chain.tailUserInst())) {
3499 for (
PHINode &Phi :
L->getHeader()->phis()) {
3503 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3506 Value *IVOper = IVSrc;
3508 if (IVTy != PostIncTy) {
3510 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3511 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3512 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3514 Phi.replaceUsesOfWith(PostIncV, IVOper);
3520void LSRInstance::CollectFixupsAndInitialFormulae() {
3522 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3534 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3535 if (IVIncSet.count(UseI)) {
3536 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3540 LSRUse::KindType
Kind = LSRUse::Basic;
3541 MemAccessTy AccessTy;
3543 Kind = LSRUse::Address;
3547 const SCEV *S = IU.getExpr(U);
3558 if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3561 if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3563 if (CI->isEquality()) {
3566 Value *
NV = CI->getOperand(1);
3567 if (NV ==
U.getOperandValToReplace()) {
3568 CI->setOperand(1, CI->getOperand(0));
3569 CI->setOperand(0, NV);
3570 NV = CI->getOperand(1);
3577 (!
NV->getType()->isPointerTy() ||
3584 Kind = LSRUse::ICmpZero;
3586 }
else if (
L->isLoopInvariant(NV) &&
3587 (!isa<Instruction>(NV) ||
3588 DT.dominates(cast<Instruction>(NV),
L->getHeader())) &&
3589 !
NV->getType()->isPointerTy()) {
3600 Kind = LSRUse::ICmpZero;
3602 assert(!isa<SCEVCouldNotCompute>(S));
3607 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3608 if (Factors[i] != -1)
3609 Factors.insert(-(
uint64_t)Factors[i]);
3615 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3616 size_t LUIdx =
P.first;
3618 LSRUse &LU =
Uses[LUIdx];
3621 LSRFixup &LF = LU.getNewFixup();
3622 LF.UserInst = UserInst;
3623 LF.OperandValToReplace =
U.getOperandValToReplace();
3624 LF.PostIncLoops = TmpPostIncLoops;
3626 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3629 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3631 F.initialMatch(S, L, SE);
3632 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU);
3633 VisitedLSRUse.
insert(LUIdx);
3636 if (!LU.WidestFixupType ||
3639 LU.WidestFixupType = LF.OperandValToReplace->getType();
3642 if (LU.Formulae.empty()) {
3643 InsertInitialFormula(S, LU, LUIdx);
3644 CountRegisters(LU.Formulae.back(), LUIdx);
3653void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3657 LU.RigidFormula =
true;
3660 F.initialMatch(S, L, SE);
3661 bool Inserted = InsertFormula(LU, LUIdx,
F);
3662 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3668LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3669 LSRUse &LU,
size_t LUIdx) {
3671 F.BaseRegs.push_back(S);
3672 F.HasBaseReg =
true;
3673 bool Inserted = InsertFormula(LU, LUIdx,
F);
3674 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3678void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3680 RegUses.countRegister(
F.ScaledReg, LUIdx);
3681 for (
const SCEV *BaseReg :
F.BaseRegs)
3682 RegUses.countRegister(BaseReg, LUIdx);
3687bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3690 "Formula is illegal");
3692 if (!LU.InsertFormula(
F, *L))
3695 CountRegisters(
F, LUIdx);
3705LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3714 while (!Worklist.
empty()) {
3718 if (!Visited.
insert(S).second)
3725 else if (
const SCEVUDivExpr *
D = dyn_cast<SCEVUDivExpr>(S)) {
3728 }
else if (
const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3729 const Value *
V = US->getValue();
3730 if (
const Instruction *Inst = dyn_cast<Instruction>(V)) {
3732 if (
L->contains(Inst))
continue;
3733 }
else if (isa<Constant>(V))
3736 for (
const Use &U :
V->uses()) {
3737 const Instruction *UserInst = dyn_cast<Instruction>(
U.getUser());
3746 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3749 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3751 cast<PHINode>(UserInst)->getIncomingBlock(
3753 if (!DT.dominates(
L->getHeader(), UseBB))
3766 if (isa<PHINode>(UserInst)) {
3767 const auto *PhiNode = cast<PHINode>(UserInst);
3768 bool HasIncompatibleEHPTerminatedBlock =
false;
3770 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3771 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3772 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3773 HasIncompatibleEHPTerminatedBlock =
true;
3778 if (HasIncompatibleEHPTerminatedBlock) {
3784 if (isa<CatchSwitchInst>(UserInst->
getParent()->getTerminator()))
3791 if (!isa<SCEVUnknown>(UserS))
3800 if (
const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3801 unsigned OtherIdx = !
U.getOperandNo();
3802 Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
3807 std::pair<size_t, Immediate>
P =
3808 getUse(S, LSRUse::Basic, MemAccessTy());
3809 size_t LUIdx =
P.first;
3811 LSRUse &LU =
Uses[LUIdx];
3812 LSRFixup &LF = LU.getNewFixup();
3813 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3814 LF.OperandValToReplace =
U;
3816 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3817 if (!LU.WidestFixupType ||
3820 LU.WidestFixupType = LF.OperandValToReplace->getType();
3821 InsertSupplementalFormula(US, LU, LUIdx);
3822 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3838 unsigned Depth = 0) {
3845 for (
const SCEV *S :
Add->operands()) {
3851 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3860 if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3862 Remainder =
nullptr;
3880 const SCEV *Remainder =
3893 LSRUse &LU,
const SCEV *S,
const Loop *L,
3895 if (LU.Kind != LSRUse::Address ||
3896 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3902 if (!isa<SCEVConstant>(LoopStep))
3908 if (!isa<SCEVConstant>(LoopStart) && SE.
isLoopInvariant(LoopStart, L))
3915void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3916 const Formula &
Base,
3919 const SCEV *BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[
Idx];
3931 if (AddOps.
size() == 1)
3945 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3951 InnerAddOps.append(std::next(J),
3956 if (InnerAddOps.size() == 1 &&
3958 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3966 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3970 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3975 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3978 F.ScaledReg =
nullptr;
3980 F.BaseRegs.erase(
F.BaseRegs.begin() +
Idx);
3981 }
else if (IsScaledReg)
3982 F.ScaledReg = InnerSum;
3984 F.BaseRegs[
Idx] = InnerSum;
3990 SC->getValue()->getZExtValue()))
3992 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3993 SC->getValue()->getZExtValue());
3995 F.BaseRegs.push_back(*J);
4000 if (InsertFormula(LU, LUIdx,
F))
4007 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4013void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4015 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4020 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4021 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4023 if (
Base.Scale == 1)
4024 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4030void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4033 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4034 (
Base.UnfoldedOffset.isNonZero()) <=
4042 Formula NewBase =
Base;
4043 NewBase.BaseRegs.clear();
4044 Type *CombinedIntegerType =
nullptr;
4045 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4048 if (!CombinedIntegerType)
4053 NewBase.BaseRegs.push_back(BaseReg);
4057 if (Ops.
size() == 0)
4062 auto GenerateFormula = [&](
const SCEV *Sum) {
4063 Formula
F = NewBase;
4071 F.BaseRegs.push_back(Sum);
4073 (void)InsertFormula(LU, LUIdx,
F);
4077 if (Ops.
size() > 1) {
4084 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4085 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4087 NewBase.UnfoldedOffset.getFixedValue(),
true));
4088 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4094void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4095 const Formula &
Base,
size_t Idx,
4099 if (
G->isZero() || !GV)
4103 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4108 F.BaseRegs[
Idx] =
G;
4109 (void)InsertFormula(LU, LUIdx,
F);
4113void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4116 if (
Base.BaseGV)
return;
4118 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4119 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4120 if (
Base.Scale == 1)
4121 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4126void LSRInstance::GenerateConstantOffsetsImpl(
4127 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4130 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4132 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4134 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4136 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4138 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4144 F.ScaledReg =
nullptr;
4146 F.deleteBaseReg(
F.BaseRegs[
Idx]);
4148 }
else if (IsScaledReg)
4151 F.BaseRegs[
Idx] = NewG;
4153 (void)InsertFormula(LU, LUIdx,
F);
4168 if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
4170 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
4171 const APInt &StepInt = StepRec->getAPInt();
4175 for (Immediate
Offset : Worklist) {
4177 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4184 for (Immediate
Offset : Worklist)
4188 if (
G->isZero() ||
Imm.isZero() ||
4189 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4192 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4193 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4198 F.BaseRegs[
Idx] =
G;
4203 (void)InsertFormula(LU, LUIdx,
F);
4207void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4213 if (LU.MaxOffset != LU.MinOffset)
4216 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4217 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4218 if (
Base.Scale == 1)
4219 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4225void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4227 if (LU.Kind != LSRUse::ICmpZero)
return;
4235 if (LU.MinOffset != LU.MaxOffset)
return;
4238 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4240 for (
const SCEV *BaseReg :
Base.BaseRegs)
4243 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4246 for (int64_t Factor : Factors) {
4251 if (
Base.BaseOffset.isMin() && Factor == -1)
4254 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4256 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4257 assert(Factor != 0 &&
"Zero factor not expected!");
4258 if (NewBaseOffset.getFixedValue() / Factor !=
4259 Base.BaseOffset.getFixedValue())
4267 Immediate
Offset = LU.MinOffset;
4268 if (
Offset.isMin() && Factor == -1)
4271 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4279 F.BaseOffset = NewBaseOffset;
4286 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4291 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4305 if (
F.UnfoldedOffset.isNonZero()) {
4306 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4308 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4309 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4310 Base.UnfoldedOffset.getFixedValue())
4314 IntTy,
F.UnfoldedOffset.getFixedValue()))
4319 (void)InsertFormula(LU, LUIdx,
F);
4326void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4333 if (
Base.Scale != 0 && !
Base.unscale())
4336 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4339 for (int64_t Factor : Factors) {
4340 Base.Scale = Factor;
4341 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4343 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4347 if (LU.Kind == LSRUse::Basic &&
4348 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4349 LU.AccessTy,
Base) &&
4350 LU.AllFixupsOutsideLoop)
4351 LU.Kind = LSRUse::Special;
4357 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4358 Base.BaseOffset.isZero() && !
Base.BaseGV)
4361 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4363 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4370 if (!Quotient->isZero()) {
4373 F.ScaledReg = Quotient;
4374 F.deleteBaseReg(
F.BaseRegs[i]);
4378 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4379 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4383 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4385 (void)InsertFormula(LU, LUIdx,
F);
4401 const SCEV *Result =
nullptr;
4402 for (
auto &L :
Loops) {
4406 if (!New || (Result && New != Result))
4411 assert(Result &&
"failed to create expression");
4416void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4418 if (
Base.BaseGV)
return;
4428 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4431 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4435 for (
auto &LF : LU.Fixups)
4436 Loops.push_back(LF.PostIncLoops);
4438 for (
Type *SrcTy : Types) {
4447 const SCEV *NewScaledReg =
4449 if (!NewScaledReg || NewScaledReg->
isZero())
4451 F.ScaledReg = NewScaledReg;
4453 bool HasZeroBaseReg =
false;
4454 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4455 const SCEV *NewBaseReg =
4457 if (!NewBaseReg || NewBaseReg->
isZero()) {
4458 HasZeroBaseReg =
true;
4461 BaseReg = NewBaseReg;
4468 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4472 (void)InsertFormula(LU, LUIdx,
F);
4485 const SCEV *OrigReg;
4488 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4496#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4498 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4499 <<
" , add offset " <<
Imm;
4509void LSRInstance::GenerateCrossUseConstantOffsets() {
4511 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4516 for (
const SCEV *
Use : RegUses) {
4519 auto Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
4522 Pair.first->second.insert(std::make_pair(Imm,
Use));
4523 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(
Use);
4532 for (
const SCEV *Reg : Sequence) {
4533 const ImmMapTy &Imms =
Map.find(Reg)->second;
4536 if (Imms.size() == 1)
4539 LLVM_DEBUG(
dbgs() <<
"Generating cross-use offsets for " << *Reg <<
':';
4540 for (
const auto &Entry
4542 <<
' ' <<
Entry.first;
4546 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4548 const SCEV *OrigReg = J->second;
4550 Immediate JImm = J->first;
4551 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4553 if (!isa<SCEVConstant>(OrigReg) &&
4554 UsedByIndicesMap[Reg].
count() == 1) {
4562 Immediate
First = Imms.begin()->first;
4563 Immediate
Last = std::prev(Imms.end())->first;
4564 if (!
First.isCompatibleImmediate(
Last)) {
4571 bool Scalable =
First.isScalable() ||
Last.isScalable();
4572 int64_t FI =
First.getKnownMinValue();
4573 int64_t LI =
Last.getKnownMinValue();
4576 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4579 Avg = Avg + ((FI ^ LI) & ((
uint64_t)Avg >> 63));
4580 ImmMapTy::const_iterator OtherImms[] = {
4581 Imms.begin(), std::prev(Imms.end()),
4582 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4583 for (
const auto &M : OtherImms) {
4584 if (M == J || M == JE)
continue;
4585 if (!JImm.isCompatibleImmediate(
M->first))
4589 Immediate
Imm = JImm.subUnsigned(
M->first);
4590 for (
unsigned LUIdx : UsedByIndices.
set_bits())
4592 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4600 UsedByIndicesMap.
clear();
4601 UniqueItems.
clear();
4604 for (
const WorkItem &WI : WorkItems) {
4605 size_t LUIdx = WI.LUIdx;
4606 LSRUse &LU =
Uses[LUIdx];
4607 Immediate
Imm = WI.Imm;
4608 const SCEV *OrigReg = WI.OrigReg;
4611 const SCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4615 for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4616 Formula
F = LU.Formulae[
L];
4623 if (
F.ScaledReg == OrigReg) {
4624 if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4626 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4628 const SCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4629 if (
F.referencesReg(S))
4632 NewF.BaseOffset =
Offset;
4633 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4636 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4641 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
4645 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4647 if (
C->getValue()->isNegative() !=
4648 (NewF.BaseOffset.isLessThanZero()) &&
4650 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4655 NewF.canonicalize(*this->L);
4656 (void)InsertFormula(LU, LUIdx, NewF);
4659 for (
size_t N = 0, NE =
F.BaseRegs.size();
N !=
NE; ++
N) {
4660 const SCEV *BaseReg =
F.BaseRegs[
N];
4661 if (BaseReg != OrigReg)
4664 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4665 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4666 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4668 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4670 LU.Kind, LU.AccessTy, NewF)) {
4674 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4678 NewF.UnfoldedOffset = NewUnfoldedOffset;
4680 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4685 for (
const SCEV *NewReg : NewF.BaseRegs)
4686 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewReg)) {
4687 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4689 if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4691 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4692 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4694 (
unsigned)llvm::countr_zero<uint64_t>(
4695 NewF.BaseOffset.getFixedValue()))
4700 NewF.canonicalize(*this->L);
4701 (void)InsertFormula(LU, LUIdx, NewF);
4712LSRInstance::GenerateAllReuseFormulae() {
4715 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4716 LSRUse &LU =
Uses[LUIdx];
4717 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4718 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4719 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4720 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4722 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4723 LSRUse &LU =
Uses[LUIdx];
4724 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4725 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4726 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4727 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4728 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4729 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4730 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4731 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4733 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4734 LSRUse &LU =
Uses[LUIdx];
4735 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4736 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4739 GenerateCrossUseConstantOffsets();
4742 "After generating reuse formulae:\n";
4743 print_uses(
dbgs()));
4748void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4753 bool ChangedFormulae =
false;
4758 using BestFormulaeTy =
4761 BestFormulaeTy BestFormulae;
4763 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4764 LSRUse &LU =
Uses[LUIdx];
4769 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4770 FIdx != NumForms; ++FIdx) {
4771 Formula &
F = LU.Formulae[FIdx];
4782 CostF.RateFormula(
F, Regs, VisitedRegs, LU, &LoserRegs);
4783 if (CostF.isLoser()) {
4795 for (
const SCEV *Reg :
F.BaseRegs) {
4796 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4800 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4801 Key.push_back(
F.ScaledReg);
4806 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4807 BestFormulae.insert(std::make_pair(Key, FIdx));
4811 Formula &Best = LU.Formulae[
P.first->second];
4813 Cost CostBest(L, SE,
TTI, AMK);
4815 CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
4816 if (CostF.isLess(CostBest))
4820 " in favor of formula ";
4821 Best.print(
dbgs());
dbgs() <<
'\n');
4824 ChangedFormulae =
true;
4826 LU.DeleteFormula(
F);
4834 LU.RecomputeRegs(LUIdx, RegUses);
4837 BestFormulae.clear();
4842 "After filtering out undesirable candidates:\n";
4850size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4852 for (
const LSRUse &LU :
Uses) {
4853 size_t FSize = LU.Formulae.size();
4868void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4872 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4873 "which use a superset of registers used by other "
4876 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4877 LSRUse &LU =
Uses[LUIdx];
4879 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4880 Formula &
F = LU.Formulae[i];
4881 if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4887 I =
F.BaseRegs.begin(), E =
F.BaseRegs.end();
I != E; ++
I) {
4893 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4894 (
uint64_t)
C->getValue()->getSExtValue());
4895 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4896 (
I -
F.BaseRegs.begin()));
4897 if (LU.HasFormulaWithSameRegs(NewF)) {
4900 LU.DeleteFormula(
F);
4906 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*
I)) {
4907 if (
GlobalValue *GV = dyn_cast<GlobalValue>(
U->getValue()))
4911 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4912 (
I -
F.BaseRegs.begin()));
4913 if (LU.HasFormulaWithSameRegs(NewF)) {
4916 LU.DeleteFormula(
F);
4927 LU.RecomputeRegs(LUIdx, RegUses);
4936void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4941 dbgs() <<
"The search space is too complex.\n"
4942 "Narrowing the search space by assuming that uses separated "
4943 "by a constant offset will use the same registers.\n");
4947 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4948 LSRUse &LU =
Uses[LUIdx];
4949 for (
const Formula &
F : LU.Formulae) {
4950 if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
4953 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4957 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4958 LU.Kind, LU.AccessTy))
4963 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4966 for (LSRFixup &
Fixup : LU.Fixups) {
4967 Fixup.Offset +=
F.BaseOffset;
4968 LUThatHas->pushFixup(
Fixup);
4974 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4975 Formula &
F = LUThatHas->Formulae[i];
4976 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4977 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4979 LUThatHas->DeleteFormula(
F);
4987 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
4990 DeleteUse(LU, LUIdx);
5003void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5007 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
5008 "undesirable dedicated registers.\n");
5010 FilterOutUndesirableDedicatedRegisters();
5025void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5030 dbgs() <<
"The search space is too complex.\n"
5031 "Narrowing the search space by choosing the best Formula "
5032 "from the Formulae with the same Scale and ScaledReg.\n");
5037 BestFormulaeTy BestFormulae;
5039 bool ChangedFormulae =
false;
5044 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5045 LSRUse &LU =
Uses[LUIdx];
5050 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5055 size_t FARegNum = 0;
5056 for (
const SCEV *Reg : FA.BaseRegs) {
5057 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5058 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5060 size_t FBRegNum = 0;
5061 for (
const SCEV *Reg : FB.BaseRegs) {
5062 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5063 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5065 if (FARegNum != FBRegNum)
5066 return FARegNum < FBRegNum;
5073 CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
5075 CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
5076 return CostFA.isLess(CostFB);
5080 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5082 Formula &
F = LU.Formulae[FIdx];
5085 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5089 Formula &Best = LU.Formulae[
P.first->second];
5090 if (IsBetterThan(
F, Best))
5094 " in favor of formula ";
5095 Best.print(
dbgs());
dbgs() <<
'\n');
5097 ChangedFormulae =
true;
5099 LU.DeleteFormula(
F);
5105 LU.RecomputeRegs(LUIdx, RegUses);
5108 BestFormulae.clear();
5113 "After filtering out undesirable candidates:\n";
5120void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5127 "Narrowing the search space by choosing the lowest "
5128 "register Formula for PostInc Uses.\n");
5130 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5131 LSRUse &LU =
Uses[LUIdx];
5133 if (LU.Kind != LSRUse::Address)
5139 size_t MinRegs = std::numeric_limits<size_t>::max();
5140 for (
const Formula &
F : LU.Formulae)
5141 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5144 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5146 Formula &
F = LU.Formulae[FIdx];
5147 if (
F.getNumRegs() > MinRegs) {
5150 LU.DeleteFormula(
F);
5157 LU.RecomputeRegs(LUIdx, RegUses);
5208void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5221 DenseMap <const SCEV *, float> RegNumMap;
5222 for (
const SCEV *Reg : RegUses) {
5223 if (UniqRegs.
count(Reg))
5226 for (
const LSRUse &LU :
Uses) {
5227 if (!LU.Regs.count(Reg))
5229 float P = LU.getNotSelectedProbability(Reg);
5235 RegNumMap.
insert(std::make_pair(Reg, PNotSel));
5239 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5242 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5243 LSRUse &LU =
Uses[LUIdx];
5245 if (LU.Formulae.size() < 2)
5250 float FMinRegNum = LU.Formulae[0].getNumRegs();
5251 float FMinARegNum = LU.Formulae[0].getNumRegs();
5253 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5254 Formula &
F = LU.Formulae[i];
5257 for (
const SCEV *BaseReg :
F.BaseRegs) {
5258 if (UniqRegs.
count(BaseReg))
5260 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5261 if (isa<SCEVAddRecExpr>(BaseReg))
5263 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5265 if (
const SCEV *ScaledReg =
F.ScaledReg) {
5266 if (!UniqRegs.
count(ScaledReg)) {
5268 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5269 if (isa<SCEVAddRecExpr>(ScaledReg))
5271 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5274 if (FMinRegNum > FRegNum ||
5275 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5276 FMinRegNum = FRegNum;
5277 FMinARegNum = FARegNum;
5282 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5284 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5285 while (LU.Formulae.size() != 1) {
5288 LU.Formulae.pop_back();
5290 LU.RecomputeRegs(LUIdx, RegUses);
5291 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5292 Formula &
F = LU.Formulae[0];
5295 UniqRegs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
5308 MemAccessTy AccessType) {
5309 if (Best->
getType() != Reg->getType() ||
5310 (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
5311 cast<SCEVAddRecExpr>(Best)->getLoop() !=
5312 cast<SCEVAddRecExpr>(Reg)->getLoop()))
5314 const auto *Diff = dyn_cast<SCEVConstant>(SE.
getMinusSCEV(Best, Reg));
5319 AccessType.MemTy,
nullptr,
5320 Diff->getAPInt().getSExtValue(),
5321 true, 0, AccessType.AddrSpace) &&
5323 AccessType.MemTy,
nullptr,
5324 -Diff->getAPInt().getSExtValue(),
5325 true, 0, AccessType.AddrSpace);
5331void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5342 const SCEV *Best =
nullptr;
5343 unsigned BestNum = 0;
5344 for (
const SCEV *Reg : RegUses) {
5345 if (Taken.
count(Reg))
5349 BestNum = RegUses.getUsedByIndices(Reg).count();
5351 unsigned Count = RegUses.getUsedByIndices(Reg).count();
5352 if (Count > BestNum) {
5360 if (Count == BestNum) {
5361 int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5362 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5364 Uses[LUIdx].AccessTy)) {
5371 assert(Best &&
"Failed to find best LSRUse candidate");
5373 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5374 <<
" will yield profitable reuse.\n");
5379 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5380 LSRUse &LU =
Uses[LUIdx];
5381 if (!LU.Regs.count(Best))
continue;
5384 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5385 Formula &
F = LU.Formulae[i];
5386 if (!
F.referencesReg(Best)) {
5388 LU.DeleteFormula(
F);
5392 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5398 LU.RecomputeRegs(LUIdx, RegUses);
5409void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5410 NarrowSearchSpaceByDetectingSupersets();
5411 NarrowSearchSpaceByCollapsingUnrolledCode();
5412 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5414 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5415 NarrowSearchSpaceByFilterPostInc();
5417 NarrowSearchSpaceByDeletingCostlyFormulas();
5419 NarrowSearchSpaceByPickingWinnerRegs();
5426 const Cost &CurCost,
5439 const LSRUse &LU =
Uses[Workspace.
size()];
5446 for (
const SCEV *S : CurRegs)
5447 if (LU.Regs.count(S))
5451 Cost NewCost(L, SE,
TTI, AMK);
5452 for (
const Formula &
F : LU.Formulae) {
5460 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5461 for (
const SCEV *Reg : ReqRegs) {
5462 if ((
F.ScaledReg &&
F.ScaledReg == Reg) ||
5465 if (NumReqRegsToFind == 0)
5469 if (NumReqRegsToFind != 0) {
5480 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU);
5481 if (NewCost.isLess(SolutionCost)) {
5483 if (Workspace.
size() !=
Uses.size()) {
5484 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5485 NewRegs, VisitedRegs);
5486 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5487 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5490 dbgs() <<
".\nRegs:\n";
5491 for (
const SCEV *S : NewRegs)
dbgs()
5492 <<
"- " << *S <<
"\n";
5495 SolutionCost = NewCost;
5496 Solution = Workspace;
5507 Cost SolutionCost(L, SE,
TTI, AMK);
5508 SolutionCost.Lose();
5509 Cost CurCost(L, SE,
TTI, AMK);
5515 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5516 CurRegs, VisitedRegs);
5517 if (Solution.
empty()) {
5524 "The chosen solution requires ";
5525 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5526 for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5531 Solution[i]->print(
dbgs());
5537 const bool EnableDropUnprofitableSolution = [&] {
5549 if (BaselineCost.isLess(SolutionCost)) {
5550 if (!EnableDropUnprofitableSolution)
5552 dbgs() <<
"Baseline is more profitable than chosen solution, "
5553 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5556 "solution, dropping LSR solution.\n";);
5571 bool AllDominate =
true;
5575 if (isa<CatchSwitchInst>(Tentative))
5579 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
5580 AllDominate =
false;
5585 if (Tentative->
getParent() == Inst->getParent() &&
5586 (!BetterPos || !DT.dominates(Inst, BetterPos)))
5596 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5597 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5600 for (
DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
5601 if (!Rung)
return IP;
5602 Rung = Rung->getIDom();
5603 if (!Rung)
return IP;
5604 IDom = Rung->getBlock();
5607 const Loop *IDomLoop = LI.getLoopFor(IDom);
5608 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5609 if (IDomDepth <= IPLoopDepth &&
5610 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5628 if (
Instruction *
I = dyn_cast<Instruction>(LF.OperandValToReplace))
5630 if (LU.Kind == LSRUse::ICmpZero)
5632 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
5634 if (LF.PostIncLoops.count(L)) {
5635 if (LF.isUseFullyOutsideLoop(L))
5636 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5642 for (
const Loop *PIL : LF.PostIncLoops) {
5643 if (PIL == L)
continue;
5648 if (!ExitingBlocks.
empty()) {
5650 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5651 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
5656 assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
5657 && !isa<DbgInfoIntrinsic>(LowestIP) &&
5658 "Insertion point must be a normal instruction");
5665 while (isa<PHINode>(IP)) ++IP;
5668 while (IP->isEHPad()) ++IP;
5671 while (isa<DbgInfoIntrinsic>(IP)) ++IP;
5676 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5684Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5687 if (LU.RigidFormula)
5688 return LF.OperandValToReplace;
5692 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5697 Rewriter.setPostInc(LF.PostIncLoops);
5700 Type *OpTy = LF.OperandValToReplace->getType();
5702 Type *Ty =
F.getType();
5716 for (
const SCEV *Reg :
F.BaseRegs) {
5717 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5725 Value *ICmpScaledV =
nullptr;
5727 const SCEV *ScaledS =
F.ScaledReg;
5733 if (LU.Kind == LSRUse::ICmpZero) {
5743 "The only scale supported by ICmpZero uses is -1!");
5744 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5752 if (!Ops.
empty() && LU.Kind == LSRUse::Address &&
5788 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5789 "Expanding mismatched offsets\n");
5791 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5792 if (
Offset.isNonZero()) {
5793 if (LU.Kind == LSRUse::ICmpZero) {
5801 ICmpScaledV = ConstantInt::get(IntTy,
Offset.getFixedValue());
5811 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5812 if (UnfoldedOffset.isNonZero()) {
5814 Ops.
push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5829 if (LU.Kind == LSRUse::ICmpZero) {
5830 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
5831 if (
auto *OperandIsInstr = dyn_cast<Instruction>(CI->
getOperand(1)))
5833 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5834 "a scale at the same time!");
5835 if (
F.Scale == -1) {
5836 if (ICmpScaledV->
getType() != OpTy) {
5846 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5847 "ICmp does not support folding a global value and "
5848 "a scale at the same time!");
5851 if (
C->getType() != OpTy) {
5855 assert(
C &&
"Cast of ConstantInt should have folded");
5868void LSRInstance::RewriteForPHI(
5869 PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
5881 bool needUpdateFixups =
false;
5892 Loop *PNLoop = LI.getLoopFor(Parent);
5893 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5900 .setMergeIdenticalEdges()
5901 .setKeepOneInputPHIs());
5915 if (
L->contains(BB) && !
L->contains(PN))
5923 needUpdateFixups =
true;
5928 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5929 Inserted.insert(std::make_pair(BB,
static_cast<Value *
>(
nullptr)));
5937 Type *OpTy = LF.OperandValToReplace->getType();
5941 LF.OperandValToReplace->getType(),
"tmp",
5947 if (
auto *
I = dyn_cast<Instruction>(FullV))
5948 if (
L->contains(
I) && !
L->contains(BB))
5952 Pair.first->second = FullV;
5959 if (needUpdateFixups) {
5960 for (LSRUse &LU :
Uses)
5961 for (LSRFixup &
Fixup : LU.Fixups)
5965 if (
Fixup.UserInst == PN) {
5968 bool foundInOriginalPHI =
false;
5970 if (val ==
Fixup.OperandValToReplace) {
5971 foundInOriginalPHI =
true;
5976 if (foundInOriginalPHI)
5987 if (val ==
Fixup.OperandValToReplace)
5988 Fixup.UserInst = NewPN;
6000void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
6005 if (
PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
6006 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6008 Value *FullV = Expand(LU, LF,
F, LF.UserInst->getIterator(), DeadInsts);
6011 Type *OpTy = LF.OperandValToReplace->getType();
6012 if (FullV->
getType() != OpTy) {
6015 FullV, OpTy,
"tmp", LF.UserInst->getIterator());
6024 if (LU.Kind == LSRUse::ICmpZero)
6027 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
6030 if (
auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
6040 if (LU.Kind != LSRUse::Address)
6047 if (IVIncInsertPos->
getParent() == LHeader)
6050 if (!
Fixup.OperandValToReplace ||
6052 Instruction *UI = cast<Instruction>(U);
6053 return UI->getParent() != LHeader;
6058 Type *Ty =
I->getType();
6066void LSRInstance::ImplementSolution(
6073 for (
const IVChain &Chain : IVChainVec) {
6074 if (
PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
6079 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6080 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6083 ?
L->getHeader()->getTerminator()
6085 Rewriter.setIVIncInsertPos(L, InsertPos);
6086 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6090 for (
const IVChain &Chain : IVChainVec) {
6091 GenerateIVChain(Chain, DeadInsts);
6097 ScalarEvolutionIVs.push_back(
IV);
6113 for (
PHINode &PN :
L->getHeader()->phis()) {
6115 Value *Start =
nullptr, *Step =
nullptr;
6120 case Instruction::Sub:
6125 case Instruction::Add:
6131 if (!isa<Constant>(Step))
6136 if (BO->
getParent() == IVIncInsertPos->getParent())
6142 [&](
Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6155 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6158 :
TTI.getPreferredAddressingMode(
L, &SE)),
6160 BaselineCost(
L, SE,
TTI, AMK) {
6162 if (!
L->isLoopSimplifyForm())
6166 if (IU.
empty())
return;
6170 unsigned NumUsers = 0;
6174 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6181 if (
auto *PN = dyn_cast<PHINode>(
U.getUser())) {
6182 auto *FirstNonPHI = PN->
getParent()->getFirstNonPHI();
6183 if (isa<FuncletPadInst>(FirstNonPHI) ||
6184 isa<CatchSwitchInst>(FirstNonPHI))
6186 if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
6192 L->getHeader()->printAsOperand(
dbgs(),
false);
6205 OptimizeLoopTermCond();
6208 if (IU.empty())
return;
6211 if (!
L->isInnermost()) {
6224 CollectInterestingTypesAndFactors();
6225 CollectFixupsAndInitialFormulae();
6226 CollectLoopInvariantFixupsAndFormulae();
6232 print_uses(
dbgs()));
6234 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6238 GenerateAllReuseFormulae();
6240 FilterOutUndesirableDedicatedRegisters();
6241 NarrowSearchSpaceUsingHeuristics();
6251 if (Solution.
empty())
6256 for (
const LSRUse &LU :
Uses) {
6257 for (
const Formula &
F : LU.Formulae)
6259 F) &&
"Illegal formula generated!");
6264 ImplementSolution(Solution);
6267#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6268void LSRInstance::print_factors_and_types(
raw_ostream &
OS)
const {
6269 if (Factors.empty() &&
Types.empty())
return;
6271 OS <<
"LSR has identified the following interesting factors and types: ";
6274 for (int64_t Factor : Factors) {
6277 OS <<
'*' << Factor;
6280 for (
Type *Ty : Types) {
6283 OS <<
'(' << *Ty <<
')';
6289 OS <<
"LSR is examining the following fixup sites:\n";
6290 for (
const LSRUse &LU :
Uses)
6291 for (
const LSRFixup &LF : LU.Fixups) {
6299 OS <<
"LSR is examining the following uses:\n";
6300 for (
const LSRUse &LU :
Uses) {
6304 for (
const Formula &
F : LU.Formulae) {
6313 print_factors_and_types(
OS);
6325class LoopStrengthReduce :
public LoopPass {
6329 LoopStrengthReduce();
6338LoopStrengthReduce::LoopStrengthReduce() :
LoopPass(
ID) {
6342void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6374 return {Begin,
End};
6377struct SCEVDbgValueBuilder {
6378 SCEVDbgValueBuilder() =
default;
6379 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6381 void clone(
const SCEVDbgValueBuilder &
Base) {
6382 LocationOps =
Base.LocationOps;
6387 LocationOps.clear();
6404 unsigned ArgIndex = 0;
6405 if (It != LocationOps.
end()) {
6406 ArgIndex = std::distance(LocationOps.
begin(), It);
6408 ArgIndex = LocationOps.
size();
6420 if (
C->getAPInt().getSignificantBits() > 64)
6422 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6423 Expr.
push_back(
C->getAPInt().getSExtValue());
6430 return ToDwarfOpIter(Expr);
6437 assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6438 "Expected arithmetic SCEV type");
6440 unsigned EmitOperator = 0;
6441 for (
const auto &
Op : CommExpr->
operands()) {
6444 if (EmitOperator >= 1)
6445 pushOperator(DwarfOp);
6456 bool Success = pushSCEV(Inner);
6458 IsSigned ? llvm::dwarf::DW_ATE_signed
6459 : llvm::dwarf::DW_ATE_unsigned};
6460 for (
const auto &
Op : CastOps)
6468 if (
const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
6469 Success &= pushConst(StartInt);
6471 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6474 pushLocation(
U->getValue());
6476 }
else if (
const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6477 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6479 }
else if (
const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6480 Success &= pushSCEV(UDiv->getLHS());
6481 Success &= pushSCEV(UDiv->getRHS());
6482 pushOperator(llvm::dwarf::DW_OP_div);
6484 }
else if (
const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6486 assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
6487 isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
6488 "Unexpected cast type in SCEV.");
6489 Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
6491 }
else if (
const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
6492 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6494 }
else if (isa<SCEVAddRecExpr>(S)) {
6509 if (
C->getAPInt().getSignificantBits() > 64)
6511 int64_t
I =
C->getAPInt().getSExtValue();
6513 case llvm::dwarf::DW_OP_plus:
6514 case llvm::dwarf::DW_OP_minus:
6516 case llvm::dwarf::DW_OP_mul:
6517 case llvm::dwarf::DW_OP_div:
6533 if (isa<SCEVAddRecExpr>(SAR.
getStart()))
6540 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6541 if (!pushSCEV(Stride))
6543 pushOperator(llvm::dwarf::DW_OP_mul);
6545 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6546 if (!pushSCEV(Start))
6548 pushOperator(llvm::dwarf::DW_OP_plus);
6554 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6555 pushLocation(OffsetValue);
6558 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6559 << std::to_string(
Offset) <<
"\n");
6565 bool createIterCountExpr(
const SCEV *S,
6566 const SCEVDbgValueBuilder &IterationCount,
6573 if (!isa<SCEVAddRecExpr>(S))
6576 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6579 const auto *Rec = cast<SCEVAddRecExpr>(S);
6580 if (!Rec->isAffine())
6588 clone(IterationCount);
6589 if (!SCEVToValueExpr(*Rec, SE))
6603 if (isa<SCEVAddRecExpr>(SAR.
getStart())) {
6604 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV. Unsupported nested AddRec: "
6612 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6613 if (!pushSCEV(Start))
6615 pushOperator(llvm::dwarf::DW_OP_minus);
6617 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6618 if (!pushSCEV(Stride))
6620 pushOperator(llvm::dwarf::DW_OP_div);
6631 "Expected the locations vector to contain the IV");
6636 "Expected the location ops to contain the IV.");
6640 for (
const auto &
Op : LocationOps) {
6641 auto It =
find(DestLocations,
Op);
6642 if (It != DestLocations.
end()) {
6644 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6652 for (
const auto &
Op : expr_ops()) {
6654 Op.appendToVector(DestExpr);
6661 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6669struct DVIRecoveryRec {
6672 HadLocationArgList(
false) {}
6674 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6678 bool HadLocationArgList;
6684 for (
auto &RE : RecoveryExprs)
6686 RecoveryExprs.clear();
6689 ~DVIRecoveryRec() {
clear(); }
6697 auto expr_ops = ToDwarfOpIter(Expr);
6699 for (
auto Op : expr_ops)
6708template <
typename T>
6712 "contain any DW_OP_llvm_arg operands.");
6714 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6715 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6719template <
typename T>
6724 "Expected expression that references DIArglist locations using "
6725 "DW_OP_llvm_arg operands.");
6727 for (
Value *V : Locations)
6731 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6742 auto UpdateDbgValueInstImpl = [&](
auto *DbgVal) {
6744 if (NumLLVMArgs == 0) {
6751 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6763 if (!DVIRec.Expr->isComplex() && SalvageExpr->
isComplex()) {
6766 DbgVal->setExpression(SalvageExpr);
6769 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6770 UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6772 UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6786 auto RestorePreTransformStateImpl = [&](
auto *DbgVal) {
6787 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6788 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6789 assert(DVIRec.Expr &&
"Expected an expression");
6790 DbgVal->setExpression(DVIRec.Expr);
6794 if (!DVIRec.HadLocationArgList) {
6795 assert(DVIRec.LocationOps.size() == 1 &&
6796 "Unexpected number of location ops.");
6800 Value *CachedValue =
6805 for (
WeakVH VH : DVIRec.LocationOps) {
6810 DbgVal->setRawLocation(
6813 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6815 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6816 RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6818 RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6823 const SCEV *SCEVInductionVar,
6824 SCEVDbgValueBuilder IterCountExpr) {
6826 if (isa<DbgValueInst *>(DVIRec.DbgRef)
6827 ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
6828 : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
6840 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6842 NewLocationOps.
push_back(LSRInductionVar);
6844 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6845 WeakVH VH = DVIRec.LocationOps[i];
6849 if (VH && !isa<UndefValue>(VH)) {
6851 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6853 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6861 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6862 <<
" refers to a location that is now undef or erased. "
6863 "Salvage abandoned.\n");
6867 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6868 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6870 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6871 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6875 if (std::optional<APInt>
Offset =
6877 if (
Offset->getSignificantBits() <= 64)
6878 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6879 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6887 if (DVIRec.Expr->getNumElements() == 0) {
6888 assert(DVIRec.RecoveryExprs.size() == 1 &&
6889 "Expected only a single recovery expression for an empty "
6891 assert(DVIRec.RecoveryExprs[0] &&
6892 "Expected a SCEVDbgSalvageBuilder for location 0");
6893 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6894 B->appendToVectors(
NewExpr, NewLocationOps);
6896 for (
const auto &
Op : DVIRec.Expr->expr_ops()) {
6904 SCEVDbgValueBuilder *DbgBuilder =
6905 DVIRec.RecoveryExprs[LocationArgIndex].get();
6911 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6912 "Expected a positive index for the location-op position.");
6913 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6917 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6921 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6923 << *cast<DbgValueInst *>(DVIRec.DbgRef) <<
"\n");
6926 << *cast<DbgVariableRecord *>(DVIRec.DbgRef) <<
"\n");
6934 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6935 if (DVIToUpdate.empty())
6939 assert(SCEVInductionVar &&
6940 "Anticipated a SCEV for the post-LSR induction variable");
6943 dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
6944 if (!IVAddRec->isAffine())
6952 SCEVDbgValueBuilder IterCountExpr;
6953 IterCountExpr.pushLocation(LSRInductionVar);
6954 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6957 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6960 for (
auto &DVIRec : DVIToUpdate) {
6961 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6972 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
6974 for (
const auto &
B : L->getBlocks()) {
6975 for (
auto &
I : *
B) {
6976 auto ProcessDbgValue = [&](
auto *DbgVal) ->
bool {
6979 if (DbgVal->isKillLocation())
6984 const auto &HasTranslatableLocationOps =
6985 [&](
const auto *DbgValToTranslate) ->
bool {
6986 for (
const auto LocOp : DbgValToTranslate->location_ops()) {
7000 if (!HasTranslatableLocationOps(DbgVal))
7003 std::unique_ptr<DVIRecoveryRec> NewRec =
7004 std::make_unique<DVIRecoveryRec>(DbgVal);
7008 NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
7009 for (
const auto LocOp : DbgVal->location_ops()) {
7010 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
7011 NewRec->LocationOps.push_back(LocOp);
7012 NewRec->HadLocationArgList = DbgVal->hasArgList();
7014 SalvageableDVISCEVs.push_back(std::move(NewRec));
7018 if (DVR.isDbgValue() || DVR.isDbgAssign())
7019 ProcessDbgValue(&DVR);
7021 auto DVI = dyn_cast<DbgValueInst>(&
I);
7024 if (ProcessDbgValue(DVI))
7025 DVIHandles.insert(DVI);
7034 const LSRInstance &LSR) {
7036 auto IsSuitableIV = [&](
PHINode *
P) {
7047 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7054 if (IsSuitableIV(
P))
7058 for (
PHINode &
P : L.getHeader()->phis()) {
7059 if (IsSuitableIV(&
P))
7065static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
7068 if (!L->isInnermost()) {
7070 return std::nullopt;
7073 if (!L->isLoopSimplifyForm()) {
7075 return std::nullopt;
7079 LLVM_DEBUG(
dbgs() <<
"Cannot fold on backedge that is loop variant\n");
7080 return std::nullopt;
7086 return std::nullopt;
7087 auto *TermCond = dyn_cast<ICmpInst>(BI->
getCondition());
7090 dbgs() <<
"Cannot fold on branching condition that is not an ICmpInst");
7091 return std::nullopt;
7093 if (!TermCond->hasOneUse()) {
7096 <<
"Cannot replace terminating condition with more than one use\n");
7097 return std::nullopt;
7101 Value *
RHS = TermCond->getOperand(1);
7102 if (!
LHS || !L->isLoopInvariant(
RHS))
7105 return std::nullopt;
7109 Value *ToFoldStart, *ToFoldStep;
7111 return std::nullopt;
7114 if (ToFold->
getParent() != L->getHeader())
7115 return std::nullopt;
7120 return std::nullopt;
7124 const unsigned ExpansionBudget = [&]() {
7127 return std::min(Budget, SmallTC);
7129 return std::min(Budget, *SmallTC);
7135 const DataLayout &
DL = L->getHeader()->getDataLayout();
7138 PHINode *ToHelpFold =
nullptr;
7139 const SCEV *TermValueS =
nullptr;
7140 bool MustDropPoison =
false;
7141 auto InsertPt = L->getLoopPreheader()->getTerminator();
7142 for (
PHINode &PN : L->getHeader()->phis()) {
7148 <<
"' is not SCEV-able, not qualified for the "
7149 "terminating condition folding.\n");
7154 if (!AddRec || !AddRec->
isAffine()) {
7156 <<
"' is not an affine add recursion, not qualified "
7157 "for the terminating condition folding.\n");
7175 const SCEV *TermValueSLocal =
PostInc->evaluateAtIteration(BECount, SE);
7178 dbgs() <<
"Is not safe to expand terminating value for phi node" << PN
7186 dbgs() <<
"Is too expensive to expand terminating value for phi node"
7203 bool MustDropPoisonLocal =
false;
7225 TermValueS = TermValueSLocal;
7226 MustDropPoison = MustDropPoisonLocal;
7230 <<
"Cannot find other AddRec IV to help folding\n";);
7233 <<
"\nFound loop that can fold terminating condition\n"
7235 <<
" TermCond: " << *TermCond <<
"\n"
7236 <<
" BrandInst: " << *BI <<
"\n"
7237 <<
" ToFold: " << *ToFold <<
"\n"
7238 <<
" ToHelpFold: " << *ToHelpFold <<
"\n");
7240 if (!ToFold || !ToHelpFold)
7241 return std::nullopt;
7242 return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
7257 bool Changed =
false;
7258 std::unique_ptr<MemorySSAUpdater> MSSAU;
7260 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7263 const LSRInstance &Reducer =
7264 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7265 Changed |= Reducer.getChanged();
7271 const DataLayout &
DL = L->getHeader()->getDataLayout();
7276 unsigned numFolded =
Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7290 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7292 const DataLayout &
DL = L->getHeader()->getDataLayout();
7305 const bool EnableFormTerm = [&] {
7317 if (EnableFormTerm) {
7319 auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
7324 BasicBlock *LoopPreheader = L->getLoopPreheader();
7330 <<
"New term-cond phi-node:\n"
7331 << *ToHelpFold <<
"\n");
7333 Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
7335 Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
7339 cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
7342 const DataLayout &
DL = L->getHeader()->getDataLayout();
7346 "Terminating value was checked safe in canFoldTerminatingCondition");
7353 << *StartValue <<
"\n"
7354 <<
"Terminating value of new term-cond phi-node:\n"
7355 << *TermValue <<
"\n");
7361 Value *NewTermCond =
7363 "lsr_fold_term_cond.replaced_term_cond");
7369 << *OldTermCond <<
"\n"
7370 <<
"New term-cond:\n" << *NewTermCond <<
"\n");
7380 if (SalvageableDVIRecords.
empty())
7386 for (
const auto &L : LI) {
7390 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7391 "could not be identified.\n");
7395 for (
auto &Rec : SalvageableDVIRecords)
7397 SalvageableDVIRecords.
clear();
7406 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7407 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7408 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7409 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7410 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7411 *
L->getHeader()->getParent());
7412 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7413 *
L->getHeader()->getParent());
7414 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7415 *
L->getHeader()->getParent());
7416 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7419 MSSA = &MSSAAnalysis->getMSSA();
7436char LoopStrengthReduce::ID = 0;
7439 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This defines the Use class.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static std::optional< std::tuple< PHINode *, PHINode *, const SCEV *, bool > > canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, const LoopInfo &LI, const TargetTransformInfo &TTI)
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static cl::opt< cl::boolOrDefault > AllowTerminatingConditionFoldingAfterLSR("lsr-term-fold", cl::Hidden, cl::desc("Attempt to replace primary IV with other IV."))
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs, SmallSet< AssertingVH< DbgValueInst >, 2 > &DVIHandles)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class recording the (high level) value of a variable.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Value handle that asserts if the Value is deleted.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
This is the shared class of boolean and integer constants.
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
An iterator for expression operands.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
void print(raw_ostream &OS) const
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
bool hasPoisonGeneratingFlags() const LLVM_READONLY
Return true if this operator has flags which may cause this instruction to evaluate to poison despite...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
PointerIntPair - This class implements a pair of a pointer and small integer.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
const Loop * getLoop() const
This is the base class for unary cast operator classes.
This node is the base class for n'ary commutative operators.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
bool hasNoUnsignedWrap() const
bool hasNoSelfWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This class represents a signed maximum selection.
This class represents a binary unsigned division operation.
This class represents an unsigned maximum selection.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getVScale(Type *Ty)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static Type * getVoidTy(LLVMContext &C)
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
A nullable Value handle that is nullable.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Pass * createLoopStrengthReducePass()
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
void initializeLoopStrengthReducePass(PassRegistry &)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Option class for critical edge splitting.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.