84#include "llvm/Config/llvm-config.h"
133#define DEBUG_TYPE "loop-reduce"
150 cl::desc(
"Enable LSR phi elimination"));
155 cl::desc(
"Add instruction count to a LSR cost model"));
160 cl::desc(
"Narrow LSR complex solution using"
161 " expectation of registers number"));
167 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
168 " with the same ScaledReg and Scale"));
172 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
175 "Don't prefer any addressing mode"),
178 "Prefer pre-indexed addressing mode"),
181 "Prefer post-indexed addressing mode")));
185 cl::init(std::numeric_limits<uint16_t>::max()),
186 cl::desc(
"LSR search space complexity limit"));
190 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
194 cl::desc(
"Attempt to replace primary IV with other IV."));
198 cl::desc(
"Attempt to drop solution if it is less profitable"));
202 cl::desc(
"Enable analysis of vscale-relative immediates in LSR"));
206 cl::desc(
"Avoid using scaled registers with vscale-relative addressing"));
209 "Number of terminating condition fold recognized and performed");
215 cl::desc(
"Stress test LSR IV chains"));
224 static const unsigned UnknownAddressSpace =
225 std::numeric_limits<unsigned>::max();
227 Type *MemTy =
nullptr;
228 unsigned AddrSpace = UnknownAddressSpace;
230 MemAccessTy() =
default;
231 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
234 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
240 unsigned AS = UnknownAddressSpace) {
261 constexpr Immediate(ScalarTy MinVal,
bool Scalable)
262 : FixedOrScalableQuantity(MinVal, Scalable) {}
264 constexpr Immediate(
const FixedOrScalableQuantity<Immediate, int64_t> &V)
265 : FixedOrScalableQuantity(
V) {}
268 constexpr Immediate() =
delete;
270 static constexpr Immediate getFixed(ScalarTy MinVal) {
271 return {MinVal,
false};
273 static constexpr Immediate getScalable(ScalarTy MinVal) {
274 return {MinVal,
true};
276 static constexpr Immediate
get(ScalarTy MinVal,
bool Scalable) {
277 return {MinVal, Scalable};
279 static constexpr Immediate getZero() {
return {0,
false}; }
280 static constexpr Immediate getFixedMin() {
281 return {std::numeric_limits<int64_t>::min(),
false};
283 static constexpr Immediate getFixedMax() {
284 return {std::numeric_limits<int64_t>::max(),
false};
286 static constexpr Immediate getScalableMin() {
287 return {std::numeric_limits<int64_t>::min(),
true};
289 static constexpr Immediate getScalableMax() {
290 return {std::numeric_limits<int64_t>::max(),
true};
293 constexpr bool isLessThanZero()
const {
return Quantity < 0; }
295 constexpr bool isGreaterThanZero()
const {
return Quantity > 0; }
297 constexpr bool isCompatibleImmediate(
const Immediate &Imm)
const {
298 return isZero() ||
Imm.isZero() ||
Imm.Scalable == Scalable;
301 constexpr bool isMin()
const {
302 return Quantity == std::numeric_limits<ScalarTy>::min();
305 constexpr bool isMax()
const {
306 return Quantity == std::numeric_limits<ScalarTy>::max();
310 constexpr Immediate addUnsigned(
const Immediate &RHS)
const {
311 assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
313 return {
Value, Scalable ||
RHS.isScalable()};
316 constexpr Immediate subUnsigned(
const Immediate &RHS)
const {
317 assert(isCompatibleImmediate(RHS) &&
"Incompatible Immediates");
319 return {
Value, Scalable ||
RHS.isScalable()};
323 constexpr Immediate mulUnsigned(
const ScalarTy RHS)
const {
325 return {
Value, Scalable};
355struct KeyOrderTargetImmediate {
356 bool operator()(
const Immediate &LHS,
const Immediate &RHS)
const {
357 if (
LHS.isScalable() && !
RHS.isScalable())
359 if (!
LHS.isScalable() &&
RHS.isScalable())
361 return LHS.getKnownMinValue() <
RHS.getKnownMinValue();
368struct KeyOrderSizeTAndImmediate {
369 bool operator()(
const std::pair<size_t, Immediate> &LHS,
370 const std::pair<size_t, Immediate> &RHS)
const {
371 size_t LSize =
LHS.first;
372 size_t RSize =
RHS.first;
374 return LSize < RSize;
375 return KeyOrderTargetImmediate()(
LHS.second,
RHS.second);
380#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
382 OS <<
"[NumUses=" << UsedByIndices.count() <<
']';
396 RegUsesTy RegUsesMap;
400 void countRegister(
const SCEV *Reg,
size_t LUIdx);
401 void dropRegister(
const SCEV *Reg,
size_t LUIdx);
402 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
404 bool isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const;
422RegUseTracker::countRegister(
const SCEV *Reg,
size_t LUIdx) {
423 std::pair<RegUsesTy::iterator, bool> Pair =
424 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
425 RegSortData &RSD = Pair.first->second;
428 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
429 RSD.UsedByIndices.set(LUIdx);
433RegUseTracker::dropRegister(
const SCEV *Reg,
size_t LUIdx) {
434 RegUsesTy::iterator It = RegUsesMap.find(Reg);
435 assert(It != RegUsesMap.end());
436 RegSortData &RSD = It->second;
437 assert(RSD.UsedByIndices.size() > LUIdx);
438 RSD.UsedByIndices.reset(LUIdx);
442RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
443 assert(LUIdx <= LastLUIdx);
447 for (
auto &Pair : RegUsesMap) {
449 if (LUIdx < UsedByIndices.
size())
450 UsedByIndices[LUIdx] =
451 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
452 UsedByIndices.
resize(std::min(UsedByIndices.
size(), LastLUIdx));
457RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *Reg,
size_t LUIdx)
const {
458 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
459 if (
I == RegUsesMap.end())
463 if (i == -1)
return false;
464 if ((
size_t)i != LUIdx)
return true;
469 RegUsesTy::const_iterator
I = RegUsesMap.find(Reg);
470 assert(
I != RegUsesMap.end() &&
"Unknown register!");
471 return I->second.UsedByIndices;
474void RegUseTracker::clear() {
488 Immediate BaseOffset = Immediate::getZero();
491 bool HasBaseReg =
false;
514 const SCEV *ScaledReg =
nullptr;
519 Immediate UnfoldedOffset = Immediate::getZero();
527 void canonicalize(
const Loop &L);
531 bool hasZeroEnd()
const;
533 size_t getNumRegs()
const;
536 void deleteBaseReg(
const SCEV *&S);
538 bool referencesReg(
const SCEV *S)
const;
539 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
540 const RegUseTracker &RegUses)
const;
561 for (
const SCEV *S :
Add->operands())
568 if (!AR->getStart()->isZero() && AR->isAffine()) {
571 AR->getStepRecurrence(SE),
587 const SCEV *NegOne = SE.
getSCEV(ConstantInt::getAllOnesValue(
589 for (
const SCEV *S : MyGood)
591 for (
const SCEV *S : MyBad)
610 BaseRegs.push_back(Sum);
616 BaseRegs.push_back(Sum);
624 return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
631bool Formula::isCanonical(
const Loop &L)
const {
633 return BaseRegs.size() <= 1;
638 if (Scale == 1 && BaseRegs.empty())
658void Formula::canonicalize(
const Loop &L) {
662 if (BaseRegs.empty()) {
664 assert(ScaledReg &&
"Expected 1*reg => reg");
665 assert(Scale == 1 &&
"Expected 1*reg => reg");
666 BaseRegs.push_back(ScaledReg);
674 ScaledReg = BaseRegs.pop_back_val();
685 if (
I != BaseRegs.end())
695bool Formula::unscale() {
699 BaseRegs.push_back(ScaledReg);
704bool Formula::hasZeroEnd()
const {
705 if (UnfoldedOffset || BaseOffset)
707 if (BaseRegs.size() != 1 || ScaledReg)
714size_t Formula::getNumRegs()
const {
715 return !!ScaledReg + BaseRegs.size();
720Type *Formula::getType()
const {
721 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
722 ScaledReg ? ScaledReg->getType() :
723 BaseGV ? BaseGV->getType() :
728void Formula::deleteBaseReg(
const SCEV *&S) {
729 if (&S != &BaseRegs.back())
735bool Formula::referencesReg(
const SCEV *S)
const {
741bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
742 const RegUseTracker &RegUses)
const {
744 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
746 for (
const SCEV *BaseReg : BaseRegs)
747 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
752#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
757 BaseGV->printAsOperand(
OS,
false);
759 if (BaseOffset.isNonZero()) {
763 for (
const SCEV *BaseReg : BaseRegs) {
765 OS <<
"reg(" << *BaseReg <<
')';
767 if (HasBaseReg && BaseRegs.empty()) {
769 OS <<
"**error: HasBaseReg**";
770 }
else if (!HasBaseReg && !BaseRegs.empty()) {
772 OS <<
"**error: !HasBaseReg**";
776 OS << Scale <<
"*reg(";
783 if (UnfoldedOffset.isNonZero()) {
785 OS <<
"imm(" << UnfoldedOffset <<
')';
826 bool IgnoreSignificantBits =
false) {
837 if (
RA.isAllOnes()) {
851 const APInt &LA =
C->getAPInt();
860 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
862 IgnoreSignificantBits);
863 if (!Step)
return nullptr;
865 IgnoreSignificantBits);
866 if (!Start)
return nullptr;
879 for (
const SCEV *S :
Add->operands()) {
881 if (!
Op)
return nullptr;
897 dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
912 IgnoreSignificantBits)) {
931 if (
C->getAPInt().getSignificantBits() <= 64) {
933 return Immediate::getFixed(
C->getValue()->getSExtValue());
938 if (Result.isNonZero())
941 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
944 if (Result.isNonZero())
950 if (
const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
951 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(M->getOperand(0)))
952 if (isa<SCEVVScale>(M->getOperand(1))) {
954 return Immediate::getScalable(
C->getValue()->getSExtValue());
956 return Immediate::getZero();
962 if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
963 if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
973 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
989 bool isAddress = isa<LoadInst>(Inst);
990 if (
StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
991 if (SI->getPointerOperand() == OperandVal)
996 switch (
II->getIntrinsicID()) {
997 case Intrinsic::memset:
998 case Intrinsic::prefetch:
999 case Intrinsic::masked_load:
1000 if (
II->getArgOperand(0) == OperandVal)
1003 case Intrinsic::masked_store:
1004 if (
II->getArgOperand(1) == OperandVal)
1007 case Intrinsic::memmove:
1008 case Intrinsic::memcpy:
1009 if (
II->getArgOperand(0) == OperandVal ||
1010 II->getArgOperand(1) == OperandVal)
1016 if (IntrInfo.
PtrVal == OperandVal)
1021 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1022 if (RMW->getPointerOperand() == OperandVal)
1025 if (CmpX->getPointerOperand() == OperandVal)
1034 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1038 AccessTy.MemTy = Ty;
1041 if (
const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1042 AccessTy.AddrSpace = SI->getPointerAddressSpace();
1043 }
else if (
const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
1044 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1045 }
else if (
const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1046 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1048 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1050 switch (
II->getIntrinsicID()) {
1051 case Intrinsic::prefetch:
1052 case Intrinsic::memset:
1053 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1054 AccessTy.MemTy = OperandVal->
getType();
1056 case Intrinsic::memmove:
1057 case Intrinsic::memcpy:
1059 AccessTy.MemTy = OperandVal->
getType();
1061 case Intrinsic::masked_load:
1062 AccessTy.AddrSpace =
1063 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1065 case Intrinsic::masked_store:
1066 AccessTy.AddrSpace =
1067 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1127 if (!Processed.
insert(S).second)
1131 for (
const SCEV *S :
Add->operands()) {
1147 Value *UVal = U->getValue();
1151 if (UI && UI->
getOpcode() == Instruction::Mul &&
1185 const LSRUse &LU,
const Formula &
F);
1189 const LSRUse &LU,
const Formula &
F,
1196 const Loop *
L =
nullptr;
1206 L(
L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1224 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1225 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1226 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1227 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0u);
1233 return C.NumRegs == ~0
u;
1236 void RateFormula(
const Formula &
F,
1246 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1248 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1261 Value *OperandValToReplace =
nullptr;
1271 Immediate
Offset = Immediate::getZero();
1273 LSRFixup() =
default;
1275 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1283struct UniquifierDenseMapInfo {
1286 V.push_back(
reinterpret_cast<const SCEV *
>(-1));
1292 V.push_back(
reinterpret_cast<const SCEV *
>(-2));
1328 MemAccessTy AccessTy;
1334 Immediate MinOffset = Immediate::getFixedMax();
1335 Immediate MaxOffset = Immediate::getFixedMin();
1339 bool AllFixupsOutsideLoop =
true;
1346 bool RigidFormula =
false;
1352 Type *WidestFixupType =
nullptr;
1362 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1364 LSRFixup &getNewFixup() {
1365 Fixups.push_back(LSRFixup());
1369 void pushFixup(LSRFixup &f) {
1371 if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1372 MaxOffset =
f.Offset;
1373 if (Immediate::isKnownLT(
f.Offset, MinOffset))
1374 MinOffset =
f.Offset;
1377 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1378 float getNotSelectedProbability(
const SCEV *Reg)
const;
1379 bool InsertFormula(
const Formula &
F,
const Loop &L);
1380 void DeleteFormula(Formula &
F);
1381 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1390 LSRUse::KindType Kind, MemAccessTy AccessTy,
1392 bool HasBaseReg, int64_t Scale,
1396 if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1400 if (
const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1402 if (
auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1404 if (
auto S = dyn_cast<SCEVNAryExpr>(Reg))
1406 [&](
unsigned i,
const SCEV *Reg) {
1407 return i + getSetupCost(Reg, Depth - 1);
1409 if (
auto S = dyn_cast<SCEVUDivExpr>(Reg))
1416void Cost::RateRegister(
const Formula &
F,
const SCEV *Reg,
1422 if (AR->getLoop() != L) {
1429 if (!AR->getLoop()->contains(L)) {
1439 unsigned LoopCost = 1;
1446 if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1447 if (Step->getAPInt() ==
F.BaseOffset.getFixedValue())
1450 const SCEV *LoopStep = AR->getStepRecurrence(*SE);
1451 if (isa<SCEVConstant>(LoopStep)) {
1452 const SCEV *LoopStart = AR->getStart();
1453 if (!isa<SCEVConstant>(LoopStart) &&
1459 C.AddRecCost += LoopCost;
1463 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1464 if (!Regs.
count(AR->getOperand(1))) {
1465 RateRegister(
F, AR->getOperand(1), Regs);
1477 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1479 C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1486void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *Reg,
1489 if (LoserRegs && LoserRegs->
count(Reg)) {
1493 if (Regs.
insert(Reg).second) {
1494 RateRegister(
F, Reg, Regs);
1495 if (LoserRegs && isLoser())
1500void Cost::RateFormula(
const Formula &
F,
1507 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1509 unsigned PrevAddRecCost =
C.AddRecCost;
1510 unsigned PrevNumRegs =
C.NumRegs;
1511 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1512 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1513 if (VisitedRegs.
count(ScaledReg)) {
1517 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1521 for (
const SCEV *BaseReg :
F.BaseRegs) {
1522 if (VisitedRegs.
count(BaseReg)) {
1526 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1532 size_t NumBaseParts =
F.getNumRegs();
1533 if (NumBaseParts > 1)
1538 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1544 for (
const LSRFixup &
Fixup : LU.Fixups) {
1545 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1546 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1550 else if (
Offset.isNonZero())
1556 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1577 if (
C.NumRegs > TTIRegNum) {
1580 if (PrevNumRegs > TTIRegNum)
1581 C.Insns += (
C.NumRegs - PrevNumRegs);
1583 C.Insns += (
C.NumRegs - TTIRegNum);
1596 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1600 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1603 if (LU.Kind != LSRUse::ICmpZero)
1604 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1610 C.Insns = std::numeric_limits<unsigned>::max();
1611 C.NumRegs = std::numeric_limits<unsigned>::max();
1612 C.AddRecCost = std::numeric_limits<unsigned>::max();
1613 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1614 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1615 C.ImmCost = std::numeric_limits<unsigned>::max();
1616 C.SetupCost = std::numeric_limits<unsigned>::max();
1617 C.ScaleCost = std::numeric_limits<unsigned>::max();
1621bool Cost::isLess(
const Cost &
Other)
const {
1623 C.Insns !=
Other.C.Insns)
1624 return C.Insns <
Other.C.Insns;
1628#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1631 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1632 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1633 if (
C.AddRecCost != 0)
1634 OS <<
", with addrec cost " <<
C.AddRecCost;
1635 if (
C.NumIVMuls != 0)
1636 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1637 << (
C.NumIVMuls == 1 ?
"" :
"s");
1638 if (
C.NumBaseAdds != 0)
1639 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1640 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1641 if (
C.ScaleCost != 0)
1642 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1644 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1645 if (
C.SetupCost != 0)
1646 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1655bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1657 if (
const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1658 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1659 if (PN->getIncomingValue(i) == OperandValToReplace &&
1660 L->contains(PN->getIncomingBlock(i)))
1665 return !
L->contains(UserInst);
1668#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1672 if (
StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1674 Store->getOperand(0)->printAsOperand(
OS,
false);
1675 }
else if (UserInst->getType()->isVoidTy())
1676 OS << UserInst->getOpcodeName();
1678 UserInst->printAsOperand(
OS,
false);
1680 OS <<
", OperandValToReplace=";
1681 OperandValToReplace->printAsOperand(
OS,
false);
1683 for (
const Loop *PIL : PostIncLoops) {
1684 OS <<
", PostIncLoop=";
1685 PIL->getHeader()->printAsOperand(
OS,
false);
1699bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1701 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1704 return Uniquifier.
count(Key);
1708float LSRUse::getNotSelectedProbability(
const SCEV *Reg)
const {
1710 for (
const Formula &
F : Formulae)
1711 if (
F.referencesReg(Reg))
1713 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1718bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1719 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1721 if (!Formulae.empty() && RigidFormula)
1725 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1729 if (!Uniquifier.
insert(Key).second)
1733 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1734 "Zero allocated in a scaled register!");
1736 for (
const SCEV *BaseReg :
F.BaseRegs)
1737 assert(!BaseReg->
isZero() &&
"Zero allocated in a base register!");
1741 Formulae.push_back(
F);
1744 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1752void LSRUse::DeleteFormula(Formula &
F) {
1753 if (&
F != &Formulae.back())
1755 Formulae.pop_back();
1759void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1763 for (
const Formula &
F : Formulae) {
1764 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1765 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1769 for (
const SCEV *S : OldRegs)
1771 RegUses.dropRegister(S, LUIdx);
1774#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1776 OS <<
"LSR Use: Kind=";
1778 case Basic:
OS <<
"Basic";
break;
1779 case Special:
OS <<
"Special";
break;
1780 case ICmpZero:
OS <<
"ICmpZero";
break;
1782 OS <<
"Address of ";
1783 if (AccessTy.MemTy->isPointerTy())
1786 OS << *AccessTy.MemTy;
1789 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1792 OS <<
", Offsets={";
1793 bool NeedComma =
false;
1794 for (
const LSRFixup &
Fixup : Fixups) {
1795 if (NeedComma)
OS <<
',';
1801 if (AllFixupsOutsideLoop)
1802 OS <<
", all-fixups-outside-loop";
1804 if (WidestFixupType)
1805 OS <<
", widest fixup type: " << *WidestFixupType;
1814 LSRUse::KindType Kind, MemAccessTy AccessTy,
1816 bool HasBaseReg, int64_t Scale,
1819 case LSRUse::Address: {
1820 int64_t FixedOffset =
1821 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1822 int64_t ScalableOffset =
1823 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1825 HasBaseReg, Scale, AccessTy.AddrSpace,
1826 Fixup, ScalableOffset);
1828 case LSRUse::ICmpZero:
1835 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1840 if (Scale != 0 && Scale != -1)
1845 if (BaseOffset.isNonZero()) {
1848 if (BaseOffset.isScalable())
1858 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1867 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1869 case LSRUse::Special:
1871 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1878 Immediate MinOffset, Immediate MaxOffset,
1879 LSRUse::KindType Kind, MemAccessTy AccessTy,
1881 bool HasBaseReg, int64_t Scale) {
1882 if (BaseOffset.isNonZero() &&
1883 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1884 BaseOffset.isScalable() != MaxOffset.isScalable()))
1887 int64_t
Base = BaseOffset.getKnownMinValue();
1888 int64_t Min = MinOffset.getKnownMinValue();
1889 int64_t Max = MaxOffset.getKnownMinValue();
1892 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1895 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1898 HasBaseReg, Scale) &&
1904 Immediate MinOffset, Immediate MaxOffset,
1905 LSRUse::KindType Kind, MemAccessTy AccessTy,
1906 const Formula &
F,
const Loop &L) {
1914 assert((
F.isCanonical(L) ||
F.Scale != 0));
1916 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1921 Immediate MaxOffset, LSRUse::KindType Kind,
1923 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1926 BaseOffset, HasBaseReg, Scale) ||
1931 BaseGV, BaseOffset,
true, 0));
1935 Immediate MaxOffset, LSRUse::KindType Kind,
1936 MemAccessTy AccessTy,
const Formula &
F) {
1937 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1938 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1950 const LSRUse &LU,
const Formula &
F) {
1953 for (
const LSRFixup &
Fixup : LU.Fixups)
1955 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1956 F.Scale,
Fixup.UserInst))
1962 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1967 const LSRUse &LU,
const Formula &
F,
1976 return F.Scale != 1;
1979 case LSRUse::Address: {
1981 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1982 if (
F.BaseOffset.isScalable()) {
1983 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1984 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1986 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1987 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1991 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1994 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1997 "Legal addressing mode has an illegal cost!");
1998 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
2000 case LSRUse::ICmpZero:
2002 case LSRUse::Special:
2012 LSRUse::KindType Kind, MemAccessTy AccessTy,
2016 if (BaseOffset.isZero() && !BaseGV)
2021 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2025 if (!HasBaseReg && Scale == 1) {
2035 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2045 Immediate MaxOffset, LSRUse::KindType Kind,
2046 MemAccessTy AccessTy,
const SCEV *S,
2049 if (S->
isZero())
return true;
2057 if (!S->
isZero())
return false;
2060 if (BaseOffset.isZero() && !BaseGV)
2063 if (BaseOffset.isScalable())
2068 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2071 BaseOffset, HasBaseReg, Scale);
2088 const SCEV *IncExpr;
2091 : UserInst(
U), IVOperand(
O), IncExpr(E) {}
2098 const SCEV *ExprBase =
nullptr;
2100 IVChain() =
default;
2101 IVChain(
const IVInc &Head,
const SCEV *
Base)
2102 : Incs(1, Head), ExprBase(
Base) {}
2109 return std::next(Incs.
begin());
2116 bool hasIncs()
const {
return Incs.
size() >= 2; }
2125 bool isProfitableIncrement(
const SCEV *OperExpr,
2126 const SCEV *IncExpr,
2151 bool Changed =
false;
2178 RegUseTracker RegUses;
2183 static const unsigned MaxChains = 8;
2194 void OptimizeShadowIV();
2197 void OptimizeLoopTermCond();
2201 void FinalizeChain(IVChain &Chain);
2202 void CollectChains();
2203 void GenerateIVChain(
const IVChain &Chain,
2206 void CollectInterestingTypesAndFactors();
2207 void CollectFixupsAndInitialFormulae();
2213 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2214 LSRUse::KindType Kind, MemAccessTy AccessTy);
2216 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2217 MemAccessTy AccessTy);
2219 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2221 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2223 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2224 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2225 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2226 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2228 void CollectLoopInvariantFixupsAndFormulae();
2230 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2231 unsigned Depth = 0);
2233 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2235 size_t Idx,
bool IsScaledReg =
false);
2236 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2237 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2238 const Formula &
Base,
size_t Idx,
2239 bool IsScaledReg =
false);
2240 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2241 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2242 const Formula &
Base,
2244 size_t Idx,
bool IsScaledReg =
false);
2245 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2246 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2247 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2248 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2249 void GenerateCrossUseConstantOffsets();
2250 void GenerateAllReuseFormulae();
2252 void FilterOutUndesirableDedicatedRegisters();
2254 size_t EstimateSearchSpaceComplexity()
const;
2255 void NarrowSearchSpaceByDetectingSupersets();
2256 void NarrowSearchSpaceByCollapsingUnrolledCode();
2257 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2258 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2259 void NarrowSearchSpaceByFilterPostInc();
2260 void NarrowSearchSpaceByDeletingCostlyFormulas();
2261 void NarrowSearchSpaceByPickingWinnerRegs();
2262 void NarrowSearchSpaceUsingHeuristics();
2267 const Cost &CurCost,
2277 const LSRUse &LU)
const;
2279 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2282 void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2285 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2294 bool getChanged()
const {
return Changed; }
2296 return ScalarEvolutionIVs;
2310void LSRInstance::OptimizeShadowIV() {
2312 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2320 Type *DestTy =
nullptr;
2321 bool IsSigned =
false;
2335 if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2337 DestTy = UCast->getDestTy();
2339 else if (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2341 DestTy = SCast->getDestTy();
2343 if (!DestTy)
continue;
2363 if (Mantissa == -1)
continue;
2367 unsigned Entry, Latch;
2377 if (!
Init)
continue;
2378 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2379 (
double)
Init->getSExtValue() :
2380 (
double)
Init->getZExtValue());
2384 if (!Incr)
continue;
2385 if (Incr->
getOpcode() != Instruction::Add
2386 && Incr->
getOpcode() != Instruction::Sub)
2402 if (!
C->getValue().isStrictlyPositive())
2410 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2412 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2413 : Instruction::FSub,
2432 if (
U.getUser() ==
Cond) {
2500 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2505 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2506 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2513 if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2514 Pred = ICmpInst::ICMP_SLE;
2516 }
else if (
const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2517 Pred = ICmpInst::ICMP_SLT;
2519 }
else if (
const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2520 Pred = ICmpInst::ICMP_ULT;
2529 if (
Max->getNumOperands() != 2)
2532 const SCEV *MaxLHS =
Max->getOperand(0);
2533 const SCEV *MaxRHS =
Max->getOperand(1);
2538 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2551 "Loop condition operand is an addrec in a different loop!");
2555 Value *NewRHS =
nullptr;
2556 if (ICmpInst::isTrueWhenEqual(Pred)) {
2559 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2560 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2561 NewRHS = BO->getOperand(0);
2563 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2564 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2565 NewRHS = BO->getOperand(0);
2572 else if (
const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2573 NewRHS = SU->getValue();
2586 Cond->getOperand(0), NewRHS,
"scmp");
2590 Cond->replaceAllUsesWith(NewCond);
2593 Cond->eraseFromParent();
2595 if (
Cmp->use_empty())
2596 Cmp->eraseFromParent();
2602LSRInstance::OptimizeLoopTermCond() {
2619 L->getExitingBlocks(ExitingBlocks);
2627 for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2633 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2643 if (!FindIVUserForCond(
Cond, CondUse))
2657 if (!DT.dominates(ExitingBlock, LatchBlock))
2662 if (LatchBlock != ExitingBlock)
2666 if (&*UI != CondUse &&
2667 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2670 const SCEV *
A = IU.getStride(*CondUse, L);
2671 const SCEV *
B = IU.getStride(*UI, L);
2672 if (!
A || !
B)
continue;
2685 if (
C->isOne() ||
C->isMinusOne())
2686 goto decline_post_inc;
2688 if (
C->getValue().getSignificantBits() >= 64 ||
2689 C->getValue().isMinSignedValue())
2690 goto decline_post_inc;
2694 TTI, UI->getUser(), UI->getOperandValToReplace());
2695 int64_t Scale =
C->getSExtValue();
2699 AccessTy.AddrSpace))
2700 goto decline_post_inc;
2705 AccessTy.AddrSpace))
2706 goto decline_post_inc;
2711 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2717 if (
Cond->getNextNonDebugInstruction() != TermBr) {
2718 if (
Cond->hasOneUse()) {
2719 Cond->moveBefore(TermBr);
2723 Cond = cast<ICmpInst>(
Cond->clone());
2724 Cond->setName(
L->getHeader()->getName() +
".termcond");
2746 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2748 IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
2753bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2754 bool HasBaseReg, LSRUse::KindType Kind,
2755 MemAccessTy AccessTy) {
2756 Immediate NewMinOffset = LU.MinOffset;
2757 Immediate NewMaxOffset = LU.MaxOffset;
2758 MemAccessTy NewAccessTy = AccessTy;
2763 if (LU.Kind != Kind)
2769 if (Kind == LSRUse::Address) {
2770 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2771 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2772 AccessTy.AddrSpace);
2777 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2779 LU.MaxOffset - NewOffset, HasBaseReg))
2781 NewMinOffset = NewOffset;
2782 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2784 NewOffset - LU.MinOffset, HasBaseReg))
2786 NewMaxOffset = NewOffset;
2792 if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
2793 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2797 LU.MinOffset = NewMinOffset;
2798 LU.MaxOffset = NewMaxOffset;
2799 LU.AccessTy = NewAccessTy;
2806std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2807 LSRUse::KindType Kind,
2808 MemAccessTy AccessTy) {
2816 Offset = Immediate::getFixed(0);
2819 std::pair<UseMapTy::iterator, bool>
P =
2823 size_t LUIdx =
P.first->second;
2824 LSRUse &LU =
Uses[LUIdx];
2825 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2827 return std::make_pair(LUIdx,
Offset);
2831 size_t LUIdx =
Uses.size();
2832 P.first->second = LUIdx;
2833 Uses.push_back(LSRUse(Kind, AccessTy));
2834 LSRUse &LU =
Uses[LUIdx];
2838 return std::make_pair(LUIdx,
Offset);
2842void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2843 if (&LU != &
Uses.back())
2848 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2854LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2855 const LSRUse &OrigLU) {
2857 for (LSRUse &LU :
Uses) {
2863 if (&LU != &OrigLU &&
2864 LU.Kind != LSRUse::ICmpZero &&
2865 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2866 LU.WidestFixupType == OrigLU.WidestFixupType &&
2867 LU.HasFormulaWithSameRegs(OrigF)) {
2869 for (
const Formula &
F : LU.Formulae) {
2872 if (
F.BaseRegs == OrigF.BaseRegs &&
2873 F.ScaledReg == OrigF.ScaledReg &&
2874 F.BaseGV == OrigF.BaseGV &&
2875 F.Scale == OrigF.Scale &&
2876 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2877 if (
F.BaseOffset.isZero())
2892void LSRInstance::CollectInterestingTypesAndFactors() {
2898 const SCEV *Expr = IU.getExpr(U);
2916 }
while (!Worklist.
empty());
2921 I = Strides.
begin(), E = Strides.
end();
I != E; ++
I)
2923 std::next(
I); NewStrideIter != E; ++NewStrideIter) {
2924 const SCEV *OldStride = *
I;
2925 const SCEV *NewStride = *NewStrideIter;
2936 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2938 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2939 Factors.insert(Factor->getAPInt().getSExtValue());
2944 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2945 Factors.insert(Factor->getAPInt().getSExtValue());
2951 if (
Types.size() == 1)
2963 for(; OI != OE; ++OI) {
2964 if (
Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2969 dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(Oper))) {
2981 if (
TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2982 return Trunc->getOperand(0);
3004 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
3006 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
3008 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
3015 if (SubExpr->getSCEVType() ==
scAddExpr)
3018 if (SubExpr->getSCEVType() !=
scMulExpr)
3024 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
3034bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3035 const SCEV *IncExpr,
3043 if (!isa<SCEVConstant>(IncExpr)) {
3045 if (isa<SCEVConstant>(SE.
getMinusSCEV(OperExpr, HeadExpr)))
3070 if (!Chain.hasIncs())
3073 if (!
Users.empty()) {
3074 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3076 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3079 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3087 if (isa<PHINode>(Chain.tailUserInst())
3088 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3091 const SCEV *LastIncExpr =
nullptr;
3092 unsigned NumConstIncrements = 0;
3093 unsigned NumVarIncrements = 0;
3094 unsigned NumReusedIncrements = 0;
3099 for (
const IVInc &Inc : Chain) {
3102 if (Inc.IncExpr->isZero())
3107 if (isa<SCEVConstant>(Inc.IncExpr)) {
3108 ++NumConstIncrements;
3112 if (Inc.IncExpr == LastIncExpr)
3113 ++NumReusedIncrements;
3117 LastIncExpr = Inc.IncExpr;
3122 if (NumConstIncrements > 1)
3129 cost += NumVarIncrements;
3133 cost -= NumReusedIncrements;
3135 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3152 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3153 const SCEV *LastIncExpr =
nullptr;
3154 for (; ChainIdx < NChains; ++ChainIdx) {
3155 IVChain &Chain = IVChainVec[ChainIdx];
3169 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
3175 if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.
isLoopInvariant(IncExpr, L))
3178 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3179 LastIncExpr = IncExpr;
3185 if (ChainIdx == NChains) {
3186 if (isa<PHINode>(UserInst))
3192 LastIncExpr = OperExpr;
3196 if (!isa<SCEVAddRecExpr>(LastIncExpr))
3199 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3201 ChainUsersVec.
resize(NChains);
3202 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3203 <<
") IV=" << *LastIncExpr <<
"\n");
3205 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3206 <<
") IV+" << *LastIncExpr <<
"\n");
3208 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3210 IVChain &Chain = IVChainVec[ChainIdx];
3214 if (!LastIncExpr->
isZero()) {
3215 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
3231 IVChain::const_iterator IncIter = Chain.Incs.begin();
3232 IVChain::const_iterator IncEnd = Chain.Incs.end();
3233 for( ; IncIter != IncEnd; ++IncIter) {
3234 if (IncIter->UserInst == OtherUse)
3237 if (IncIter != IncEnd)
3241 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3242 && IU.isIVUserOrOperand(OtherUse)) {
3245 NearUsers.
insert(OtherUse);
3250 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3275void LSRInstance::CollectChains() {
3281 for (
DomTreeNode *Rung = DT.getNode(
L->getLoopLatch());
3282 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3291 if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
3301 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3302 ChainIdx < NChains; ++ChainIdx) {
3303 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3309 while (IVOpIter != IVOpEnd) {
3310 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3311 if (UniqueOperands.
insert(IVOpInst).second)
3312 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3313 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3318 for (
PHINode &PN :
L->getHeader()->phis()) {
3323 dyn_cast<Instruction>(PN.getIncomingValueForBlock(
L->getLoopLatch()));
3325 ChainInstruction(&PN, IncV, ChainUsersVec);
3328 unsigned ChainIdx = 0;
3329 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3330 UsersIdx < NChains; ++UsersIdx) {
3332 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3335 if (ChainIdx != UsersIdx)
3336 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3337 FinalizeChain(IVChainVec[ChainIdx]);
3340 IVChainVec.resize(ChainIdx);
3343void LSRInstance::FinalizeChain(IVChain &Chain) {
3344 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3345 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3347 for (
const IVInc &Inc : Chain) {
3349 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3350 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3351 IVIncSet.insert(UseI);
3358 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3359 Immediate IncOffset = Immediate::getZero();
3366 auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
3367 if (!IncVScale || IncVScale->getNumOperands() != 2 ||
3368 !isa<SCEVVScale>(IncVScale->getOperand(1)))
3370 auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
3371 if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
3373 IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
3389void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3393 const IVInc &Head = Chain.Incs[0];
3398 Value *IVSrc =
nullptr;
3399 while (IVOpIter != IVOpEnd) {
3410 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3411 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3414 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3416 if (IVOpIter == IVOpEnd) {
3418 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3421 assert(IVSrc &&
"Failed to find IV chain source");
3426 const SCEV *LeftOverExpr =
nullptr;
3431 for (
const IVInc &Inc : Chain) {
3433 if (isa<PHINode>(InsertPt))
3434 InsertPt =
L->getLoopLatch()->getTerminator();
3438 Value *IVOper = IVSrc;
3439 if (!Inc.IncExpr->isZero()) {
3444 LeftOverExpr = LeftOverExpr ?
3445 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3449 bool FoundBase =
false;
3450 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3453 if (!Remainder->
isZero()) {
3455 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3456 const SCEV *IVOperExpr =
3458 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3467 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3470 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3473 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3477 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3480 LeftOverExpr =
nullptr;
3483 Type *OperTy = Inc.IVOperand->getType();
3484 if (IVTy != OperTy) {
3486 "cannot extend a chained IV");
3488 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3490 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3491 if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3496 if (isa<PHINode>(Chain.tailUserInst())) {
3497 for (
PHINode &Phi :
L->getHeader()->phis()) {
3501 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3504 Value *IVOper = IVSrc;
3506 if (IVTy != PostIncTy) {
3508 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3509 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3510 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3512 Phi.replaceUsesOfWith(PostIncV, IVOper);
3518void LSRInstance::CollectFixupsAndInitialFormulae() {
3520 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3532 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3533 if (IVIncSet.count(UseI)) {
3534 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3538 LSRUse::KindType
Kind = LSRUse::Basic;
3539 MemAccessTy AccessTy;
3541 Kind = LSRUse::Address;
3545 const SCEV *S = IU.getExpr(U);
3556 if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3559 if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3561 if (CI->isEquality()) {
3564 Value *
NV = CI->getOperand(1);
3565 if (NV ==
U.getOperandValToReplace()) {
3566 CI->setOperand(1, CI->getOperand(0));
3567 CI->setOperand(0, NV);
3568 NV = CI->getOperand(1);
3575 (!
NV->getType()->isPointerTy() ||
3582 Kind = LSRUse::ICmpZero;
3584 }
else if (
L->isLoopInvariant(NV) &&
3585 (!isa<Instruction>(NV) ||
3586 DT.dominates(cast<Instruction>(NV),
L->getHeader())) &&
3587 !
NV->getType()->isPointerTy()) {
3598 Kind = LSRUse::ICmpZero;
3600 assert(!isa<SCEVCouldNotCompute>(S));
3605 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3606 if (Factors[i] != -1)
3607 Factors.insert(-(
uint64_t)Factors[i]);
3613 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3614 size_t LUIdx =
P.first;
3616 LSRUse &LU =
Uses[LUIdx];
3619 LSRFixup &LF = LU.getNewFixup();
3620 LF.UserInst = UserInst;
3621 LF.OperandValToReplace =
U.getOperandValToReplace();
3622 LF.PostIncLoops = TmpPostIncLoops;
3624 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3627 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3629 F.initialMatch(S, L, SE);
3630 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU);
3631 VisitedLSRUse.
insert(LUIdx);
3634 if (!LU.WidestFixupType ||
3637 LU.WidestFixupType = LF.OperandValToReplace->getType();
3640 if (LU.Formulae.empty()) {
3641 InsertInitialFormula(S, LU, LUIdx);
3642 CountRegisters(LU.Formulae.back(), LUIdx);
3651void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3655 LU.RigidFormula =
true;
3658 F.initialMatch(S, L, SE);
3659 bool Inserted = InsertFormula(LU, LUIdx,
F);
3660 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3666LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3667 LSRUse &LU,
size_t LUIdx) {
3669 F.BaseRegs.push_back(S);
3670 F.HasBaseReg =
true;
3671 bool Inserted = InsertFormula(LU, LUIdx,
F);
3672 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3676void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3678 RegUses.countRegister(
F.ScaledReg, LUIdx);
3679 for (
const SCEV *BaseReg :
F.BaseRegs)
3680 RegUses.countRegister(BaseReg, LUIdx);
3685bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3688 "Formula is illegal");
3690 if (!LU.InsertFormula(
F, *L))
3693 CountRegisters(
F, LUIdx);
3703LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3712 while (!Worklist.
empty()) {
3716 if (!Visited.
insert(S).second)
3723 else if (
const SCEVUDivExpr *
D = dyn_cast<SCEVUDivExpr>(S)) {
3726 }
else if (
const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3727 const Value *
V = US->getValue();
3728 if (
const Instruction *Inst = dyn_cast<Instruction>(V)) {
3730 if (
L->contains(Inst))
continue;
3731 }
else if (isa<Constant>(V))
3734 for (
const Use &U :
V->uses()) {
3735 const Instruction *UserInst = dyn_cast<Instruction>(
U.getUser());
3744 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3747 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3749 cast<PHINode>(UserInst)->getIncomingBlock(
3751 if (!DT.dominates(
L->getHeader(), UseBB))
3764 if (isa<PHINode>(UserInst)) {
3765 const auto *PhiNode = cast<PHINode>(UserInst);
3766 bool HasIncompatibleEHPTerminatedBlock =
false;
3768 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3769 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3770 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3771 HasIncompatibleEHPTerminatedBlock =
true;
3776 if (HasIncompatibleEHPTerminatedBlock) {
3782 if (isa<CatchSwitchInst>(UserInst->
getParent()->getTerminator()))
3789 if (!isa<SCEVUnknown>(UserS))
3798 if (
const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3799 unsigned OtherIdx = !
U.getOperandNo();
3800 Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
3805 std::pair<size_t, Immediate>
P =
3806 getUse(S, LSRUse::Basic, MemAccessTy());
3807 size_t LUIdx =
P.first;
3809 LSRUse &LU =
Uses[LUIdx];
3810 LSRFixup &LF = LU.getNewFixup();
3811 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3812 LF.OperandValToReplace =
U;
3814 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3815 if (!LU.WidestFixupType ||
3818 LU.WidestFixupType = LF.OperandValToReplace->getType();
3819 InsertSupplementalFormula(US, LU, LUIdx);
3820 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3836 unsigned Depth = 0) {
3843 for (
const SCEV *S :
Add->operands()) {
3849 }
else if (
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3858 if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3860 Remainder =
nullptr;
3878 const SCEV *Remainder =
3891 LSRUse &LU,
const SCEV *S,
const Loop *L,
3893 if (LU.Kind != LSRUse::Address ||
3894 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3900 if (!isa<SCEVConstant>(LoopStep))
3906 if (!isa<SCEVConstant>(LoopStart) && SE.
isLoopInvariant(LoopStart, L))
3913void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3914 const Formula &
Base,
3917 const SCEV *BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[
Idx];
3929 if (AddOps.
size() == 1)
3943 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3949 InnerAddOps.append(std::next(J),
3954 if (InnerAddOps.size() == 1 &&
3956 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3964 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3968 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3973 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3976 F.ScaledReg =
nullptr;
3978 F.BaseRegs.erase(
F.BaseRegs.begin() +
Idx);
3979 }
else if (IsScaledReg)
3980 F.ScaledReg = InnerSum;
3982 F.BaseRegs[
Idx] = InnerSum;
3988 SC->getValue()->getZExtValue()))
3990 Immediate::getFixed((
uint64_t)
F.UnfoldedOffset.getFixedValue() +
3991 SC->getValue()->getZExtValue());
3993 F.BaseRegs.push_back(*J);
3998 if (InsertFormula(LU, LUIdx,
F))
4005 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4011void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
4013 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4018 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4019 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4021 if (
Base.Scale == 1)
4022 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4028void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4031 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4032 (
Base.UnfoldedOffset.isNonZero()) <=
4040 Formula NewBase =
Base;
4041 NewBase.BaseRegs.clear();
4042 Type *CombinedIntegerType =
nullptr;
4043 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4046 if (!CombinedIntegerType)
4051 NewBase.BaseRegs.push_back(BaseReg);
4055 if (Ops.
size() == 0)
4060 auto GenerateFormula = [&](
const SCEV *Sum) {
4061 Formula
F = NewBase;
4069 F.BaseRegs.push_back(Sum);
4071 (void)InsertFormula(LU, LUIdx,
F);
4075 if (Ops.
size() > 1) {
4082 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4083 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4085 NewBase.UnfoldedOffset.getFixedValue(),
true));
4086 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4092void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4093 const Formula &
Base,
size_t Idx,
4097 if (
G->isZero() || !GV)
4101 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4106 F.BaseRegs[
Idx] =
G;
4107 (void)InsertFormula(LU, LUIdx,
F);
4111void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4114 if (
Base.BaseGV)
return;
4116 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4117 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4118 if (
Base.Scale == 1)
4119 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4124void LSRInstance::GenerateConstantOffsetsImpl(
4125 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4128 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4130 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4132 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4134 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4136 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4142 F.ScaledReg =
nullptr;
4144 F.deleteBaseReg(
F.BaseRegs[
Idx]);
4146 }
else if (IsScaledReg)
4149 F.BaseRegs[
Idx] = NewG;
4151 (void)InsertFormula(LU, LUIdx,
F);
4166 if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
4168 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
4169 const APInt &StepInt = StepRec->getAPInt();
4173 for (Immediate
Offset : Worklist) {
4175 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4182 for (Immediate
Offset : Worklist)
4186 if (
G->isZero() ||
Imm.isZero() ||
4187 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4190 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4191 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4196 F.BaseRegs[
Idx] =
G;
4201 (void)InsertFormula(LU, LUIdx,
F);
4205void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4211 if (LU.MaxOffset != LU.MinOffset)
4214 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4215 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4216 if (
Base.Scale == 1)
4217 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4223void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4225 if (LU.Kind != LSRUse::ICmpZero)
return;
4233 if (LU.MinOffset != LU.MaxOffset)
return;
4236 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4238 for (
const SCEV *BaseReg :
Base.BaseRegs)
4241 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4244 for (int64_t Factor : Factors) {
4249 if (
Base.BaseOffset.isMin() && Factor == -1)
4252 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4254 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4255 assert(Factor != 0 &&
"Zero factor not expected!");
4256 if (NewBaseOffset.getFixedValue() / Factor !=
4257 Base.BaseOffset.getFixedValue())
4265 Immediate
Offset = LU.MinOffset;
4266 if (
Offset.isMin() && Factor == -1)
4269 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4277 F.BaseOffset = NewBaseOffset;
4284 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4289 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4303 if (
F.UnfoldedOffset.isNonZero()) {
4304 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4306 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4307 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4308 Base.UnfoldedOffset.getFixedValue())
4312 IntTy,
F.UnfoldedOffset.getFixedValue()))
4317 (void)InsertFormula(LU, LUIdx,
F);
4324void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4331 if (
Base.Scale != 0 && !
Base.unscale())
4334 assert(
Base.Scale == 0 &&
"unscale did not did its job!");
4337 for (int64_t Factor : Factors) {
4338 Base.Scale = Factor;
4339 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4341 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4345 if (LU.Kind == LSRUse::Basic &&
4346 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4347 LU.AccessTy,
Base) &&
4348 LU.AllFixupsOutsideLoop)
4349 LU.Kind = LSRUse::Special;
4355 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4356 Base.BaseOffset.isZero() && !
Base.BaseGV)
4359 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4361 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4368 if (!Quotient->isZero()) {
4371 F.ScaledReg = Quotient;
4372 F.deleteBaseReg(
F.BaseRegs[i]);
4376 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4377 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4381 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4383 (void)InsertFormula(LU, LUIdx,
F);
4399 const SCEV *Result =
nullptr;
4400 for (
auto &L :
Loops) {
4404 if (!New || (Result && New != Result))
4409 assert(Result &&
"failed to create expression");
4414void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4416 if (
Base.BaseGV)
return;
4426 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4429 [](
const SCEV *S) { return S->getType()->isPointerTy(); }))
4433 for (
auto &LF : LU.Fixups)
4434 Loops.push_back(LF.PostIncLoops);
4436 for (
Type *SrcTy : Types) {
4445 const SCEV *NewScaledReg =
4447 if (!NewScaledReg || NewScaledReg->
isZero())
4449 F.ScaledReg = NewScaledReg;
4451 bool HasZeroBaseReg =
false;
4452 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4453 const SCEV *NewBaseReg =
4455 if (!NewBaseReg || NewBaseReg->
isZero()) {
4456 HasZeroBaseReg =
true;
4459 BaseReg = NewBaseReg;
4466 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4470 (void)InsertFormula(LU, LUIdx,
F);
4483 const SCEV *OrigReg;
4486 : LUIdx(LI),
Imm(
I), OrigReg(
R) {}
4494#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4496 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4497 <<
" , add offset " <<
Imm;
4507void LSRInstance::GenerateCrossUseConstantOffsets() {
4509 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4514 for (
const SCEV *
Use : RegUses) {
4517 auto Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
4520 Pair.first->second.insert(std::make_pair(Imm,
Use));
4521 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(
Use);
4530 for (
const SCEV *Reg : Sequence) {
4531 const ImmMapTy &Imms =
Map.find(Reg)->second;
4534 if (Imms.size() == 1)
4537 LLVM_DEBUG(
dbgs() <<
"Generating cross-use offsets for " << *Reg <<
':';
4538 for (
const auto &Entry
4540 <<
' ' <<
Entry.first;
4544 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4546 const SCEV *OrigReg = J->second;
4548 Immediate JImm = J->first;
4549 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4551 if (!isa<SCEVConstant>(OrigReg) &&
4552 UsedByIndicesMap[Reg].
count() == 1) {
4560 Immediate
First = Imms.begin()->first;
4561 Immediate
Last = std::prev(Imms.end())->first;
4562 if (!
First.isCompatibleImmediate(
Last)) {
4569 bool Scalable =
First.isScalable() ||
Last.isScalable();
4570 int64_t FI =
First.getKnownMinValue();
4571 int64_t LI =
Last.getKnownMinValue();
4574 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4577 Avg = Avg + ((FI ^ LI) & ((
uint64_t)Avg >> 63));
4578 ImmMapTy::const_iterator OtherImms[] = {
4579 Imms.begin(), std::prev(Imms.end()),
4580 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4581 for (
const auto &M : OtherImms) {
4582 if (M == J || M == JE)
continue;
4583 if (!JImm.isCompatibleImmediate(
M->first))
4587 Immediate
Imm = JImm.subUnsigned(
M->first);
4588 for (
unsigned LUIdx : UsedByIndices.
set_bits())
4590 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4598 UsedByIndicesMap.
clear();
4599 UniqueItems.
clear();
4602 for (
const WorkItem &WI : WorkItems) {
4603 size_t LUIdx = WI.LUIdx;
4604 LSRUse &LU =
Uses[LUIdx];
4605 Immediate
Imm = WI.Imm;
4606 const SCEV *OrigReg = WI.OrigReg;
4609 const SCEV *NegImmS =
Imm.getNegativeSCEV(SE, IntTy);
4613 for (
size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4614 Formula
F = LU.Formulae[
L];
4621 if (
F.ScaledReg == OrigReg) {
4622 if (!
F.BaseOffset.isCompatibleImmediate(Imm))
4624 Immediate
Offset =
F.BaseOffset.addUnsigned(
Imm.mulUnsigned(
F.Scale));
4626 const SCEV *S =
Offset.getNegativeSCEV(SE, IntTy);
4627 if (
F.referencesReg(S))
4630 NewF.BaseOffset =
Offset;
4631 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4634 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4639 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
4643 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4645 if (
C->getValue()->isNegative() !=
4646 (NewF.BaseOffset.isLessThanZero()) &&
4648 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4653 NewF.canonicalize(*this->L);
4654 (void)InsertFormula(LU, LUIdx, NewF);
4657 for (
size_t N = 0, NE =
F.BaseRegs.size();
N !=
NE; ++
N) {
4658 const SCEV *BaseReg =
F.BaseRegs[
N];
4659 if (BaseReg != OrigReg)
4662 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4663 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4664 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4666 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4668 LU.Kind, LU.AccessTy, NewF)) {
4672 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4676 NewF.UnfoldedOffset = NewUnfoldedOffset;
4678 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4683 for (
const SCEV *NewReg : NewF.BaseRegs)
4684 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewReg)) {
4685 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4687 if ((
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4689 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4690 (
C->getAPInt() + NewF.BaseOffset.getFixedValue())
4692 (
unsigned)llvm::countr_zero<uint64_t>(
4693 NewF.BaseOffset.getFixedValue()))
4698 NewF.canonicalize(*this->L);
4699 (void)InsertFormula(LU, LUIdx, NewF);
4710LSRInstance::GenerateAllReuseFormulae() {
4713 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4714 LSRUse &LU =
Uses[LUIdx];
4715 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4716 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4717 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4718 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4720 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4721 LSRUse &LU =
Uses[LUIdx];
4722 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4723 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4724 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4725 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4726 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4727 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4728 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4729 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4731 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4732 LSRUse &LU =
Uses[LUIdx];
4733 for (
size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4734 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4737 GenerateCrossUseConstantOffsets();
4740 "After generating reuse formulae:\n";
4741 print_uses(
dbgs()));
4746void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4751 bool ChangedFormulae =
false;
4756 using BestFormulaeTy =
4759 BestFormulaeTy BestFormulae;
4761 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4762 LSRUse &LU =
Uses[LUIdx];
4767 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4768 FIdx != NumForms; ++FIdx) {
4769 Formula &
F = LU.Formulae[FIdx];
4780 CostF.RateFormula(
F, Regs, VisitedRegs, LU, &LoserRegs);
4781 if (CostF.isLoser()) {
4793 for (
const SCEV *Reg :
F.BaseRegs) {
4794 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4798 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4799 Key.push_back(
F.ScaledReg);
4804 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4805 BestFormulae.insert(std::make_pair(Key, FIdx));
4809 Formula &Best = LU.Formulae[
P.first->second];
4811 Cost CostBest(L, SE,
TTI, AMK);
4813 CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
4814 if (CostF.isLess(CostBest))
4818 " in favor of formula ";
4819 Best.print(
dbgs());
dbgs() <<
'\n');
4822 ChangedFormulae =
true;
4824 LU.DeleteFormula(
F);
4832 LU.RecomputeRegs(LUIdx, RegUses);
4835 BestFormulae.clear();
4840 "After filtering out undesirable candidates:\n";
4848size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4850 for (
const LSRUse &LU :
Uses) {
4851 size_t FSize = LU.Formulae.size();
4866void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4870 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4871 "which use a superset of registers used by other "
4874 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4875 LSRUse &LU =
Uses[LUIdx];
4877 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4878 Formula &
F = LU.Formulae[i];
4879 if (
F.BaseOffset.isNonZero() &&
F.BaseOffset.isScalable())
4885 I =
F.BaseRegs.begin(), E =
F.BaseRegs.end();
I != E; ++
I) {
4891 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4892 (
uint64_t)
C->getValue()->getSExtValue());
4893 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4894 (
I -
F.BaseRegs.begin()));
4895 if (LU.HasFormulaWithSameRegs(NewF)) {
4898 LU.DeleteFormula(
F);
4904 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*
I)) {
4905 if (
GlobalValue *GV = dyn_cast<GlobalValue>(
U->getValue()))
4909 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4910 (
I -
F.BaseRegs.begin()));
4911 if (LU.HasFormulaWithSameRegs(NewF)) {
4914 LU.DeleteFormula(
F);
4925 LU.RecomputeRegs(LUIdx, RegUses);
4934void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4939 dbgs() <<
"The search space is too complex.\n"
4940 "Narrowing the search space by assuming that uses separated "
4941 "by a constant offset will use the same registers.\n");
4945 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4946 LSRUse &LU =
Uses[LUIdx];
4947 for (
const Formula &
F : LU.Formulae) {
4948 if (
F.BaseOffset.isZero() || (
F.Scale != 0 &&
F.Scale != 1))
4951 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4955 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4956 LU.Kind, LU.AccessTy))
4961 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4964 for (LSRFixup &
Fixup : LU.Fixups) {
4965 Fixup.Offset +=
F.BaseOffset;
4966 LUThatHas->pushFixup(
Fixup);
4972 for (
size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4973 Formula &
F = LUThatHas->Formulae[i];
4974 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4975 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4977 LUThatHas->DeleteFormula(
F);
4985 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
4988 DeleteUse(LU, LUIdx);
5001void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5005 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
5006 "undesirable dedicated registers.\n");
5008 FilterOutUndesirableDedicatedRegisters();
5023void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5028 dbgs() <<
"The search space is too complex.\n"
5029 "Narrowing the search space by choosing the best Formula "
5030 "from the Formulae with the same Scale and ScaledReg.\n");
5035 BestFormulaeTy BestFormulae;
5037 bool ChangedFormulae =
false;
5042 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5043 LSRUse &LU =
Uses[LUIdx];
5048 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5053 size_t FARegNum = 0;
5054 for (
const SCEV *Reg : FA.BaseRegs) {
5055 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5056 FARegNum += (NumUses - UsedByIndices.
count() + 1);
5058 size_t FBRegNum = 0;
5059 for (
const SCEV *Reg : FB.BaseRegs) {
5060 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5061 FBRegNum += (NumUses - UsedByIndices.
count() + 1);
5063 if (FARegNum != FBRegNum)
5064 return FARegNum < FBRegNum;
5071 CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
5073 CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
5074 return CostFA.isLess(CostFB);
5078 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5080 Formula &
F = LU.Formulae[FIdx];
5083 auto P = BestFormulae.insert({{
F.ScaledReg,
F.Scale}, FIdx});
5087 Formula &Best = LU.Formulae[
P.first->second];
5088 if (IsBetterThan(
F, Best))
5092 " in favor of formula ";
5093 Best.print(
dbgs());
dbgs() <<
'\n');
5095 ChangedFormulae =
true;
5097 LU.DeleteFormula(
F);
5103 LU.RecomputeRegs(LUIdx, RegUses);
5106 BestFormulae.clear();
5111 "After filtering out undesirable candidates:\n";
5118void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5125 "Narrowing the search space by choosing the lowest "
5126 "register Formula for PostInc Uses.\n");
5128 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5129 LSRUse &LU =
Uses[LUIdx];
5131 if (LU.Kind != LSRUse::Address)
5137 size_t MinRegs = std::numeric_limits<size_t>::max();
5138 for (
const Formula &
F : LU.Formulae)
5139 MinRegs = std::min(
F.getNumRegs(), MinRegs);
5142 for (
size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5144 Formula &
F = LU.Formulae[FIdx];
5145 if (
F.getNumRegs() > MinRegs) {
5148 LU.DeleteFormula(
F);
5155 LU.RecomputeRegs(LUIdx, RegUses);
5206void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5219 DenseMap <const SCEV *, float> RegNumMap;
5220 for (
const SCEV *Reg : RegUses) {
5221 if (UniqRegs.
count(Reg))
5224 for (
const LSRUse &LU :
Uses) {
5225 if (!LU.Regs.count(Reg))
5227 float P = LU.getNotSelectedProbability(Reg);
5233 RegNumMap.
insert(std::make_pair(Reg, PNotSel));
5237 dbgs() <<
"Narrowing the search space by deleting costly formulas\n");
5240 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5241 LSRUse &LU =
Uses[LUIdx];
5243 if (LU.Formulae.size() < 2)
5248 float FMinRegNum = LU.Formulae[0].getNumRegs();
5249 float FMinARegNum = LU.Formulae[0].getNumRegs();
5251 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5252 Formula &
F = LU.Formulae[i];
5255 for (
const SCEV *BaseReg :
F.BaseRegs) {
5256 if (UniqRegs.
count(BaseReg))
5258 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5259 if (isa<SCEVAddRecExpr>(BaseReg))
5261 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5263 if (
const SCEV *ScaledReg =
F.ScaledReg) {
5264 if (!UniqRegs.
count(ScaledReg)) {
5266 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5267 if (isa<SCEVAddRecExpr>(ScaledReg))
5269 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5272 if (FMinRegNum > FRegNum ||
5273 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5274 FMinRegNum = FRegNum;
5275 FMinARegNum = FARegNum;
5280 dbgs() <<
" with min reg num " << FMinRegNum <<
'\n');
5282 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5283 while (LU.Formulae.size() != 1) {
5286 LU.Formulae.pop_back();
5288 LU.RecomputeRegs(LUIdx, RegUses);
5289 assert(LU.Formulae.size() == 1 &&
"Should be exactly 1 min regs formula");
5290 Formula &
F = LU.Formulae[0];
5293 UniqRegs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
5306 MemAccessTy AccessType) {
5307 if (Best->
getType() != Reg->getType() ||
5308 (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
5309 cast<SCEVAddRecExpr>(Best)->getLoop() !=
5310 cast<SCEVAddRecExpr>(Reg)->getLoop()))
5312 const auto *Diff = dyn_cast<SCEVConstant>(SE.
getMinusSCEV(Best, Reg));
5317 AccessType.MemTy,
nullptr,
5318 Diff->getAPInt().getSExtValue(),
5319 true, 0, AccessType.AddrSpace) &&
5321 AccessType.MemTy,
nullptr,
5322 -Diff->getAPInt().getSExtValue(),
5323 true, 0, AccessType.AddrSpace);
5329void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5340 const SCEV *Best =
nullptr;
5341 unsigned BestNum = 0;
5342 for (
const SCEV *Reg : RegUses) {
5343 if (Taken.
count(Reg))
5347 BestNum = RegUses.getUsedByIndices(Reg).count();
5349 unsigned Count = RegUses.getUsedByIndices(Reg).count();
5350 if (Count > BestNum) {
5358 if (Count == BestNum) {
5359 int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5360 if (LUIdx >= 0 &&
Uses[LUIdx].Kind == LSRUse::Address &&
5362 Uses[LUIdx].AccessTy)) {
5369 assert(Best &&
"Failed to find best LSRUse candidate");
5371 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by assuming " << *Best
5372 <<
" will yield profitable reuse.\n");
5377 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
5378 LSRUse &LU =
Uses[LUIdx];
5379 if (!LU.Regs.count(Best))
continue;
5382 for (
size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5383 Formula &
F = LU.Formulae[i];
5384 if (!
F.referencesReg(Best)) {
5386 LU.DeleteFormula(
F);
5390 assert(e != 0 &&
"Use has no formulae left! Is Regs inconsistent?");
5396 LU.RecomputeRegs(LUIdx, RegUses);
5407void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5408 NarrowSearchSpaceByDetectingSupersets();
5409 NarrowSearchSpaceByCollapsingUnrolledCode();
5410 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5412 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5413 NarrowSearchSpaceByFilterPostInc();
5415 NarrowSearchSpaceByDeletingCostlyFormulas();
5417 NarrowSearchSpaceByPickingWinnerRegs();
5424 const Cost &CurCost,
5437 const LSRUse &LU =
Uses[Workspace.
size()];
5444 for (
const SCEV *S : CurRegs)
5445 if (LU.Regs.count(S))
5449 Cost NewCost(L, SE,
TTI, AMK);
5450 for (
const Formula &
F : LU.Formulae) {
5458 int NumReqRegsToFind = std::min(
F.getNumRegs(), ReqRegs.
size());
5459 for (
const SCEV *Reg : ReqRegs) {
5460 if ((
F.ScaledReg &&
F.ScaledReg == Reg) ||
5463 if (NumReqRegsToFind == 0)
5467 if (NumReqRegsToFind != 0) {
5478 NewCost.RateFormula(
F, NewRegs, VisitedRegs, LU);
5479 if (NewCost.isLess(SolutionCost)) {
5481 if (Workspace.
size() !=
Uses.size()) {
5482 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5483 NewRegs, VisitedRegs);
5484 if (
F.getNumRegs() == 1 && Workspace.
size() == 1)
5485 VisitedRegs.
insert(
F.ScaledReg ?
F.ScaledReg :
F.BaseRegs[0]);
5488 dbgs() <<
".\nRegs:\n";
5489 for (
const SCEV *S : NewRegs)
dbgs()
5490 <<
"- " << *S <<
"\n";
5493 SolutionCost = NewCost;
5494 Solution = Workspace;
5505 Cost SolutionCost(L, SE,
TTI, AMK);
5506 SolutionCost.Lose();
5507 Cost CurCost(L, SE,
TTI, AMK);
5513 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5514 CurRegs, VisitedRegs);
5515 if (Solution.
empty()) {
5522 "The chosen solution requires ";
5523 SolutionCost.print(
dbgs());
dbgs() <<
":\n";
5524 for (
size_t i = 0, e =
Uses.size(); i != e; ++i) {
5529 Solution[i]->print(
dbgs());
5535 const bool EnableDropUnprofitableSolution = [&] {
5547 if (BaselineCost.isLess(SolutionCost)) {
5548 if (!EnableDropUnprofitableSolution)
5550 dbgs() <<
"Baseline is more profitable than chosen solution, "
5551 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5554 "solution, dropping LSR solution.\n";);
5569 bool AllDominate =
true;
5573 if (isa<CatchSwitchInst>(Tentative))
5577 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
5578 AllDominate =
false;
5583 if (Tentative->
getParent() == Inst->getParent() &&
5584 (!BetterPos || !DT.dominates(Inst, BetterPos)))
5594 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5595 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5598 for (
DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
5599 if (!Rung)
return IP;
5600 Rung = Rung->getIDom();
5601 if (!Rung)
return IP;
5602 IDom = Rung->getBlock();
5605 const Loop *IDomLoop = LI.getLoopFor(IDom);
5606 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5607 if (IDomDepth <= IPLoopDepth &&
5608 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5626 if (
Instruction *
I = dyn_cast<Instruction>(LF.OperandValToReplace))
5628 if (LU.Kind == LSRUse::ICmpZero)
5630 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
5632 if (LF.PostIncLoops.count(L)) {
5633 if (LF.isUseFullyOutsideLoop(L))
5634 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5640 for (
const Loop *PIL : LF.PostIncLoops) {
5641 if (PIL == L)
continue;
5646 if (!ExitingBlocks.
empty()) {
5648 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5649 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
5654 assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
5655 && !isa<DbgInfoIntrinsic>(LowestIP) &&
5656 "Insertion point must be a normal instruction");
5663 while (isa<PHINode>(IP)) ++IP;
5666 while (IP->isEHPad()) ++IP;
5669 while (isa<DbgInfoIntrinsic>(IP)) ++IP;
5674 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5682Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5685 if (LU.RigidFormula)
5686 return LF.OperandValToReplace;
5690 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5695 Rewriter.setPostInc(LF.PostIncLoops);
5698 Type *OpTy = LF.OperandValToReplace->getType();
5700 Type *Ty =
F.getType();
5714 for (
const SCEV *Reg :
F.BaseRegs) {
5715 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5723 Value *ICmpScaledV =
nullptr;
5725 const SCEV *ScaledS =
F.ScaledReg;
5731 if (LU.Kind == LSRUse::ICmpZero) {
5741 "The only scale supported by ICmpZero uses is -1!");
5742 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5750 if (!Ops.
empty() && LU.Kind == LSRUse::Address &&
5786 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5787 "Expanding mismatched offsets\n");
5789 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5790 if (
Offset.isNonZero()) {
5791 if (LU.Kind == LSRUse::ICmpZero) {
5799 ICmpScaledV = ConstantInt::get(IntTy,
Offset.getFixedValue());
5809 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5810 if (UnfoldedOffset.isNonZero()) {
5812 Ops.
push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5827 if (LU.Kind == LSRUse::ICmpZero) {
5828 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
5829 if (
auto *OperandIsInstr = dyn_cast<Instruction>(CI->
getOperand(1)))
5831 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5832 "a scale at the same time!");
5833 if (
F.Scale == -1) {
5834 if (ICmpScaledV->
getType() != OpTy) {
5844 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5845 "ICmp does not support folding a global value and "
5846 "a scale at the same time!");
5849 if (
C->getType() != OpTy) {
5853 assert(
C &&
"Cast of ConstantInt should have folded");
5866void LSRInstance::RewriteForPHI(
5867 PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
5879 bool needUpdateFixups =
false;
5890 Loop *PNLoop = LI.getLoopFor(Parent);
5891 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5898 .setMergeIdenticalEdges()
5899 .setKeepOneInputPHIs());
5913 if (
L->contains(BB) && !
L->contains(PN))
5921 needUpdateFixups =
true;
5926 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5927 Inserted.insert(std::make_pair(BB,
static_cast<Value *
>(
nullptr)));
5935 Type *OpTy = LF.OperandValToReplace->getType();
5939 LF.OperandValToReplace->getType(),
"tmp",
5945 if (
auto *
I = dyn_cast<Instruction>(FullV))
5946 if (
L->contains(
I) && !
L->contains(BB))
5950 Pair.first->second = FullV;
5957 if (needUpdateFixups) {
5958 for (LSRUse &LU :
Uses)
5959 for (LSRFixup &
Fixup : LU.Fixups)
5963 if (
Fixup.UserInst == PN) {
5966 bool foundInOriginalPHI =
false;
5968 if (val ==
Fixup.OperandValToReplace) {
5969 foundInOriginalPHI =
true;
5974 if (foundInOriginalPHI)
5985 if (val ==
Fixup.OperandValToReplace)
5986 Fixup.UserInst = NewPN;
5998void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
6003 if (
PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
6004 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6006 Value *FullV = Expand(LU, LF,
F, LF.UserInst->getIterator(), DeadInsts);
6009 Type *OpTy = LF.OperandValToReplace->getType();
6010 if (FullV->
getType() != OpTy) {
6013 FullV, OpTy,
"tmp", LF.UserInst->getIterator());
6022 if (LU.Kind == LSRUse::ICmpZero)
6025 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
6028 if (
auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
6038 if (LU.Kind != LSRUse::Address)
6045 if (IVIncInsertPos->
getParent() == LHeader)
6048 if (!
Fixup.OperandValToReplace ||
6050 Instruction *UI = cast<Instruction>(U);
6051 return UI->getParent() != LHeader;
6056 Type *Ty =
I->getType();
6064void LSRInstance::ImplementSolution(
6071 for (
const IVChain &Chain : IVChainVec) {
6072 if (
PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
6077 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6078 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6081 ?
L->getHeader()->getTerminator()
6083 Rewriter.setIVIncInsertPos(L, InsertPos);
6084 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6088 for (
const IVChain &Chain : IVChainVec) {
6089 GenerateIVChain(Chain, DeadInsts);
6095 ScalarEvolutionIVs.push_back(
IV);
6111 for (
PHINode &PN :
L->getHeader()->phis()) {
6113 Value *Start =
nullptr, *Step =
nullptr;
6118 case Instruction::Sub:
6123 case Instruction::Add:
6129 if (!isa<Constant>(Step))
6134 if (BO->
getParent() == IVIncInsertPos->getParent())
6140 [&](
Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6153 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6156 :
TTI.getPreferredAddressingMode(
L, &SE)),
6158 BaselineCost(
L, SE,
TTI, AMK) {
6160 if (!
L->isLoopSimplifyForm())
6164 if (IU.
empty())
return;
6168 unsigned NumUsers = 0;
6172 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6179 if (
auto *PN = dyn_cast<PHINode>(
U.getUser())) {
6180 auto *FirstNonPHI = PN->
getParent()->getFirstNonPHI();
6181 if (isa<FuncletPadInst>(FirstNonPHI) ||
6182 isa<CatchSwitchInst>(FirstNonPHI))
6184 if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
6190 L->getHeader()->printAsOperand(
dbgs(),
false);
6203 OptimizeLoopTermCond();
6206 if (IU.empty())
return;
6209 if (!
L->isInnermost()) {
6222 CollectInterestingTypesAndFactors();
6223 CollectFixupsAndInitialFormulae();
6224 CollectLoopInvariantFixupsAndFormulae();
6230 print_uses(
dbgs()));
6232 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6236 GenerateAllReuseFormulae();
6238 FilterOutUndesirableDedicatedRegisters();
6239 NarrowSearchSpaceUsingHeuristics();
6249 if (Solution.
empty())
6254 for (
const LSRUse &LU :
Uses) {
6255 for (
const Formula &
F : LU.Formulae)
6257 F) &&
"Illegal formula generated!");
6262 ImplementSolution(Solution);
6265#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6266void LSRInstance::print_factors_and_types(
raw_ostream &
OS)
const {
6267 if (Factors.empty() &&
Types.empty())
return;
6269 OS <<
"LSR has identified the following interesting factors and types: ";
6272 for (int64_t Factor : Factors) {
6275 OS <<
'*' << Factor;
6278 for (
Type *Ty : Types) {
6281 OS <<
'(' << *Ty <<
')';
6287 OS <<
"LSR is examining the following fixup sites:\n";
6288 for (
const LSRUse &LU :
Uses)
6289 for (
const LSRFixup &LF : LU.Fixups) {
6297 OS <<
"LSR is examining the following uses:\n";
6298 for (
const LSRUse &LU :
Uses) {
6302 for (
const Formula &
F : LU.Formulae) {
6311 print_factors_and_types(
OS);
6323class LoopStrengthReduce :
public LoopPass {
6327 LoopStrengthReduce();
6336LoopStrengthReduce::LoopStrengthReduce() :
LoopPass(
ID) {
6340void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6372 return {Begin,
End};
6375struct SCEVDbgValueBuilder {
6376 SCEVDbgValueBuilder() =
default;
6377 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6379 void clone(
const SCEVDbgValueBuilder &
Base) {
6380 LocationOps =
Base.LocationOps;
6385 LocationOps.clear();
6402 unsigned ArgIndex = 0;
6403 if (It != LocationOps.
end()) {
6404 ArgIndex = std::distance(LocationOps.
begin(), It);
6406 ArgIndex = LocationOps.
size();
6418 if (
C->getAPInt().getSignificantBits() > 64)
6420 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6421 Expr.
push_back(
C->getAPInt().getSExtValue());
6428 return ToDwarfOpIter(Expr);
6435 assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6436 "Expected arithmetic SCEV type");
6438 unsigned EmitOperator = 0;
6439 for (
const auto &
Op : CommExpr->
operands()) {
6442 if (EmitOperator >= 1)
6443 pushOperator(DwarfOp);
6454 bool Success = pushSCEV(Inner);
6456 IsSigned ? llvm::dwarf::DW_ATE_signed
6457 : llvm::dwarf::DW_ATE_unsigned};
6458 for (
const auto &
Op : CastOps)
6466 if (
const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
6467 Success &= pushConst(StartInt);
6469 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6472 pushLocation(
U->getValue());
6474 }
else if (
const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6475 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6477 }
else if (
const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6478 Success &= pushSCEV(UDiv->getLHS());
6479 Success &= pushSCEV(UDiv->getRHS());
6480 pushOperator(llvm::dwarf::DW_OP_div);
6482 }
else if (
const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6484 assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
6485 isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
6486 "Unexpected cast type in SCEV.");
6487 Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
6489 }
else if (
const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
6490 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6492 }
else if (isa<SCEVAddRecExpr>(S)) {
6507 if (
C->getAPInt().getSignificantBits() > 64)
6509 int64_t
I =
C->getAPInt().getSExtValue();
6511 case llvm::dwarf::DW_OP_plus:
6512 case llvm::dwarf::DW_OP_minus:
6514 case llvm::dwarf::DW_OP_mul:
6515 case llvm::dwarf::DW_OP_div:
6531 if (isa<SCEVAddRecExpr>(SAR.
getStart()))
6538 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6539 if (!pushSCEV(Stride))
6541 pushOperator(llvm::dwarf::DW_OP_mul);
6543 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6544 if (!pushSCEV(Start))
6546 pushOperator(llvm::dwarf::DW_OP_plus);
6552 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6553 pushLocation(OffsetValue);
6556 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6557 << std::to_string(
Offset) <<
"\n");
6563 bool createIterCountExpr(
const SCEV *S,
6564 const SCEVDbgValueBuilder &IterationCount,
6571 if (!isa<SCEVAddRecExpr>(S))
6574 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6577 const auto *Rec = cast<SCEVAddRecExpr>(S);
6578 if (!Rec->isAffine())
6586 clone(IterationCount);
6587 if (!SCEVToValueExpr(*Rec, SE))
6601 if (isa<SCEVAddRecExpr>(SAR.
getStart())) {
6602 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV. Unsupported nested AddRec: "
6610 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6611 if (!pushSCEV(Start))
6613 pushOperator(llvm::dwarf::DW_OP_minus);
6615 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6616 if (!pushSCEV(Stride))
6618 pushOperator(llvm::dwarf::DW_OP_div);
6629 "Expected the locations vector to contain the IV");
6634 "Expected the location ops to contain the IV.");
6638 for (
const auto &
Op : LocationOps) {
6639 auto It =
find(DestLocations,
Op);
6640 if (It != DestLocations.
end()) {
6642 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6650 for (
const auto &
Op : expr_ops()) {
6652 Op.appendToVector(DestExpr);
6659 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6667struct DVIRecoveryRec {
6670 HadLocationArgList(
false) {}
6672 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6676 bool HadLocationArgList;
6682 for (
auto &RE : RecoveryExprs)
6684 RecoveryExprs.clear();
6687 ~DVIRecoveryRec() {
clear(); }
6695 auto expr_ops = ToDwarfOpIter(Expr);
6697 for (
auto Op : expr_ops)
6706template <
typename T>
6710 "contain any DW_OP_llvm_arg operands.");
6712 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6713 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6717template <
typename T>
6722 "Expected expression that references DIArglist locations using "
6723 "DW_OP_llvm_arg operands.");
6725 for (
Value *V : Locations)
6729 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6740 auto UpdateDbgValueInstImpl = [&](
auto *DbgVal) {
6742 if (NumLLVMArgs == 0) {
6749 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6761 if (!DVIRec.Expr->isComplex() && SalvageExpr->
isComplex()) {
6764 DbgVal->setExpression(SalvageExpr);
6767 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6768 UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6770 UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6784 auto RestorePreTransformStateImpl = [&](
auto *DbgVal) {
6785 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6786 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6787 assert(DVIRec.Expr &&
"Expected an expression");
6788 DbgVal->setExpression(DVIRec.Expr);
6792 if (!DVIRec.HadLocationArgList) {
6793 assert(DVIRec.LocationOps.size() == 1 &&
6794 "Unexpected number of location ops.");
6798 Value *CachedValue =
6803 for (
WeakVH VH : DVIRec.LocationOps) {
6808 DbgVal->setRawLocation(
6811 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6813 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6814 RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
6816 RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
6821 const SCEV *SCEVInductionVar,
6822 SCEVDbgValueBuilder IterCountExpr) {
6824 if (isa<DbgValueInst *>(DVIRec.DbgRef)
6825 ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
6826 : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
6838 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6840 NewLocationOps.
push_back(LSRInductionVar);
6842 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6843 WeakVH VH = DVIRec.LocationOps[i];
6847 if (VH && !isa<UndefValue>(VH)) {
6849 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6851 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6859 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6860 <<
" refers to a location that is now undef or erased. "
6861 "Salvage abandoned.\n");
6865 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6866 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6868 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6869 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6873 if (std::optional<APInt>
Offset =
6875 if (
Offset->getSignificantBits() <= 64)
6876 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6877 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6885 if (DVIRec.Expr->getNumElements() == 0) {
6886 assert(DVIRec.RecoveryExprs.size() == 1 &&
6887 "Expected only a single recovery expression for an empty "
6889 assert(DVIRec.RecoveryExprs[0] &&
6890 "Expected a SCEVDbgSalvageBuilder for location 0");
6891 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6892 B->appendToVectors(
NewExpr, NewLocationOps);
6894 for (
const auto &
Op : DVIRec.Expr->expr_ops()) {
6902 SCEVDbgValueBuilder *DbgBuilder =
6903 DVIRec.RecoveryExprs[LocationArgIndex].get();
6909 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6910 "Expected a positive index for the location-op position.");
6911 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6915 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6919 if (isa<DbgValueInst *>(DVIRec.DbgRef))
6921 << *cast<DbgValueInst *>(DVIRec.DbgRef) <<
"\n");
6924 << *cast<DbgVariableRecord *>(DVIRec.DbgRef) <<
"\n");
6932 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6933 if (DVIToUpdate.empty())
6937 assert(SCEVInductionVar &&
6938 "Anticipated a SCEV for the post-LSR induction variable");
6941 dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
6942 if (!IVAddRec->isAffine())
6950 SCEVDbgValueBuilder IterCountExpr;
6951 IterCountExpr.pushLocation(LSRInductionVar);
6952 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6955 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6958 for (
auto &DVIRec : DVIToUpdate) {
6959 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6970 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
6972 for (
const auto &
B : L->getBlocks()) {
6973 for (
auto &
I : *
B) {
6974 auto ProcessDbgValue = [&](
auto *DbgVal) ->
bool {
6977 if (DbgVal->isKillLocation())
6982 const auto &HasTranslatableLocationOps =
6983 [&](
const auto *DbgValToTranslate) ->
bool {
6984 for (
const auto LocOp : DbgValToTranslate->location_ops()) {
6998 if (!HasTranslatableLocationOps(DbgVal))
7001 std::unique_ptr<DVIRecoveryRec> NewRec =
7002 std::make_unique<DVIRecoveryRec>(DbgVal);
7006 NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
7007 for (
const auto LocOp : DbgVal->location_ops()) {
7008 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
7009 NewRec->LocationOps.push_back(LocOp);
7010 NewRec->HadLocationArgList = DbgVal->hasArgList();
7012 SalvageableDVISCEVs.push_back(std::move(NewRec));
7016 if (DVR.isDbgValue() || DVR.isDbgAssign())
7017 ProcessDbgValue(&DVR);
7019 auto DVI = dyn_cast<DbgValueInst>(&
I);
7022 if (ProcessDbgValue(DVI))
7023 DVIHandles.insert(DVI);
7032 const LSRInstance &LSR) {
7034 auto IsSuitableIV = [&](
PHINode *
P) {
7045 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7052 if (IsSuitableIV(
P))
7056 for (
PHINode &
P : L.getHeader()->phis()) {
7057 if (IsSuitableIV(&
P))
7063static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
7066 if (!L->isInnermost()) {
7068 return std::nullopt;
7071 if (!L->isLoopSimplifyForm()) {
7073 return std::nullopt;
7077 LLVM_DEBUG(
dbgs() <<
"Cannot fold on backedge that is loop variant\n");
7078 return std::nullopt;
7084 return std::nullopt;
7085 auto *TermCond = dyn_cast<ICmpInst>(BI->
getCondition());
7088 dbgs() <<
"Cannot fold on branching condition that is not an ICmpInst");
7089 return std::nullopt;
7091 if (!TermCond->hasOneUse()) {
7094 <<
"Cannot replace terminating condition with more than one use\n");
7095 return std::nullopt;
7099 Value *
RHS = TermCond->getOperand(1);
7100 if (!
LHS || !L->isLoopInvariant(
RHS))
7103 return std::nullopt;
7107 Value *ToFoldStart, *ToFoldStep;
7109 return std::nullopt;
7112 if (ToFold->
getParent() != L->getHeader())
7113 return std::nullopt;
7118 return std::nullopt;
7122 const unsigned ExpansionBudget = [&]() {
7125 return std::min(Budget, SmallTC);
7127 return std::min(Budget, *SmallTC);
7133 const DataLayout &
DL = L->getHeader()->getDataLayout();
7136 PHINode *ToHelpFold =
nullptr;
7137 const SCEV *TermValueS =
nullptr;
7138 bool MustDropPoison =
false;
7139 auto InsertPt = L->getLoopPreheader()->getTerminator();
7140 for (
PHINode &PN : L->getHeader()->phis()) {
7146 <<
"' is not SCEV-able, not qualified for the "
7147 "terminating condition folding.\n");
7152 if (!AddRec || !AddRec->
isAffine()) {
7154 <<
"' is not an affine add recursion, not qualified "
7155 "for the terminating condition folding.\n");
7173 const SCEV *TermValueSLocal =
PostInc->evaluateAtIteration(BECount, SE);
7176 dbgs() <<
"Is not safe to expand terminating value for phi node" << PN
7184 dbgs() <<
"Is too expensive to expand terminating value for phi node"
7201 bool MustDropPoisonLocal =
false;
7223 TermValueS = TermValueSLocal;
7224 MustDropPoison = MustDropPoisonLocal;
7228 <<
"Cannot find other AddRec IV to help folding\n";);
7231 <<
"\nFound loop that can fold terminating condition\n"
7233 <<
" TermCond: " << *TermCond <<
"\n"
7234 <<
" BrandInst: " << *BI <<
"\n"
7235 <<
" ToFold: " << *ToFold <<
"\n"
7236 <<
" ToHelpFold: " << *ToHelpFold <<
"\n");
7238 if (!ToFold || !ToHelpFold)
7239 return std::nullopt;
7240 return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
7255 bool Changed =
false;
7256 std::unique_ptr<MemorySSAUpdater> MSSAU;
7258 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7261 const LSRInstance &Reducer =
7262 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7263 Changed |= Reducer.getChanged();
7269 const DataLayout &
DL = L->getHeader()->getDataLayout();
7274 unsigned numFolded =
Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7288 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7290 const DataLayout &
DL = L->getHeader()->getDataLayout();
7303 const bool EnableFormTerm = [&] {
7315 if (EnableFormTerm) {
7317 auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
7322 BasicBlock *LoopPreheader = L->getLoopPreheader();
7328 <<
"New term-cond phi-node:\n"
7329 << *ToHelpFold <<
"\n");
7331 Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
7333 Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
7337 cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
7340 const DataLayout &
DL = L->getHeader()->getDataLayout();
7344 "Terminating value was checked safe in canFoldTerminatingCondition");
7351 << *StartValue <<
"\n"
7352 <<
"Terminating value of new term-cond phi-node:\n"
7353 << *TermValue <<
"\n");
7359 Value *NewTermCond =
7361 "lsr_fold_term_cond.replaced_term_cond");
7367 << *OldTermCond <<
"\n"
7368 <<
"New term-cond:\n" << *NewTermCond <<
"\n");
7378 if (SalvageableDVIRecords.
empty())
7384 for (
const auto &L : LI) {
7388 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7389 "could not be identified.\n");
7393 for (
auto &Rec : SalvageableDVIRecords)
7395 SalvageableDVIRecords.
clear();
7404 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7405 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7406 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7407 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7408 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7409 *
L->getHeader()->getParent());
7410 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7411 *
L->getHeader()->getParent());
7412 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7413 *
L->getHeader()->getParent());
7414 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7417 MSSA = &MSSAAnalysis->getMSSA();
7434char LoopStrengthReduce::ID = 0;
7437 "Loop Strength Reduction",
false,
false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This defines the Use class.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static std::optional< std::tuple< PHINode *, PHINode *, const SCEV *, bool > > canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, const LoopInfo &LI, const TargetTransformInfo &TTI)
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static cl::opt< cl::boolOrDefault > AllowTerminatingConditionFoldingAfterLSR("lsr-term-fold", cl::Hidden, cl::desc("Attempt to replace primary IV with other IV."))
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs, SmallSet< AssertingVH< DbgValueInst >, 2 > &DVIHandles)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class recording the (high level) value of a variable.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Value handle that asserts if the Value is deleted.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
void swapSuccessors()
Swap the successors of this branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
This is the shared class of boolean and integer constants.
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
An iterator for expression operands.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
This represents the llvm.dbg.value instruction.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
void print(raw_ostream &OS) const
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
bool hasPoisonGeneratingFlags() const LLVM_READONLY
Return true if this operator has flags which may cause this instruction to evaluate to poison despite...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
PointerIntPair - This class implements a pair of a pointer and small integer.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
const Loop * getLoop() const
This is the base class for unary cast operator classes.
This node is the base class for n'ary commutative operators.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
bool hasNoUnsignedWrap() const
bool hasNoSelfWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This class represents a signed maximum selection.
This class represents a binary unsigned division operation.
This class represents an unsigned maximum selection.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getVScale(Type *Ty)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static Type * getVoidTy(LLVMContext &C)
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
A nullable Value handle that is nullable.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Pass * createLoopStrengthReducePass()
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
void initializeLoopStrengthReducePass(PassRegistry &)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Option class for critical edge splitting.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.