#define DEBUG_TYPE "loop-reduce"
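
// Command-line flags that tune LSR's cost model and search-space pruning.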
  cl::desc("Enable LSR phi elimination"));

  cl::desc("Add instruction count to a LSR cost model"));

  cl::desc("Narrow LSR complex solution using"
           " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
                 "Prefer pre-indexed addressing mode"),
                 "Prefer post-indexed addressing mode"),

  cl::init(std::numeric_limits<uint16_t>::max()),
  cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to drop solution if it is less profitable"));

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

  cl::desc("Stress test LSR IV chains"));
 
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }
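
/// This class holds data which is used to order reuse candidates.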
 
  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
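
/// An address offset that is either a fixed integer or a scalable quantity
/// (a multiple of vscale), built on FixedOrScalableQuantity.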
 
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  // Two Immediates are compatible if both are scalable, both are fixed, or
  // one of them is zero.
  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  // Arithmetic on the unsigned representation, so wraparound is well defined;
  // the result is scalable if either operand is.
  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};
  }

  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {

  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);

  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
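
/// Comparators so Immediates can serve as keys in sorted containers: fixed
/// offsets order before scalable ones, then by known-min value.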
 
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
      return false;
    if (!LHS.isScalable() && RHS.isScalable())
      return true;
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();
  }
};

struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
    if (LSize != RSize)
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);
  }
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << UsedByIndices.count() << ']';
}
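
/// Map register candidates to information about how they are used.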
 
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

void
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);
  RegSortData &RSD = Pair.first->second;

  // Mark this use index as referencing the register.
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

void
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
}

void
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  // Move the bit for the last use into the dropped use's slot, then shrink
  // every bit vector.
  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
        LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}

bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;
  return UsedByIndices.find_next(i) != -1;
}

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
}

void RegUseTracker::clear() {
 
/// This class holds information that describes a formula for computing
/// satisfying a use. It may include broken-out immediates and scaled
/// registers.

  GlobalValue *BaseGV = nullptr;

  Immediate BaseOffset = Immediate::getZero();

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  Immediate UnfoldedOffset = Immediate::getZero();

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;
  bool countsDownToZero() const;
  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
 
    for (const SCEV *S : Add->operands())

  const SCEV *Start, *Step;

    if (Mul->getOperand(0)->isAllOnesValue()) {

      for (const SCEV *S : MyGood)
      for (const SCEV *S : MyBad)

void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {

      BaseRegs.push_back(Sum);

      BaseRegs.push_back(Sum);
 
bool Formula::isCanonical(const Loop &L) const {
  assert((Scale == 0 || ScaledReg) &&
         "ScaledReg must be non-null if Scale is non-zero");

    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())
    return false;

  return none_of(BaseRegs, [&L](const SCEV *S) {

void Formula::canonicalize(const Loop &L) {

  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);

    ScaledReg = BaseRegs.pop_back_val();

    auto I = find_if(BaseRegs, [&L](const SCEV *S) {
    if (I != BaseRegs.end())
 
bool Formula::unscale() {
  if (Scale != 1)
    return false;
  Scale = 0;
  BaseRegs.push_back(ScaledReg);
  ScaledReg = nullptr;
  return true;
}

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
    return false;
  if (BaseRegs.size() != 1 || ScaledReg)
    return false;
  return true;
}

bool Formula::countsDownToZero() const {
  if (!hasZeroEnd())
    return false;
  assert(BaseRegs.size() == 1 && "hasZeroEnd should mean one BaseReg");
  const APInt *StepInt;

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
         nullptr;
}

void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())
    std::swap(S, BaseRegs.back());
  BaseRegs.pop_back();
}

bool Formula::referencesReg(const SCEV *S) const {
  return S == ScaledReg || is_contained(BaseRegs, S);
}

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (ScaledReg)
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
      return true;
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
      return true;
  return false;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  bool First = true;

  if (BaseOffset.isNonZero()) {

  for (const SCEV *BaseReg : BaseRegs) {
    OS << "reg(" << *BaseReg << ')';
  }
  if (HasBaseReg && BaseRegs.empty()) {
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    OS << "**error: !HasBaseReg**";
  }
  if (Scale != 0) {
    OS << Scale << "*reg(";

  if (UnfoldedOffset.isNonZero()) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
  }
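
/// Return an expression for LHS /s RHS, if it can be determined and if the
/// remainder is known to be zero, or null otherwise.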
 
                                bool IgnoreSignificantBits = false) {

    if (RA.isAllOnes()) {
      if (LHS->getType()->isPointerTy())

    const APInt &LA = C->getAPInt();

    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;

      for (const SCEV *S : Add->operands()) {
        if (!Op) return nullptr;

      for (const SCEV *S : Mul->operands()) {
                                           IgnoreSignificantBits)) {

    if (C->getSignificantBits() <= 64) {
      return Immediate::getFixed(C->getSExtValue());

    if (Result.isNonZero())

    if (Result.isNonZero())

    return Immediate::getScalable(C->getSExtValue());

  return Immediate::getZero();
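
/// Returns true if the specified instruction is using the specified value as
/// an address.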
 
 
    if (SI->getPointerOperand() == OperandVal)

    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)

    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)

    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)

      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
        if (IntrInfo.PtrVal == OperandVal)

    if (RMW->getPointerOperand() == OperandVal)

    if (CmpX->getPointerOperand() == OperandVal)

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());

    AccessTy.MemTy = Ty;

    AccessTy.AddrSpace = SI->getPointerAddressSpace();
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();

    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
      break;
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
      break;
    case Intrinsic::masked_load:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      break;
    case Intrinsic::masked_store:
      AccessTy.AddrSpace =
          II->getArgOperand(1)->getType()->getPointerAddressSpace();
      break;

      if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
 
 
  if (!Processed.insert(S).second)

    for (const SCEV *S : Add->operands()) {

  const SCEV *Op0, *Op1;

      Value *UVal = U->getValue();

        if (UI && UI->getOpcode() == Instruction::Mul &&
 
 
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 const LSRUse &LU, const Formula &F);

static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
                                            const LSRUse &LU, const Formula &F,
                                            const Loop &L);

/// This class is used to measure and compare candidate formulae.
class Cost {
  const Loop *L = nullptr;
  ScalarEvolution *SE = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  TargetTransformInfo::LSRCost C;

  Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
       TTI::AddressingModeKind AMK) :
    L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

  // Once any of the metrics loses, they must all remain losers.
  bool isValid() {
    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
      || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
           & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
  }

  bool isLoser() {
    return C.NumRegs == ~0u;
  }

  void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
                   const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,
                   bool HardwareLoopProfitable,
                   SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);

  void print(raw_ostream &OS) const;

  void RateRegister(const Formula &F, const SCEV *Reg,
                    SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
                    bool HardwareLoopProfitable);
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                           SmallPtrSetImpl<const SCEV *> &Regs,
                           const LSRUse &LU, bool HardwareLoopProfitable,
                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
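
/// An operand value in an instruction which is to be replaced with some
/// equivalent, possibly strength-reduced, replacement.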
 
  Value *OperandValToReplace = nullptr;

  Immediate Offset = Immediate::getZero();

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

  void print(raw_ostream &OS) const;
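
/// This class holds the state that LSR keeps for each use in IVUsers, as well
/// as uses invented by LSR itself. It includes a list of formulae that can
/// compute the value needed by the use.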
 
  DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;

  using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;

  MemAccessTy AccessTy;

  Immediate MinOffset = Immediate::getFixedMax();
  Immediate MaxOffset = Immediate::getFixedMin();

  bool AllFixupsOutsideLoop = true;

  bool AllFixupsUnconditional = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  SmallPtrSet<const SCEV *, 4> Regs;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());
    return Fixups.back();
  }

  void pushFixup(LSRFixup &f) {
    Fixups.push_back(f);
    if (Immediate::isKnownGT(f.Offset, MaxOffset))
      MaxOffset = f.Offset;
    if (Immediate::isKnownLT(f.Offset, MinOffset))
      MinOffset = f.Offset;
  }

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

  void print(raw_ostream &OS) const;
 
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, Immediate BaseOffset,
                                 bool HasBaseReg, int64_t Scale,
                                 Instruction *Fixup = nullptr);

                           [&](unsigned i, const SCEV *Reg) {
                             return i + getSetupCost(Reg, Depth - 1);
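
/// Tally up interesting quantities from the given register.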
 
 
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
                        SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
                        bool HardwareLoopProfitable) {

    if (AR->getLoop() != L) {

      if (!AR->getLoop()->contains(L)) {

    unsigned LoopCost = 1;

                           F.BaseOffset.isFixed() &&
                           *Step == F.BaseOffset.getFixedValue();

        if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)

    if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
        HardwareLoopProfitable)

    C.AddRecCost += LoopCost;

      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
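
/// Record this register in the set. If we haven't seen it before, rate
/// it. Optional LoserRegs provides a way to declare any formula that refers to
/// one of those regs an instant loser.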
 
void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                               SmallPtrSetImpl<const SCEV *> &Regs,
                               const LSRUse &LU, bool HardwareLoopProfitable,
                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  if (LoserRegs && LoserRegs->count(Reg)) {
    Lose();
    return;
  }
  if (Regs.insert(Reg).second) {
    RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);
    if (LoserRegs && isLoser())
      LoserRegs->insert(Reg);
  }
}
 
void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
                       const DenseSet<const SCEV *> &VisitedRegs,
                       const LSRUse &LU, bool HardwareLoopProfitable,
                       SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
  // Tally up the registers.
  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
      Lose();
      return;
    }
    RatePrimaryRegister(F, ScaledReg, Regs, LU, HardwareLoopProfitable,
                        LoserRegs);
    if (isLoser())
      return;
  }
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
      Lose();
      return;
    }
    RatePrimaryRegister(F, BaseReg, Regs, LU, HardwareLoopProfitable,
                        LoserRegs);
    if (isLoser())
      return;
  }

  // Determine how many (unfolded) adds we'll need inside the loop.
  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());

  // Tally up the non-zero immediates.
  for (const LSRFixup &Fixup : LU.Fixups) {
    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);

      else if (Offset.isNonZero())
            APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();

      if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&

  if (C.NumRegs > TTIRegNum) {
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
    else
      C.Insns += (C.NumRegs - TTIRegNum);
  }

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&

  C.Insns += (C.AddRecCost - PrevAddRecCost);

  // BaseAdds adds instructions for unfolded registers.
  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
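
/// Set this cost to a losing value.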
 
void Cost::Lose() {
  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
}
 
/// Choose the lower cost.
bool Cost::isLess(const Cost &Other) const {
  if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

  return TTI->isLSRCostLess(C, Other.C);
}
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Cost::print(raw_ostream &OS) const {
  if (InsnsCost)
    OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
  if (C.ImmCost != 0)
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
}
 
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  // PHI nodes use their value in their incoming blocks.
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))
        return false;
    return true;
  }

  return !L->contains(UserInst);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::print(raw_ostream &OS) const {

    Store->getOperand(0)->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);
  }

    OS << ", Offset=" << Offset;
 
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  // Unstable sort by host order ok, because this is only used for uniquifying.
  llvm::sort(Key);
  return Uniquifier.count(Key);
}

/// The function returns a probability of selecting a formula which uses \p Reg.
float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  unsigned FNum = 0;
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
      FNum++;
  return ((float)(Formulae.size() - FNum)) / Formulae.size();
}

/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)
    return false;

  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  // Unstable sort by host order ok, because this is only used for uniquifying.
  llvm::sort(Key);

  if (!Uniquifier.insert(Key).second)
    return false;

  // Using a register to hold the value of 0 is not profitable.
  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");

  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  // Add the formula to the list.
  Formulae.push_back(F);

  // Record registers now being used by this use.
  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
  if (F.ScaledReg)
    Regs.insert(F.ScaledReg);

  return true;
}

/// Remove the given formula from this use's list.
void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
    std::swap(F, Formulae.back());
  Formulae.pop_back();
}

/// Recompute the Regs field, and update RegUses.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  // Now that we've filtered out some formulae, recompute the Regs set.
  SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
  Regs.clear();
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
  }

  // Update the RegTracker.
  for (const SCEV *S : OldRegs)
    if (!Regs.count(S))
      RegUses.dropRegister(S, LUIdx);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  switch (Kind) {
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
  case Address:
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
      OS << "pointer";
    else
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';
  }

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';
    OS << Fixup.Offset;
    NeedComma = true;
  }
  OS << '}';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (AllFixupsUnconditional)
    OS << ", all-fixups-unconditional";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
}
 
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,

  switch (Kind) {
  case LSRUse::Address: {
    int64_t FixedOffset =
        BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
    int64_t ScalableOffset =
        BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
    return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup, ScalableOffset);
  }
  case LSRUse::ICmpZero:
    // ICmp only has two operands; don't allow more than two non-trivial parts.
    if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
      return false;

    // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale
    // by putting the scaled register in the other operand of the icmp.
    if (Scale != 0 && Scale != -1)
      return false;

    // If we have low-level target information, ask the target if it can fold
    // an integer immediate on an icmp.
    if (BaseOffset.isNonZero()) {
      if (BaseOffset.isScalable())
        return false;

      if (Scale == 0)
        BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
      return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
    }

  case LSRUse::Basic:
    // Only handle single-register values.
    return !BaseGV && Scale == 0 && BaseOffset.isZero();

  case LSRUse::Special:
    // Special case Basic to handle -1 scales.
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
 
 
                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  // Check for overflow.
  if (BaseOffset.isNonZero() &&
      (BaseOffset.isScalable() != MinOffset.isScalable() ||
       BaseOffset.isScalable() != MaxOffset.isScalable()))
    return false;

  int64_t Base = BaseOffset.getKnownMinValue();
  int64_t Min = MinOffset.getKnownMinValue();
  int64_t Max = MaxOffset.getKnownMinValue();

  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());

  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());

                              HasBaseReg, Scale) &&

                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 const Formula &F, const Loop &L) {
  // For the purpose of isAMCompletelyFolded either having a canonical formula
  // or a scale not equal to zero is correct.
  assert((F.isCanonical(L) || F.Scale != 0));
  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}

/// Test whether we know how to expand the current formula.
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, GlobalValue *BaseGV,
                       Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
  // We know how to expand completely foldable formulae.
  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
                              BaseOffset, HasBaseReg, Scale) ||
         // Or formulae that use a base register produced by a sum of base
         // registers.
         (Scale == 1 &&
          isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                               BaseGV, BaseOffset, true, 0));
}

static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
}

static bool isLegalAddImmediate(const TargetTransformInfo &TTI,
                                Immediate Offset) {
  if (Offset.isScalable())
    return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());

  return TTI.isLegalAddImmediate(Offset.getFixedValue());
}
 
 
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 const LSRUse &LU, const Formula &F) {
  // Target may want to look at the user instructions.
  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
    for (const LSRFixup &Fixup : LU.Fixups)
      if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                                F.Scale, Fixup.UserInst))
        return false;
    return true;
  }

  return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
                              F.Scale);
}

static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
                                            const LSRUse &LU, const Formula &F,
                                            const Loop &L) {
  if (!F.Scale)
    return 0;

  // If the use is not completely folded in that instruction, we will have to
  // pay an extra cost only for scale != 1.
  if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                            LU.AccessTy, F, L))
    return F.Scale != 1;

  switch (LU.Kind) {
  case LSRUse::Address: {
    // Check the scaling factor cost with both the min and max offsets.
    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
    if (F.BaseOffset.isScalable()) {
      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
    } else {
      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
    }

        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);

        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);

           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
  }
  case LSRUse::ICmpZero:
  case LSRUse::Basic:
  case LSRUse::Special:
    // The use is completely folded, i.e., everything is folded into the
    // instruction.
    return 0;
  }
 
                             LSRUse::KindType Kind, MemAccessTy AccessTy,

  // Fast-path: zero is always foldable.
  if (BaseOffset.isZero() && !BaseGV)
    return true;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  // Canonicalize a scale of 1 to a base register if the formula doesn't
  // already have a base register.
  if (!HasBaseReg && Scale == 1) {

  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&

static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
                             ScalarEvolution &SE, Immediate MinOffset,
                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
                             bool HasBaseReg) {
  // Fast-path: zero is always foldable.
  if (S->isZero()) return true;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  Immediate BaseOffset = ExtractImmediate(S, SE);
  GlobalValue *BaseGV = ExtractSymbol(S, SE);

  // If there's anything else involved, it's not foldable.
  if (!S->isZero()) return false;

  // Fast-path: zero is always foldable.
  if (BaseOffset.isZero() && !BaseGV)
    return true;

  if (BaseOffset.isScalable())
    return false;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                              BaseGV, BaseOffset, HasBaseReg, Scale);
}
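
/// An individual increment in a Chain of IV increments. Relate an IV user to
/// an expression that computes the IV it uses from the IV used by the previous
/// link in the Chain.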
 
 
  const SCEV *IncExpr;

  IVInc(Instruction *U, Value *O, const SCEV *E)
      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  // Return the first increment in the chain.
  const_iterator begin() const {
    assert(!Incs.empty());
    return std::next(Incs.begin());
  }
  const_iterator end() const {
    return Incs.end();
  }

  // Returns true if this chain contains any increments.
  bool hasIncs() const { return Incs.size() >= 2; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,
                             ScalarEvolution &SE);

  SmallPtrSet<Instruction*, 4> FarUsers;
  SmallPtrSet<Instruction*, 4> NearUsers;

  ScalarEvolution &SE;
  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  MemorySSAUpdater *MSSAU;

  bool HardwareLoopProfitable = false;
 
  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

  SmallSetVector<Type *, 4> Types;

  RegUseTracker RegUses;

  // Limit the number of chains to avoid quadratic behavior. We don't expect to
  // have more than a few IV increment chains in a loop. Missing a Chain falls
  // back to normal LSR behavior for those uses.
  static const unsigned MaxChains = 8;

  SmallPtrSet<Use*, MaxChains> IVIncSet;

  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

  SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;

  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
  void OptimizeLoopTermCond();

  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                        SmallVectorImpl<ChainUsers> &ChainUsersVec);
  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,
                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
  bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  const Formula &Base, unsigned Depth,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   const SmallVectorImpl<Immediate> &Worklist,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                    Cost &SolutionCost,
                    SmallVectorImpl<const Formula *> &Workspace,
                    const Cost &CurCost,
                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
                    DenseSet<const SCEV *> &VisitedRegs) const;
  void Solve(SmallVectorImpl<const Formula *> &Solution) const;

                      const SmallVectorImpl<Instruction *> &Inputs) const;

                                                     const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
               SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

public:
  LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
              LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
              TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);

  bool getChanged() const { return Changed; }
  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
    return ScalarEvolutionIVs;
  }

  void print_factors_and_types(raw_ostream &OS) const;
  void print_fixups(raw_ostream &OS) const;
  void print_uses(raw_ostream &OS) const;
  void print(raw_ostream &OS) const;
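
/// If IV is used in a int-to-float cast inside the loop then try to eliminate
/// the cast operation.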
 
void LSRInstance::OptimizeShadowIV() {

    Type *DestTy = nullptr;
    bool IsSigned = false;

      DestTy = UCast->getDestTy();

      DestTy = SCast->getDestTy();

    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?

    BinaryOperator *Incr =

    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)
      continue;

    ConstantInt *C = nullptr;

    if (!C->getValue().isStrictlyPositive())
      continue;

    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());

        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,

bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
  for (IVStrideUse &U : IU)
    if (U.getUser() == Cond) {

ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  const SCEVNAryExpr *Max = nullptr;
    Pred = ICmpInst::ICMP_SLE;
    Pred = ICmpInst::ICMP_SLT;
    Pred = ICmpInst::ICMP_ULT;

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))

         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {

         if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
           NewRHS = BO->getOperand(0);

        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);

    NewRHS = SU->getValue();

  // Ok, everything looks ok to change the condition into an SLT or SGE and
  // delete the max calculation.
  ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
                                   Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  Cond->replaceAllUsesWith(NewCond);

  Cond->eraseFromParent();

  if (Cmp->use_empty()) {
    Cmp->eraseFromParent();
 
LSRInstance::OptimizeLoopTermCond() {
  SmallPtrSet<Instruction *, 4> PostIncs;

  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *ExitingBlock : ExitingBlocks) {

    IVStrideUse *CondUse = nullptr;
    if (!FindIVUserForCond(Cond, CondUse))
      continue;

    // If the exiting block isn't the latch, it must dominate the latch for
    // post-increment to be a candidate.
    if (!DT.dominates(ExitingBlock, LatchBlock))
      continue;

    if (LatchBlock != ExitingBlock)
      for (const IVStrideUse &UI : IU)
        if (&UI != CondUse &&

          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(UI, L);
          if (!A || !B) continue;

          if (const SCEVConstant *D =

            const ConstantInt *C = D->getValue();

            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;
            // Avoid weird situations.
            if (C->getValue().getSignificantBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;

              MemAccessTy AccessTy =

              int64_t Scale = C->getSExtValue();

                                            AccessTy.AddrSpace))
                goto decline_post_inc;

                                            AccessTy.AddrSpace))
                goto decline_post_inc;

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

    if (Cond->getNextNode() != TermBr) {
      if (Cond->hasOneUse()) {

        ICmpInst *OldCond = Cond;

        Cond->setName(L->getHeader()->getName() + ".termcond");

  IVIncInsertPos = L->getLoopLatch()->getTerminator();
  for (Instruction *Inst : PostIncs)
 
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
  // something conservative, however this can pessimize in the case that one of
  // the uses will have all its uses outside the loop, for example.
  if (LU.Kind != Kind)
    return false;

  // Check for a mismatched access type, and if possible, provide a compatible
  // MemAccessTy.
  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);
    }
  }

  // Conservatively assume HasBaseReg is true for now.
  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
                          LU.MaxOffset - NewOffset, HasBaseReg))
      return false;
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
                          NewOffset - LU.MinOffset, HasBaseReg))
      return false;
    NewMaxOffset = NewOffset;
  }

  // FIXME: We should be able to handle some level of scalable offset support
  // for 'void', which may be used when there are multiple potentially
  // different access types.
  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
    return false;

  // Update the use.
  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
  return true;
}

/// Return an LSRUse index and an offset value for a fixup which needs the
/// given expression, with the given kind and optional access type. Either
/// reuse an existing use or create a new one, as needed.
std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
  const SCEV *Copy = Expr;
  Immediate Offset = ExtractImmediate(Expr, SE);

  // Basic uses can't accept any offset, for example.
  if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/nullptr,
                        Offset, /*HasBaseReg=*/true)) {
    Expr = Copy;
    Offset = Immediate::getFixed(0);
  }

  std::pair<UseMapTy::iterator, bool> P =
      UseMap.try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
  if (!P.second) {
    // A use already existed with this base.
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
      // Reuse this use.
      return std::make_pair(LUIdx, Offset);
  }

  // Create a new use.
  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);
}
 
/// Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())
    std::swap(LU, Uses.back());
  Uses.pop_back();

  // Update RegUses.
  RegUses.swapAndDropUse(LUIdx, Uses.size());
}

/// Look for a use distinct from OrigLU which has a formula that has the same
/// registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  // Search all uses for the formula. This could be more clever.
  for (LSRUse &LU : Uses) {
    // Check whether this use is close enough to OrigLU, to see whether it's
    // worthwhile looking through its formulae.
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      // Scan through this use's formulae.
      for (const Formula &F : LU.Formulae) {
        // Check to see if this formula has the same registers and symbols
        // as OrigF.
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())
            return &LU;
          break;
        }
      }
    }
  }

  // Nothing looked good.
  return nullptr;
}

void LSRInstance::CollectInterestingTypesAndFactors() {
  SmallSetVector<const SCEV *, 4> Strides;

  // Collect interesting types and strides.
  for (const IVStrideUse &U : IU) {
    const SCEV *Expr = IU.getExpr(U);

    } while (!Worklist.empty());
  }

  // Compute interesting factors from the sets of interesting strides.
  for (SmallSetVector<const SCEV *, 4>::const_iterator
       I = Strides.begin(), E = Strides.end(); I != E; ++I)
    for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

      if (const SCEVConstant *Factor =

        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());
      } else if (const SCEVConstant *Factor =

        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());
      }
    }

  // If all uses use the same type, don't bother looking for truncation-based
  // reuse.
  if (Types.size() == 1)
    Types.clear();
 
  for(; OI != OE; ++OI) {

    return Trunc->getOperand(0);

      if (SubExpr->getSCEVType() == scAddExpr)

      if (SubExpr->getSCEVType() != scMulExpr)
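
/// Return true if the chain increment is profitable to expand into a loop
/// invariant value, which may require its own register. A profitable chain
/// increment will be an offset relative to the same base.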
 
 
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
                                    ScalarEvolution &SE) {

  SmallPtrSet<const SCEV*, 8> Processed;

  if (!Chain.hasIncs())
    return false;

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst
                    : Users) { dbgs() << "  " << *Inst << "\n"; });
    return false;
  }
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
    return true;

  for (const IVInc &Inc : Chain) {
    if (TTI.isProfitableLSRChainElement(Inc.UserInst))
      return true;

    if (Inc.IncExpr->isZero())
      continue;

      ++NumConstIncrements;

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;
    else
      ++NumVarIncrements;

    LastIncExpr = Inc.IncExpr;
  }
  // An IV chain with a single increment is handled by LSR's postinc
  // uses. However, a chain with multiple increments requires keeping the IV's
  // value live longer than it needs to be if chained.
  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
 
 
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                                   SmallVectorImpl<ChainUsers> &ChainUsersVec) {

  const SCEV *const OperExpr = SE.getSCEV(NextIV);
  const SCEV *const OperExprBase = getExprBase(OperExpr);

  // Visit all existing chains to check whether this operand can be computed
  // as a profitable increment of one of them.
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    const SCEV *PrevExpr = SE.getSCEV(PrevIV);
    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;
      break;
    }
  }

  // If we haven't found a chain, create a new one.
  if (ChainIdx == NChains) {

    LastIncExpr = OperExpr;

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
                                 OperExprBase));
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
  } else {
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << "  Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    // Add this IV user to the end of the chain.
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  }
  IVChain &Chain = IVChainVec[ChainIdx];

  SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;

  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);
    NearUsers.clear();
  }

  // All other uses of IVOperand become near uses of the chain.
  for (User *U : IVOper->users()) {

    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
        break;
    }
    if (IncIter != IncEnd)
      continue;

        && IU.isIVUserOrOperand(OtherUse)) {
      continue;
    }
    NearUsers.insert(OtherUse);
  }

  // Since this user is part of the chain, it's no longer considered a use
  // of the chain.
  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
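
/// Populate the vector of Chains.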
 
void LSRInstance::CollectChains() {

  SmallVector<BasicBlock *,8> LatchPath;
  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

  // Walk the instruction stream from the loop header to the loop latch.
  for (BasicBlock *BB : reverse(LatchPath)) {
    for (Instruction &I : *BB) {

      // Remove this instruction from any NearUsers set it may be in.
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);
      }

      // Search for operands that can be chained.
      SmallPtrSet<Instruction*, 4> UniqueOperands;
      while (IVOpIter != IVOpEnd) {
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
      }
    }
  }

  // Visit phi backedges to determine if the loop has a closed IV cycle.
  for (PHINode &PN : L->getHeader()->phis()) {

      ChainInstruction(&PN, IncV, ChainUsersVec);
  }

  // Remove any unprofitable chains, compacting the vector in place.
  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
    if (!isProfitableChain(IVChainVec[UsersIdx],
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
      continue;
    // Preserve the chain at UsersIdx.
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
    ++ChainIdx;
  }
  IVChainVec.resize(ChainIdx);
}
 
void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  }
}

  Immediate IncOffset = Immediate::getZero();

        C->getSignificantBits() > 64)

    IncOffset = Immediate::getScalable(C->getSExtValue());
 
 
void LSRInstance::GenerateIVChain(const IVChain &Chain,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // Find the new IVOperand for the head of the chain. It may have been
  // replaced by LSR.
  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {

    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
      break;
    }
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  }
  if (IVOpIter == IVOpEnd) {
    // Gracefully give up on this chain.
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
    return;
  }
  assert(IVSrc && "Failed to find IV chain source");

  const SCEV *LeftOverExpr = nullptr;
  const SCEV *Accum = SE.getZero(IntTy);

  for (const IVInc &Inc : Chain) {
    Instruction *InsertPt = Inc.UserInst;
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {

      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
    }

    // Look through the chain's recorded bases for one whose remainder from
    // Accum can still be folded.
    bool FoundBase = false;
    for (auto [MapScev, MapIVOper] : reverse(Bases)) {
      const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);

        if (!Remainder->isZero()) {

          Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
          const SCEV *IVOperExpr =

          IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

    if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
      // Expand the IV increment.
      Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);

      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");

        LeftOverExpr = nullptr;
    }

    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
      assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
             "cannot extend a chained IV");

      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    }
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);

    // If LSR created a new, wider phi, we may also replace its postinc.
    for (PHINode &Phi : L->getHeader()->phis()) {

          Phi.getIncomingValueForBlock(L->getLoopLatch()));

      Value *IVOper = IVSrc;
      if (IVTy != PostIncTy) {

        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      }
      Phi.replaceUsesOfWith(PostIncV, IVOper);
 
void LSRInstance::CollectFixupsAndInitialFormulae() {
  BranchInst *ExitBranch = nullptr;
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

  // For calculating baseline cost
  SmallPtrSet<const SCEV *, 16> Regs;
  DenseSet<const SCEV *> VisitedRegs;
  DenseSet<size_t> VisitedLSRUse;

  for (const IVStrideUse &U : IU) {

    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
      continue;
    }

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
    if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
      Kind = LSRUse::Address;
      AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
    }

    const SCEV *S = IU.getExpr(U);

    // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
    // (N - i == 0), and this allows (N - i) to be the expression that we work
    // with rather than just N or i, so we can consider the register
    // requirements for both N and i at the same time.
      if (CI->isEquality()) {
        // Swap the operands if needed to put the OperandValToReplace on the
        // left, for consistency.
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
        }

            (!NV->getType()->isPointerTy() ||

          Kind = LSRUse::ICmpZero;

        } else if (L->isLoopInvariant(NV) &&
                   !NV->getType()->isPointerTy()) {

          Kind = LSRUse::ICmpZero;
        }

        // -1 and the negations of all interesting strides (except the
        // negation of -1) are now also interesting.
        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);
        Factors.insert(-1);
      }

    // Get or create an LSRUse.
    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    Immediate Offset = P.second;
    LSRUse &LU = Uses[LUIdx];

    // Record the fixup.
    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LF.Offset = Offset;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
    LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);

    // Create SCEV as Formula for calculating baseline cost.
    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      Formula F;
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,
                               HardwareLoopProfitable);
      VisitedLSRUse.insert(LUIdx);
    }

    if (!LU.WidestFixupType ||
        SE.getTypeSizeInBits(LU.WidestFixupType) <
        SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    // If this is the first use of this LSRUse, give it a formula.
    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
    }
  }
}
 
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
                                       size_t LUIdx) {
  // Mark uses whose expressions cannot be expanded.

    LU.RigidFormula = true;

  Formula F;
  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}

/// Insert a simple single-register formula for the given expression into the
/// given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  Formula F;
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}

/// Note which registers are used by the given formula, updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
  if (F.ScaledReg)
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);
}

/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
  // Do not insert formula that we will not be able to expand.
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))
    return false;

  CountRegisters(F, LUIdx);
  return true;
}
 
bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
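
/// Check for other uses of loop-invariant values which we're tracking. These
/// other uses will pin these values in registers, making them less profitable
/// for elimination.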
 
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  SmallPtrSet<const SCEV *, 32> Visited;

  while (!Worklist.empty()) {
    if (!Visited.insert(S).second)

      const Value *V = US->getValue();
        if (L->contains(Inst)) continue;

      for (const Use &U : V->uses()) {
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())

          bool HasIncompatibleEHPTerminatedBlock = false;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;

          if (HasIncompatibleEHPTerminatedBlock) {

          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = ICI->getOperand(OtherIdx);

        std::pair<size_t, Immediate> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
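// CollectSubexprs (used by GenerateReassociationsImpl below): split an
// expression into subexpressions, collecting them into an operand list, and
// return any leftover remainder.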
 
                                   unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {

  const SCEV *Start, *Step;
    if (Start->isZero())
      Remainder = nullptr;
    if (Remainder != Start) {
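/// Return true if the SCEV represents a value that may end up as a
/// post-increment operation.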
 
 
                              LSRUse &LU, const SCEV *S, const Loop *L,
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())

  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, S->getType()) ||
 
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);

  if (AddOps.size() == 1)

                         LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J), std::as_const(AddOps).end());

    if (InnerAddOps.size() == 1 &&
                         LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))

    const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);

    if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())

          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
        F.ScaledReg = nullptr;
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
      F.BaseRegs[Idx] = InnerSum;

          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
      F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
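/// Split out subexpressions from adds and the bases of addrecs.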
 
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
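/// Generate a formula consisting of all of the loop-dominating registers added
/// into a single register.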
 
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
          (Base.UnfoldedOffset.isNonZero()) <=

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
      if (!CombinedIntegerType)
      Ops.push_back(BaseReg);
      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
                                 NewBase.UnfoldedOffset.getFixedValue(), true));
    NewBase.UnfoldedOffset = Immediate::getFixed(0);
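/// Helper function for LSRInstance::GenerateSymbolicOffsets.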
 
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (G->isZero() || !GV)

  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);
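/// Generate reuse formulae using symbolic offsets.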
 
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
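/// Helper function for LSRInstance::GenerateConstantOffsets.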
 
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {

  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);
    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
          F.ScaledReg = nullptr;
          F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

    const APInt *StepInt;
      for (Immediate Offset : Worklist) {
          Offset = Immediate::getFixed(Offset.getFixedValue() - Step);

  for (Immediate Offset : Worklist)

  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))

  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    F.BaseRegs[Idx] = G;

  (void)InsertFormula(LU, LUIdx, F);
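/// Generate reuse formulae using constant offsets drawn from the fixups.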
 
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,

  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
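/// For ICmpZero, check to see if we can scale up the comparison. For example,
/// x == y -> x*c == y*c.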
 
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  if (LU.MinOffset != LU.MaxOffset) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)
    if (BaseReg->getType()->isPointerTy())
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset.isMin() && Factor == -1)
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())

    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())

    F.BaseOffset = NewBaseOffset;

    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

    const SCEV *FactorS = SE.getConstant(IntTy, Factor);

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())

                                       IntTy, F.UnfoldedOffset.getFixedValue()))

    (void)InsertFormula(LU, LUIdx, F);
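/// Generate stride factor reuse formulae by making use of scaled-offset
/// address modes, for example.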
 
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not do its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        const SCEV *FactorS = SE.getConstant(IntTy, Factor);
        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
          if (!Quotient->isZero()) {
            F.ScaledReg = Quotient;
            F.deleteBaseReg(F.BaseRegs[i]);
            if (F.Scale == 1 &&
                (F.BaseRegs.empty() ||
                 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
            if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
            (void)InsertFormula(LU, LUIdx, F);
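// Static helper used by GenerateTruncates (header elided above): extend or
// truncate an expression to the target type while honoring post-inc uses in
// the collected loops, requiring a single consistent result.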
 
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    if (!New || (Result && New != Result))

  assert(Result && "failed to create expression");
 
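/// Generate reuse formulae from different IV types.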
 
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
             [](const SCEV *S) { return S->getType()->isPointerTy(); }))

  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
        const SCEV *NewScaledReg =
        if (!NewScaledReg || NewScaledReg->isZero())
        F.ScaledReg = NewScaledReg;

      bool HasZeroBaseReg = false;
      for (const SCEV *&BaseReg : F.BaseRegs) {
        const SCEV *NewBaseReg =
        if (!NewBaseReg || NewBaseReg->isZero()) {
          HasZeroBaseReg = true;

      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

      (void)InsertFormula(LU, LUIdx, F);
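// Helper record for GenerateCrossUseConstantOffsets below: a (use index,
// immediate, original register) work item whose formulae are rewritten later.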
 
  const SCEV *OrigReg;

  WorkItem(size_t LI, Immediate I, const SCEV *R)
      : LUIdx(LI), Imm(I), OrigReg(R) {}

  void print(raw_ostream &OS) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::print(raw_ostream &OS) const {
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << ", add offset " << Imm;
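/// Look for registers which are a constant distance apart and try to form
/// reuse opportunities between them.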
 
void LSRInstance::GenerateCrossUseConstantOffsets() {
  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;

  DenseMap<const SCEV *, ImmMapTy> Map;
  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
  for (const SCEV *Use : RegUses) {
    auto Pair = Map.try_emplace(Reg);
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    if (Imms.size() == 1)

               for (const auto &Entry
               << ' ' << Entry.first;

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;

      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

          UsedByIndicesMap[Reg].count() == 1) {

      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {

      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE) continue;
        if (!JImm.isCompatibleImmediate(M->first))

        Immediate Imm = JImm.subUnsigned(M->first);
        for (unsigned LUIdx : UsedByIndices.set_bits())
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));

  UsedByIndicesMap.clear();
  UniqueItems.clear();

  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    Immediate Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];

      if (F.ScaledReg == OrigReg) {
        if (!F.BaseOffset.isCompatibleImmediate(Imm))
        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))

        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

          if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
          if (C->getValue()->isNegative() !=
                  (NewF.BaseOffset.isLessThanZero()) &&
              (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
                  .ule(std::abs(NewF.BaseOffset.getFixedValue())))

        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);

        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          if (BaseReg != OrigReg)

          if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
              !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
              !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
          NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
                          LU.Kind, LU.AccessTy, NewF)) {
            Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
            NewF.UnfoldedOffset = NewUnfoldedOffset;
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          for (const SCEV *NewReg : NewF.BaseRegs)
              if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
              if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                      .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                  (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                          NewF.BaseOffset.getFixedValue()))

          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
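/// Generate formulae for each use.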
 
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
  }

  GenerateCrossUseConstantOffsets();

                       "After generating reuse formulae:\n";
             print_uses(dbgs()));
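/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.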
 
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  SmallPtrSet<const SCEV *, 16> LoserRegs;
  bool ChangedFormulae = false;

  using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>, size_t>;
  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
      if (CostF.isLoser()) {

        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);

        std::pair<BestFormulaeTy::const_iterator, bool> P =
            BestFormulae.insert(std::make_pair(Key, FIdx));

        Formula &Best = LU.Formulae[P.first->second];

        Cost CostBest(L, SE, TTI, AMK);
        CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
                             HardwareLoopProfitable);
        if (CostF.isLess(CostBest))

                             "    in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');

      ChangedFormulae = true;
      LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

    BestFormulae.clear();

              "After filtering out undesirable candidates:\n";
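/// Estimate the worst-case number of solutions the solver might have to
/// consider. It almost never considers this many solutions because it prunes
/// the search space, but the pruning isn't always sufficient.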
 
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
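/// When one formula uses a superset of the registers of another formula, it
/// won't help reduce register pressure (though it may not necessarily hurt
/// register pressure); remove it to simplify the system.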
 
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
    LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                         "which use a superset of registers used by other "
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())

             I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
                Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                    (uint64_t)C->getValue()->getSExtValue());
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LU.DeleteFormula(F);

                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                    (I - F.BaseRegs.begin()));
                if (LU.HasFormulaWithSameRegs(NewF)) {
                  LU.DeleteFormula(F);

        LU.RecomputeRegs(LUIdx, RegUses);
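/// When there are many registers for expressions like A, A+1, A+2, etc.,
/// allocate a single register for them.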
 
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by assuming that uses separated "
                "by a constant offset will use the same registers.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))

      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);

      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
                              LU.Kind, LU.AccessTy))

      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
      LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;

      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);

      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LUThatHas->DeleteFormula(F);

        LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      DeleteUse(LU, LUIdx);
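/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.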
 
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {
    LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                         "undesirable dedicated registers.\n");
    FilterOutUndesirableDedicatedRegisters();
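/// If an LSRUse has multiple formulae with the same ScaledReg and Scale, keep
/// only the one expected to need the fewest registers (on a tie, the one with
/// the better rated cost), per the IsBetterThan lambda below.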
 
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by choosing the best Formula "
                "from the Formulae with the same Scale and ScaledReg.\n");

  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
  BestFormulaeTy BestFormulae;

  bool ChangedFormulae = false;
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      }
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      }
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
      return CostFA.isLess(CostFB);

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
      Formula &F = LU.Formulae[FIdx];

      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      Formula &Best = LU.Formulae[P.first->second];
      if (IsBetterThan(F, Best))

                           "    in favor of formula ";
                 Best.print(dbgs()); dbgs() << '\n');

      ChangedFormulae = true;
      LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

    BestFormulae.clear();

              "After filtering out undesirable candidates:\n";
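/// For post-increment address uses, keep only the formulae with the lowest
/// number of registers.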
 
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
                       "Narrowing the search space by choosing the lowest "
                       "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (LU.Kind != LSRUse::Address)

    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);
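/// Delete formulae with a high expected register count. The expectation is
/// probabilistic, since we don't yet know which formula each use will pick;
/// RegNumMap below accumulates, per register, the chance it ends up unselected.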
 
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  SmallPtrSet<const SCEV *, 4> UniqRegs;

  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
      float P = LU.getNotSelectedProbability(Reg);
    RegNumMap.insert(std::make_pair(Reg, PNotSel));

      dbgs() << "Narrowing the search space by deleting costly formulas\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (LU.Formulae.size() < 2)

    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();

    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];

      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
              RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
                RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);

      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;

               dbgs() << " with min reg num " << FMinRegNum << '\n');
      std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      LU.Formulae.pop_back();
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
 
                                       MemAccessTy AccessType) {
  return TTI.isLegalAddressingMode(
             AccessType.MemTy, nullptr,
             Diff->getSExtValue(),
             true, 0, AccessType.AddrSpace) &&
         !TTI.isLegalAddressingMode(
             AccessType.MemTy, nullptr,
             -Diff->getSExtValue(),
             true, 0, AccessType.AddrSpace);
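/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.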
 
 
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  SmallPtrSet<const SCEV *, 4> Taken;

    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
        BestNum = RegUses.getUsedByIndices(Reg).count();
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
        if (Count == BestNum) {
          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
                                         Uses[LUIdx].AccessTy)) {

    assert(Best && "Failed to find best LSRUse candidate");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best)) continue;

      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LU.DeleteFormula(F);
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");

        LU.RecomputeRegs(LUIdx, RegUses);
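/// If there are an extraordinary number of formulae to choose from, use some
/// rough heuristics to prune down the number of formulae. This keeps the main
/// solver from taking an extraordinary amount of time in some worst-case
/// scenarios.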
 
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
    NarrowSearchSpaceByDeletingCostlyFormulas();
    NarrowSearchSpaceByPickingWinnerRegs();
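/// This is the recursive solver.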
 
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               Cost &SolutionCost,
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  const LSRUse &LU = Uses[Workspace.size()];

  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
      int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
      for (const SCEV *Reg : ReqRegs) {
        if ((F.ScaledReg && F.ScaledReg == Reg) ||
          if (NumReqRegsToFind == 0)
      if (NumReqRegsToFind != 0) {

    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
    if (NewCost.isLess(SolutionCost)) {
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);

                   dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs) dbgs()
                      << "- " << *S << "\n";

        SolutionCost = NewCost;
        Solution = Workspace;
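/// Choose one formula from each use. Returns the results in the given Solution
/// vector.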
 
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;

  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {

                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               Solution[i]->print(dbgs());

  const bool EnableDropUnprofitableSolution = [&] {

  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
          dbgs() << "Baseline is more profitable than chosen solution, "
                    "add option 'lsr-drop-solution' to drop LSR solution.\n");

                           "solution, dropping LSR solution.\n";);
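/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as
/// far as we can go while still being dominated by the input positions. This
/// helps canonicalize the insert position, which encourages sharing.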
 
                                 const SmallVectorImpl<Instruction *> &Inputs)
    bool AllDominate = true;
    for (Instruction *Inst : Inputs) {
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;

      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))

    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
      if (!Rung) return IP;
      Rung = Rung->getIDom();
      if (!Rung) return IP;
      IDom = Rung->getBlock();

      const Loop *IDomLoop = LI.getLoopFor(IDom);
      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
      if (IDomDepth <= IPLoopDepth &&
          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
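/// Determine an input position which will be dominated by the operands and
/// which will dominate the result.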
 
  SmallVector<Instruction *, 4> Inputs;

  if (LU.Kind == LSRUse::ICmpZero)
    if (Instruction *I =

  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());

  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L) continue;
    if (!ExitingBlocks.empty()) {
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)

         "Insertion point must be a normal instruction");

  while (IP->isEHPad()) ++IP;

  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
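/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding").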
 
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
                           const Formula &F, BasicBlock::iterator IP,
                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  IP = AdjustInsertPositionForExpand(IP, LF, LU);

  Rewriter.setPostInc(LF.PostIncLoops);

  Type *Ty = F.getType();

  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");

  Value *ICmpScaledV = nullptr;
    const SCEV *ScaledS = F.ScaledReg;

    if (LU.Kind == LSRUse::ICmpZero) {
               "The only scale supported by ICmpZero uses is -1!");
        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);

      if (!Ops.empty() && LU.Kind == LSRUse::Address &&
      Ops.push_back(ScaledS);

  assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
         "Expanding mismatched offsets\n");

  Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
  if (Offset.isNonZero()) {
    if (LU.Kind == LSRUse::ICmpZero) {
            ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue());
        ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());
      Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));

  Immediate UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset.isNonZero()) {
    Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));

  const SCEV *FullS = Ops.empty() ?

  if (LU.Kind == LSRUse::ICmpZero) {
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                        "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
                                           -(uint64_t)Offset.getFixedValue());
      if (C->getType() != OpTy) {
        assert(C && "Cast of ConstantInt should have folded");
 
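/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.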
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
                                const LSRFixup &LF, const Formula &F,
                                SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  DenseMap<BasicBlock *, Value *> Inserted;

      bool needUpdateFixups = false;
        Loop *PNLoop = LI.getLoopFor(Parent);
        if (!PNLoop || Parent != PNLoop->getHeader()) {
                                  CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
                                      .setMergeIdenticalEdges()
                                      .setKeepOneInputPHIs());

            DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
            if (L->contains(BB) && !L->contains(PN))
            needUpdateFixups = true;

      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
              LF.OperandValToReplace->getType(), "tmp",
          if (L->contains(I) && !L->contains(BB))
            InsertedNonLCSSAInsts.insert(I);
        Pair.first->second = FullV;

      if (needUpdateFixups) {
        for (LSRUse &LU : Uses)
          for (LSRFixup &Fixup : LU.Fixups)
            if (Fixup.UserInst == PN) {
              bool foundInOriginalPHI = false;
                if (val == Fixup.OperandValToReplace) {
                  foundInOriginalPHI = true;
              if (foundInOriginalPHI)
                    if (val == Fixup.OperandValToReplace)
                      Fixup.UserInst = NewPN;
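/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding"), and update the UserInst to reference the
/// newly expanded value.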
 
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                          const Formula &F,
                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
    RewriteForPHI(PN, LU, LF, F, DeadInsts);
    if (FullV->getType() != OpTy) {
    if (LU.Kind == LSRUse::ICmpZero)

  if (LU.Kind != LSRUse::Address)
  if (IVIncInsertPos->getParent() == LHeader)
  if (!Fixup.OperandValToReplace ||
        Instruction *UI = cast<Instruction>(U);
        return UI->getParent() != LHeader;

  Type *Ty = I->getType();
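/// Rewrite all the fixup locations with new values, following the chosen
/// solution.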
 
 
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  for (const IVChain &Chain : IVChainVec) {

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
              ? L->getHeader()->getTerminator()
      Rewriter.setIVIncInsertPos(L, InsertPos);
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);

  auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, DeadInsts);

  for (const WeakVH &IV : Rewriter.getInsertedIVs())

  for (PHINode &PN : L->getHeader()->phis()) {
    BinaryOperator *BO = nullptr;
    case Instruction::Sub:
    case Instruction::Add:
                      [&](Use &U) { return DT.dominates(IVIncInsertPos, U); }))
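// LSRInstance constructor: runs the complete Loop Strength Reduction algorithm
// over the given loop, rewriting it if a profitable solution is found.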
 
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI, AssumptionCache &AC,
                         TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
                            : TTI.getPreferredAddressingMode(L, &SE)),
      BaselineCost(L, SE, TTI, AMK) {
  if (!L->isLoopSimplifyForm())

  unsigned NumUsers = 0;
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U

       auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();

             L->getHeader()->printAsOperand(dbgs(), false);

  HardwareLoopProfitable =
      TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.disableCanonicalMode();
  Rewriter.enableLSRMode();

  OptimizeLoopTermCond();

  if (IU.empty()) return;

  if (!L->isInnermost()) {

  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

             print_uses(dbgs()));
             BaselineCost.print(dbgs()); dbgs() << "\n");

  GenerateAllReuseFormulae();

  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  if (Solution.empty())

  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
                        F) && "Illegal formula generated!");

  ImplementSolution(Solution);
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty()) return;

  OS << "LSR has identified the following interesting factors and types: ";

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    OS << '*' << Factor;

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    OS << '(' << *Ty << ')';

void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {

void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae) {

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
 
class LoopStrengthReduce : public LoopPass {
  LoopStrengthReduce();

  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
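// Build a DIExpression operand iterator range over a raw DWARF expression
// operand buffer.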
 
ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
  llvm::DIExpression::expr_op_iterator Begin =
      llvm::DIExpression::expr_op_iterator(Expr.begin());
  llvm::DIExpression::expr_op_iterator End =
      llvm::DIExpression::expr_op_iterator(Expr.end());
  return {Begin, End};
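// Builds up a DWARF expression and its location operands from SCEVs; used by
// the debug-value salvaging code below.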
 
struct SCEVDbgValueBuilder {
  SCEVDbgValueBuilder() = default;
  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

  void clone(const SCEVDbgValueBuilder &Base) {
    LocationOps = Base.LocationOps;

    LocationOps.clear();

  SmallVector<Value *, 2> LocationOps;

  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }

    unsigned ArgIndex = 0;
    if (It != LocationOps.end()) {
      ArgIndex = std::distance(LocationOps.begin(), It);
      ArgIndex = LocationOps.size();

  void pushValue(const SCEVUnknown *U) {

  bool pushConst(const SCEVConstant *C) {
    if (C->getAPInt().getSignificantBits() > 64)
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(C->getAPInt().getSExtValue());

    return ToDwarfOpIter(Expr);

  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
           "Expected arithmetic SCEV type");
    unsigned EmitOperator = 0;
    for (const auto &Op : CommExpr->operands()) {
      if (EmitOperator >= 1)
        pushOperator(DwarfOp);

  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
    const llvm::SCEV *Inner = C->getOperand(0);
    const llvm::Type *Type = C->getType();
    uint64_t ToWidth = Type->getIntegerBitWidth();
    bool Success = pushSCEV(Inner);
                          IsSigned ? llvm::dwarf::DW_ATE_signed
                                   : llvm::dwarf::DW_ATE_unsigned};
    for (const auto &Op : CastOps)

  bool pushSCEV(const llvm::SCEV *S) {
      Success &= pushConst(StartInt);
      pushLocation(U->getValue());
      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
      Success &= pushSCEV(UDiv->getLHS());
      Success &= pushSCEV(UDiv->getRHS());
      pushOperator(llvm::dwarf::DW_OP_div);
             "Unexpected cast type in SCEV.");
      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);

  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
      if (C->getAPInt().getSignificantBits() > 64)
      int64_t I = C->getAPInt().getSExtValue();
      case llvm::dwarf::DW_OP_plus:
      case llvm::dwarf::DW_OP_minus:
      case llvm::dwarf::DW_OP_mul:
      case llvm::dwarf::DW_OP_div:

  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
      if (!pushSCEV(Stride))
      pushOperator(llvm::dwarf::DW_OP_mul);
    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
      if (!pushSCEV(Start))
      pushOperator(llvm::dwarf::DW_OP_plus);

  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
    pushLocation(OffsetValue);
        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
               << std::to_string(Offset) << "\n");

  bool createIterCountExpr(const SCEV *S,
                           const SCEVDbgValueBuilder &IterationCount,
                           ScalarEvolution &SE) {
    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
    if (!Rec->isAffine())
    clone(IterationCount);
    if (!SCEVToValueExpr(*Rec, SE))

  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
                           ScalarEvolution &SE) {
    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
      if (!pushSCEV(Start))
      pushOperator(llvm::dwarf::DW_OP_minus);
    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
      if (!pushSCEV(Stride))
      pushOperator(llvm::dwarf::DW_OP_div);

  void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
                       SmallVectorImpl<Value *> &DestLocations) {
           "Expected the locations vector to contain the IV");
           "Expected the location ops to contain the IV.");
    for (const auto &Op : LocationOps) {
      auto It = find(DestLocations, Op);
      if (It != DestLocations.end()) {
        DestIndexMap.push_back(std::distance(DestLocations.begin(), It));

    for (const auto &Op : expr_ops()) {
        Op.appendToVector(DestExpr);
      uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
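// Cached state needed to salvage one debug value record: the original
// expression and location ops, their SCEVs, and per-location recovery
// expressions.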
 
struct DVIRecoveryRec {
  DVIRecoveryRec(DbgVariableRecord *DVR)
      : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}

  DbgVariableRecord *DbgRef;
  bool HadLocationArgList;

    for (auto &RE : RecoveryExprs)
    RecoveryExprs.clear();

  ~DVIRecoveryRec() { clear(); }
 
  auto expr_ops = ToDwarfOpIter(Expr);
  for (auto Op : expr_ops)

template <typename T>
                                    "contain any DW_OP_llvm_arg operands.");

template <typename T>
         "Expected expression that references DIArglist locations using "
         "DW_OP_llvm_arg operands.");
  for (Value *V : Locations)

  if (NumLLVMArgs == 0) {
           "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
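/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased
/// values.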
 
 
  LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                    << "scev-salvage: post-LSR: " << *DbgVal << '\n');
  assert(DVIRec.Expr && "Expected an expression");

  if (!DVIRec.HadLocationArgList) {
    assert(DVIRec.LocationOps.size() == 1 &&
           "Unexpected number of location ops.");

    Value *CachedValue =
    for (WeakVH VH : DVIRec.LocationOps) {

  LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
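// SalvageDVI: rewrite the debug value's expression and location ops in terms
// of the post-LSR induction variable, using the cached pre-LSR SCEVs.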
 
 
                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {

  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  NewLocationOps.push_back(LSRInductionVar);

  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
                        << " now at index " << LocationOpIndexMap[i] << "\n");

      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or erased. "
                           "Salvage abandoned.\n");

    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");

    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

    if (std::optional<APInt> Offset =
      if (Offset->getSignificantBits() <= 64)
        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,

    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);

  for (const auto &Op : DVIRec.Expr->expr_ops()) {
    SCEVDbgValueBuilder *DbgBuilder =
        DVIRec.RecoveryExprs[LocationArgIndex].get();
      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
             "Expected a positive index for the location-op position.");
      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
    DbgBuilder->appendToVectors(NewExpr, NewLocationOps);

  LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");
 
 
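// Obtain a DWARF expression for the loop's induction variable, then attempt
// to salvage each gathered debug value against it.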
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
  if (DVIToUpdate.empty())

  assert(SCEVInductionVar &&
         "Anticipated a SCEV for the post-LSR induction variable");

    if (!IVAddRec->isAffine())

    SCEVDbgValueBuilder IterCountExpr;
    IterCountExpr.pushLocation(LSRInductionVar);
    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))

    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar

    for (auto &DVIRec : DVIToUpdate) {
      SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
 
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
  for (const auto &B : L->getBlocks()) {
    for (auto &I : *B) {
        if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
        if (DbgVal.isKillLocation())

        const auto &HasTranslatableLocationOps =
          for (const auto LocOp : DbgValToTranslate.location_ops()) {

        if (!HasTranslatableLocationOps(DbgVal))

        std::unique_ptr<DVIRecoveryRec> NewRec =
            std::make_unique<DVIRecoveryRec>(&DbgVal);

        NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
        for (const auto LocOp : DbgVal.location_ops()) {
          NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
          NewRec->LocationOps.push_back(LocOp);
          NewRec->HadLocationArgList = DbgVal.hasArgList();
        }
        SalvageableDVISCEVs.push_back(std::move(NewRec));
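// Pick a suitable induction variable PHI: prefer the IVs produced by the SCEV
// expander during LSR, falling back to any suitable PHI in the loop header.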
 
 
                                           const LSRInstance &LSR) {
  auto IsSuitableIV = [&](PHINode *P) {

  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    if (IsSuitableIV(P))

  for (PHINode &P : L.getHeader()->phis()) {
    if (IsSuitableIV(&P))
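// Pass driver (body below): run LSRInstance over the loop, replace congruent
// IVs, delete dead PHIs, and salvage the gathered debug values.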
 
 
  std::unique_ptr<MemorySSAUpdater> MSSAU;
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();

    const DataLayout &DL = L->getHeader()->getDataLayout();
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);

  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    const DataLayout &DL = L->getHeader()->getDataLayout();

  if (SalvageableDVIRecords.empty())

  for (const auto &L : LI) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");

  for (auto &Rec : SalvageableDVIRecords)
  SalvageableDVIRecords.clear();
 
 
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &) {
  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MSSA = &MSSAAnalysis->getMSSA();

char LoopStrengthReduce::ID = 0;

                      "Loop Strength Reduction", false, false)
 
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
bool isUnconditional() const
Value * getCondition() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
A nullable Value handle that is nullable.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
class_match< const SCEVVScale > m_SCEVVScale()
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bind_ty< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
bool match(const SCEV *S, const Pattern &P)
class_match< const Loop > m_Loop()
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
FunctionAddr VTableAddr Count
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.