LLVM 3.7.0
LoopStrengthReduce.cpp
1 //===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This transformation analyzes and transforms the induction variables (and
11 // computations derived from them) into forms suitable for efficient execution
12 // on the target.
13 //
14 // This pass performs a strength reduction on array references inside loops that
15 // have as one or more of their components the loop induction variable. It
16 // rewrites expressions to take advantage of scaled-index addressing modes
17 // available on the target, and it performs a variety of other optimizations
18 // related to loop induction variables.
19 //
20 // Terminology note: this code has a lot of handling for "post-increment" or
21 // "post-inc" users. This is not talking about post-increment addressing modes;
22 // it is instead talking about code like this:
23 //
24 // %i = phi [ 0, %entry ], [ %i.next, %latch ]
25 // ...
26 // %i.next = add %i, 1
27 // %c = icmp eq %i.next, %n
28 //
29 // The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
30 // it's useful to think about these as the same register, with some uses using
31 // the value of the register before the add and some using it after. In this
32 // example, the icmp is a post-increment user, since it uses %i.next, which is
33 // the value of the induction variable after the increment. The other common
34 // case of post-increment users is users outside the loop.
35 //
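// For illustration, the IR above is roughly what a simple counted loop such
// as
//
//   for (unsigned i = 0; i != n; ++i)
//     body(i);
//
// lowers to after loop rotation: uses of %i inside the body are pre-increment
// users, while the exit test compares the already-incremented %i.next and is
// therefore a post-increment user in the sense described above.
//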
36 // TODO: More sophistication in the way Formulae are generated and filtered.
37 //
38 // TODO: Handle multiple loops at a time.
39 //
40 // TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
41 // of a GlobalValue?
42 //
43 // TODO: When truncation is free, truncate ICmp users' operands to make it a
44 // smaller encoding (on x86 at least).
45 //
46 // TODO: When a negated register is used by an add (such as in a list of
47 // multiple base registers, or as the increment expression in an addrec),
48 // we may not actually need both reg and (-1 * reg) in registers; the
49 // negation can be implemented by using a sub instead of an add. The
50 // lack of support for taking this into consideration when making
51 // register pressure decisions is partly worked around by the "Special"
52 // use kind.
53 //
54 //===----------------------------------------------------------------------===//
55 
56 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/ADT/DenseSet.h"
58 #include "llvm/ADT/Hashing.h"
59 #include "llvm/ADT/STLExtras.h"
60 #include "llvm/ADT/SetVector.h"
61 #include "llvm/ADT/SmallBitVector.h"
62 #include "llvm/Analysis/IVUsers.h"
63 #include "llvm/Analysis/LoopPass.h"
64 #include "llvm/Analysis/ScalarEvolutionExpander.h"
65 #include "llvm/Analysis/TargetTransformInfo.h"
66 #include "llvm/IR/Constants.h"
67 #include "llvm/IR/DerivedTypes.h"
68 #include "llvm/IR/Dominators.h"
69 #include "llvm/IR/Instructions.h"
70 #include "llvm/IR/IntrinsicInst.h"
71 #include "llvm/IR/Module.h"
72 #include "llvm/IR/ValueHandle.h"
73 #include "llvm/Support/CommandLine.h"
74 #include "llvm/Support/Debug.h"
75 #include "llvm/Support/raw_ostream.h"
76 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
77 #include "llvm/Transforms/Utils/Local.h"
78 #include <algorithm>
79 using namespace llvm;
80 
81 #define DEBUG_TYPE "loop-reduce"
82 
83 /// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
84 /// bail out. This threshold is far beyond the number of users that LSR can
85 /// conceivably solve, so it should not affect generated code, but catches the
86 /// worst cases before LSR burns too much compile time and stack space.
87 static const unsigned MaxIVUsers = 200;
88 
89 // Temporary flag to clean up congruent phis after LSR phi expansion.
90 // It's currently disabled until we can determine whether it's truly useful or
91 // not. The flag should be removed after the v3.0 release.
92 // This is now needed for ivchains.
93 static cl::opt<bool> EnablePhiElim(
94  "enable-lsr-phielim", cl::Hidden, cl::init(true),
95  cl::desc("Enable LSR phi elimination"));
96 
97 #ifndef NDEBUG
98 // Stress test IV chain generation.
99 static cl::opt<bool> StressIVChain(
100  "stress-ivchain", cl::Hidden, cl::init(false),
101  cl::desc("Stress test LSR IV chains"));
102 #else
103 static bool StressIVChain = false;
104 #endif
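// Usage sketch: both options are ordinary cl::opt flags, so when running this
// pass standalone through opt (registered as "loop-reduce", matching the
// DEBUG_TYPE above) they can be toggled from the command line, e.g.:
//
//   opt -loop-reduce -enable-lsr-phielim=false input.ll -S
//   opt -loop-reduce -stress-ivchain input.ll -S   (asserts builds only)
//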
105 
106 namespace {
107 
108 /// RegSortData - This class holds data which is used to order reuse candidates.
109 class RegSortData {
110 public:
111  /// UsedByIndices - This represents the set of LSRUse indices which reference
112  /// a particular register.
113  SmallBitVector UsedByIndices;
114 
115  void print(raw_ostream &OS) const;
116  void dump() const;
117 };
118 
119 }
120 
121 void RegSortData::print(raw_ostream &OS) const {
122  OS << "[NumUses=" << UsedByIndices.count() << ']';
123 }
124 
125 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
126 void RegSortData::dump() const {
127  print(errs()); errs() << '\n';
128 }
129 #endif
130 
131 namespace {
132 
133 /// RegUseTracker - Map register candidates to information about how they are
134 /// used.
135 class RegUseTracker {
136  typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
137 
138  RegUsesTy RegUsesMap;
139  SmallVector<const SCEV *, 16> RegSequence;
140 
141 public:
142  void CountRegister(const SCEV *Reg, size_t LUIdx);
143  void DropRegister(const SCEV *Reg, size_t LUIdx);
144  void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
145 
146  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
147 
148  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
149 
150  void clear();
151 
152  typedef SmallVectorImpl<const SCEV *>::iterator iterator;
153  typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
154  iterator begin() { return RegSequence.begin(); }
155  iterator end() { return RegSequence.end(); }
156  const_iterator begin() const { return RegSequence.begin(); }
157  const_iterator end() const { return RegSequence.end(); }
158 };
159 
160 }
161 
162 void
163 RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
164  std::pair<RegUsesTy::iterator, bool> Pair =
165  RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
166  RegSortData &RSD = Pair.first->second;
167  if (Pair.second)
168  RegSequence.push_back(Reg);
169  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
170  RSD.UsedByIndices.set(LUIdx);
171 }
172 
173 void
174 RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
175  RegUsesTy::iterator It = RegUsesMap.find(Reg);
176  assert(It != RegUsesMap.end());
177  RegSortData &RSD = It->second;
178  assert(RSD.UsedByIndices.size() > LUIdx);
179  RSD.UsedByIndices.reset(LUIdx);
180 }
181 
182 void
183 RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
184  assert(LUIdx <= LastLUIdx);
185 
186  // Update RegUses. The data structure is not optimized for this purpose;
187  // we must iterate through it and update each of the bit vectors.
188  for (auto &Pair : RegUsesMap) {
189  SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
190  if (LUIdx < UsedByIndices.size())
191  UsedByIndices[LUIdx] =
192  LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
193  UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
194  }
195 }
196 
197 bool
198 RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
199  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
200  if (I == RegUsesMap.end())
201  return false;
202  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
203  int i = UsedByIndices.find_first();
204  if (i == -1) return false;
205  if ((size_t)i != LUIdx) return true;
206  return UsedByIndices.find_next(i) != -1;
207 }
208 
209 const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
210  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
211  assert(I != RegUsesMap.end() && "Unknown register!");
212  return I->second.UsedByIndices;
213 }
214 
215 void RegUseTracker::clear() {
216  RegUsesMap.clear();
217  RegSequence.clear();
218 }
219 
220 namespace {
221 
222 /// Formula - This class holds information that describes a formula for
223 /// computing a value that satisfies a use. It may include broken-out
224 /// immediates and scaled registers.
225 struct Formula {
226  /// Global base address used for complex addressing.
227  GlobalValue *BaseGV;
228 
229  /// Base offset for complex addressing.
230  int64_t BaseOffset;
231 
232  /// Whether any complex addressing has a base register.
233  bool HasBaseReg;
234 
235  /// The scale of any complex addressing.
236  int64_t Scale;
237 
238  /// BaseRegs - The list of "base" registers for this use.
239  /// The canonical representation of a formula is
240  /// 1. BaseRegs.size() > 1 implies ScaledReg != NULL and
241  /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
242  /// #1 enforces that the scaled register is always used when at least two
243  /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
244  /// #2 enforces that 1 * reg is reg.
245  /// This invariant can be temporarily broken while building a formula.
246  /// However, every formula inserted into the LSRInstance must be in canonical
247  /// form.
248  SmallVector<const SCEV *, 4> BaseRegs;
249 
250  /// ScaledReg - The 'scaled' register for this use. This should be non-null
251  /// when Scale is not zero.
252  const SCEV *ScaledReg;
253 
254  /// UnfoldedOffset - An additional constant offset which is added near the

255  /// use. This requires a temporary register, but the offset itself can
256  /// live in an add immediate field rather than a register.
257  int64_t UnfoldedOffset;
258 
259  Formula()
260  : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
261  ScaledReg(nullptr), UnfoldedOffset(0) {}
262 
263  void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
264 
265  bool isCanonical() const;
266 
267  void Canonicalize();
268 
269  bool Unscale();
270 
271  size_t getNumRegs() const;
272  Type *getType() const;
273 
274  void DeleteBaseReg(const SCEV *&S);
275 
276  bool referencesReg(const SCEV *S) const;
277  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
278  const RegUseTracker &RegUses) const;
279 
280  void print(raw_ostream &OS) const;
281  void dump() const;
282 };
283 
284 }
285 
286 /// DoInitialMatch - Recursion helper for InitialMatch.
287 static void DoInitialMatch(const SCEV *S, Loop *L,
288  SmallVectorImpl<const SCEV *> &Good,
289  SmallVectorImpl<const SCEV *> &Bad,
290  ScalarEvolution &SE) {
291  // Collect expressions which properly dominate the loop header.
292  if (SE.properlyDominates(S, L->getHeader())) {
293  Good.push_back(S);
294  return;
295  }
296 
297  // Look at add operands.
298  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
299  for (const SCEV *S : Add->operands())
300  DoInitialMatch(S, L, Good, Bad, SE);
301  return;
302  }
303 
304  // Look at addrec operands.
305  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
306  if (!AR->getStart()->isZero()) {
307  DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
308  DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
309  AR->getStepRecurrence(SE),
310  // FIXME: AR->getNoWrapFlags()
311  AR->getLoop(), SCEV::FlagAnyWrap),
312  L, Good, Bad, SE);
313  return;
314  }
315 
316  // Handle a multiplication by -1 (negation) if it didn't fold.
317  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
318  if (Mul->getOperand(0)->isAllOnesValue()) {
319  SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
320  const SCEV *NewMul = SE.getMulExpr(Ops);
321 
322  SmallVector<const SCEV *, 4> MyGood;
323  SmallVector<const SCEV *, 4> MyBad;
324  DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
325  const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
326  SE.getEffectiveSCEVType(NewMul->getType())));
327  for (const SCEV *S : MyGood)
328  Good.push_back(SE.getMulExpr(NegOne, S));
329  for (const SCEV *S : MyBad)
330  Bad.push_back(SE.getMulExpr(NegOne, S));
331  return;
332  }
333 
334  // Ok, we can't do anything interesting. Just stuff the whole thing into a
335  // register and hope for the best.
336  Bad.push_back(S);
337 }
338 
339 /// InitialMatch - Incorporate loop-variant parts of S into this Formula,
340 /// attempting to keep all loop-invariant and loop-computable values in a
341 /// single base register.
342 void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
343  SmallVector<const SCEV *, 4> Good;
344  SmallVector<const SCEV *, 4> Bad;
345  DoInitialMatch(S, L, Good, Bad, SE);
346  if (!Good.empty()) {
347  const SCEV *Sum = SE.getAddExpr(Good);
348  if (!Sum->isZero())
349  BaseRegs.push_back(Sum);
350  HasBaseReg = true;
351  }
352  if (!Bad.empty()) {
353  const SCEV *Sum = SE.getAddExpr(Bad);
354  if (!Sum->isZero())
355  BaseRegs.push_back(Sum);
356  HasBaseReg = true;
357  }
358  Canonicalize();
359 }
360 
361 /// \brief Check whether or not this formula satisfies the canonical
362 /// representation.
363 /// \see Formula::BaseRegs.
364 bool Formula::isCanonical() const {
365  if (ScaledReg)
366  return Scale != 1 || !BaseRegs.empty();
367  return BaseRegs.size() <= 1;
368 }
369 
370 /// \brief Helper method to morph a formula into its canonical representation.
371 /// \see Formula::BaseRegs.
372 /// Every formula having more than one base register must use the ScaledReg
373 /// field. Otherwise, we would have to do special cases everywhere in LSR
374 /// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
375 /// On the other hand, 1*reg should be canonicalized into reg.
376 void Formula::Canonicalize() {
377  if (isCanonical())
378  return;
379  // So far we did not need this case. This is easy to implement but it is
380  // useless to maintain dead code. Besides, it could hurt compile time.
381  assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
382  // Keep the invariant sum in BaseRegs and one of the variant parts in ScaledReg.
383  ScaledReg = BaseRegs.back();
384  BaseRegs.pop_back();
385  Scale = 1;
386  size_t BaseRegsSize = BaseRegs.size();
387  size_t Try = 0;
388  // If ScaledReg is an invariant, try to find a variant expression.
389  while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
390  std::swap(ScaledReg, BaseRegs[Try++]);
391 }
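// Illustrative example: a formula built as reg(A) + reg(B) + reg(C) (three
// base registers, Scale == 0) is not canonical; Canonicalize() moves one base
// register into ScaledReg with Scale = 1, preferring a loop-variant
// (SCEVAddRecExpr) register if one is available among the base registers,
// yielding reg(A) + reg(B) + 1*reg(C) and restoring invariant #1 above.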
392 
393 /// \brief Get rid of the scale in the formula.
394 /// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
395 /// \return true if it was possible to get rid of the scale, false otherwise.
396 /// \note After this operation the formula may not be in the canonical form.
397 bool Formula::Unscale() {
398  if (Scale != 1)
399  return false;
400  Scale = 0;
401  BaseRegs.push_back(ScaledReg);
402  ScaledReg = nullptr;
403  return true;
404 }
405 
406 /// getNumRegs - Return the total number of register operands used by this
407 /// formula. This does not include register uses implied by non-constant
408 /// addrec strides.
409 size_t Formula::getNumRegs() const {
410  return !!ScaledReg + BaseRegs.size();
411 }
412 
413 /// getType - Return the type of this formula, if it has one, or null
414 /// otherwise. This type is meaningless except for the bit size.
415 Type *Formula::getType() const {
416  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
417  ScaledReg ? ScaledReg->getType() :
418  BaseGV ? BaseGV->getType() :
419  nullptr;
420 }
421 
422 /// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
423 void Formula::DeleteBaseReg(const SCEV *&S) {
424  if (&S != &BaseRegs.back())
425  std::swap(S, BaseRegs.back());
426  BaseRegs.pop_back();
427 }
428 
429 /// referencesReg - Test if this formula references the given register.
430 bool Formula::referencesReg(const SCEV *S) const {
431  return S == ScaledReg ||
432  std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
433 }
434 
435 /// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
436 /// which are used by uses other than the use with the given index.
437 bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
438  const RegUseTracker &RegUses) const {
439  if (ScaledReg)
440  if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
441  return true;
442  for (const SCEV *BaseReg : BaseRegs)
443  if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
444  return true;
445  return false;
446 }
447 
448 void Formula::print(raw_ostream &OS) const {
449  bool First = true;
450  if (BaseGV) {
451  if (!First) OS << " + "; else First = false;
452  BaseGV->printAsOperand(OS, /*PrintType=*/false);
453  }
454  if (BaseOffset != 0) {
455  if (!First) OS << " + "; else First = false;
456  OS << BaseOffset;
457  }
458  for (const SCEV *BaseReg : BaseRegs) {
459  if (!First) OS << " + "; else First = false;
460  OS << "reg(" << *BaseReg << ')';
461  }
462  if (HasBaseReg && BaseRegs.empty()) {
463  if (!First) OS << " + "; else First = false;
464  OS << "**error: HasBaseReg**";
465  } else if (!HasBaseReg && !BaseRegs.empty()) {
466  if (!First) OS << " + "; else First = false;
467  OS << "**error: !HasBaseReg**";
468  }
469  if (Scale != 0) {
470  if (!First) OS << " + "; else First = false;
471  OS << Scale << "*reg(";
472  if (ScaledReg)
473  OS << *ScaledReg;
474  else
475  OS << "<unknown>";
476  OS << ')';
477  }
478  if (UnfoldedOffset != 0) {
479  if (!First) OS << " + ";
480  OS << "imm(" << UnfoldedOffset << ')';
481  }
482 }
483 
484 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
485 void Formula::dump() const {
486  print(errs()); errs() << '\n';
487 }
488 #endif
489 
490 /// isAddRecSExtable - Return true if the given addrec can be sign-extended
491 /// without changing its value.
492 static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
493  Type *WideTy =
494  IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
495  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
496 }
497 
498 /// isAddSExtable - Return true if the given add can be sign-extended
499 /// without changing its value.
500 static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
501  Type *WideTy =
502  IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
503  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
504 }
505 
506 /// isMulSExtable - Return true if the given mul can be sign-extended
507 /// without changing its value.
508 static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
509  Type *WideTy =
510  IntegerType::get(SE.getContext(),
511  SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
512  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
513 }
514 
515 /// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
516 /// and if the remainder is known to be zero, or null otherwise. If
517 /// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
518 /// to Y, ignoring that the multiplication may overflow, which is useful when
519 /// the result will be used in a context where the most significant bits are
520 /// ignored.
521 static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
522  ScalarEvolution &SE,
523  bool IgnoreSignificantBits = false) {
524  // Handle the trivial case, which works for any SCEV type.
525  if (LHS == RHS)
526  return SE.getConstant(LHS->getType(), 1);
527 
528  // Handle a few RHS special cases.
529  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
530  if (RC) {
531  const APInt &RA = RC->getValue()->getValue();
532  // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
533  // some folding.
534  if (RA.isAllOnesValue())
535  return SE.getMulExpr(LHS, RC);
536  // Handle x /s 1 as x.
537  if (RA == 1)
538  return LHS;
539  }
540 
541  // Check for a division of a constant by a constant.
542  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
543  if (!RC)
544  return nullptr;
545  const APInt &LA = C->getValue()->getValue();
546  const APInt &RA = RC->getValue()->getValue();
547  if (LA.srem(RA) != 0)
548  return nullptr;
549  return SE.getConstant(LA.sdiv(RA));
550  }
551 
552  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
553  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
554  if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
555  const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
556  IgnoreSignificantBits);
557  if (!Step) return nullptr;
558  const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
559  IgnoreSignificantBits);
560  if (!Start) return nullptr;
561  // FlagNW is independent of the start value, step direction, and is
562  // preserved with smaller magnitude steps.
563  // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
564  return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
565  }
566  return nullptr;
567  }
568 
569  // Distribute the sdiv over add operands, if the add doesn't overflow.
570  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
571  if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
572  SmallVector<const SCEV *, 8> Ops;
573  for (const SCEV *S : Add->operands()) {
574  const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
575  if (!Op) return nullptr;
576  Ops.push_back(Op);
577  }
578  return SE.getAddExpr(Ops);
579  }
580  return nullptr;
581  }
582 
583  // Check for a multiply operand that we can pull RHS out of.
584  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
585  if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
586  SmallVector<const SCEV *, 4> Ops;
587  bool Found = false;
588  for (const SCEV *S : Mul->operands()) {
589  if (!Found)
590  if (const SCEV *Q = getExactSDiv(S, RHS, SE,
591  IgnoreSignificantBits)) {
592  S = Q;
593  Found = true;
594  }
595  Ops.push_back(S);
596  }
597  return Found ? SE.getMulExpr(Ops) : nullptr;
598  }
599  return nullptr;
600  }
601 
602  // Otherwise we don't know.
603  return nullptr;
604 }
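// Worked example: with LHS = {0,+,4}<%L> and RHS = 4, the addrec case divides
// start and step separately and returns {0,+,1}<%L>, assuming the addrec is
// sign-extendable or IgnoreSignificantBits is set. With LHS = (%x * %y) and
// RHS = %y, the multiply case returns %x, again only when the multiply is
// known not to change value under sign extension or when
// IgnoreSignificantBits allows the possible overflow to be ignored.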
605 
606 /// ExtractImmediate - If S involves the addition of a constant integer value,
607 /// return that integer value, and mutate S to point to a new SCEV with that
608 /// value excluded.
609 static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
610  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
611  if (C->getValue()->getValue().getMinSignedBits() <= 64) {
612  S = SE.getConstant(C->getType(), 0);
613  return C->getValue()->getSExtValue();
614  }
615  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
616  SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
617  int64_t Result = ExtractImmediate(NewOps.front(), SE);
618  if (Result != 0)
619  S = SE.getAddExpr(NewOps);
620  return Result;
621  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
622  SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
623  int64_t Result = ExtractImmediate(NewOps.front(), SE);
624  if (Result != 0)
625  S = SE.getAddRecExpr(NewOps, AR->getLoop(),
626  // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
627  SCEV::FlagAnyWrap);
628  return Result;
629  }
630  return 0;
631 }
632 
633 /// ExtractSymbol - If S involves the addition of a GlobalValue address,
634 /// return that symbol, and mutate S to point to a new SCEV with that
635 /// value excluded.
636 static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
637  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
638  if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
639  S = SE.getConstant(GV->getType(), 0);
640  return GV;
641  }
642  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
643  SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
644  GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
645  if (Result)
646  S = SE.getAddExpr(NewOps);
647  return Result;
648  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
649  SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
650  GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
651  if (Result)
652  S = SE.getAddRecExpr(NewOps, AR->getLoop(),
653  // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
654  SCEV::FlagAnyWrap);
655  return Result;
656  }
657  return nullptr;
658 }
659 
660 /// isAddressUse - Returns true if the specified instruction is using the
661 /// specified value as an address.
662 static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
663  bool isAddress = isa<LoadInst>(Inst);
664  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
665  if (SI->getOperand(1) == OperandVal)
666  isAddress = true;
667  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
668  // Addressing modes can also be folded into prefetches and a variety
669  // of intrinsics.
670  switch (II->getIntrinsicID()) {
671  default: break;
672  case Intrinsic::prefetch:
673  case Intrinsic::x86_sse_storeu_ps:
674  case Intrinsic::x86_sse2_storeu_pd:
675  case Intrinsic::x86_sse2_storeu_dq:
676  case Intrinsic::x86_sse2_storel_dq:
677  if (II->getArgOperand(0) == OperandVal)
678  isAddress = true;
679  break;
680  }
681  }
682  return isAddress;
683 }
684 
685 /// getAccessType - Return the type of the memory being accessed.
686 static Type *getAccessType(const Instruction *Inst) {
687  Type *AccessTy = Inst->getType();
688  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
689  AccessTy = SI->getOperand(0)->getType();
690  else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
691  // Addressing modes can also be folded into prefetches and a variety
692  // of intrinsics.
693  switch (II->getIntrinsicID()) {
694  default: break;
695  case Intrinsic::x86_sse_storeu_ps:
696  case Intrinsic::x86_sse2_storeu_pd:
697  case Intrinsic::x86_sse2_storeu_dq:
698  case Intrinsic::x86_sse2_storel_dq:
699  AccessTy = II->getArgOperand(0)->getType();
700  break;
701  }
702  }
703 
704  // All pointers have the same requirements, so canonicalize them to an
705  // arbitrary pointer type to minimize variation.
706  if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
707  AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
708  PTy->getAddressSpace());
709 
710  return AccessTy;
711 }
712 
713 /// isExistingPhi - Return true if this AddRec is already a phi in its loop.
714 static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
715  for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
716  PHINode *PN = dyn_cast<PHINode>(I); ++I) {
717  if (SE.isSCEVable(PN->getType()) &&
718  (SE.getEffectiveSCEVType(PN->getType()) ==
719  SE.getEffectiveSCEVType(AR->getType())) &&
720  SE.getSCEV(PN) == AR)
721  return true;
722  }
723  return false;
724 }
725 
726 /// Check if expanding this expression is likely to incur significant cost. This
727 /// is tricky because SCEV doesn't track which expressions are actually computed
728 /// by the current IR.
729 ///
730 /// We currently allow expansion of IV increments that involve adds,
731 /// multiplication by constants, and AddRecs from existing phis.
732 ///
733 /// TODO: Allow UDivExpr if we can find an existing IV increment that is an
734 /// obvious multiple of the UDivExpr.
735 static bool isHighCostExpansion(const SCEV *S,
736  SmallPtrSetImpl<const SCEV*> &Processed,
737  ScalarEvolution &SE) {
738  // Zero/One operand expressions
739  switch (S->getSCEVType()) {
740  case scUnknown:
741  case scConstant:
742  return false;
743  case scTruncate:
744  return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
745  Processed, SE);
746  case scZeroExtend:
747  return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
748  Processed, SE);
749  case scSignExtend:
750  return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
751  Processed, SE);
752  }
753 
754  if (!Processed.insert(S).second)
755  return false;
756 
757  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
758  for (const SCEV *S : Add->operands()) {
759  if (isHighCostExpansion(S, Processed, SE))
760  return true;
761  }
762  return false;
763  }
764 
765  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
766  if (Mul->getNumOperands() == 2) {
767  // Multiplication by a constant is ok
768  if (isa<SCEVConstant>(Mul->getOperand(0)))
769  return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
770 
771  // If we have the value of one operand, check if an existing
772  // multiplication already generates this expression.
773  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
774  Value *UVal = U->getValue();
775  for (User *UR : UVal->users()) {
776  // If U is a constant, it may be used by a ConstantExpr.
777  Instruction *UI = dyn_cast<Instruction>(UR);
778  if (UI && UI->getOpcode() == Instruction::Mul &&
779  SE.isSCEVable(UI->getType())) {
780  return SE.getSCEV(UI) == Mul;
781  }
782  }
783  }
784  }
785  }
786 
787  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
788  if (isExistingPhi(AR, SE))
789  return false;
790  }
791 
792  // For now, consider any other type of expression (div/mul/min/max) high cost.
793  return true;
794 }
795 
796 /// DeleteTriviallyDeadInstructions - If any of the instructions in the
797 /// specified set are trivially dead, delete them and see if this makes any of
798 /// their operands subsequently dead.
799 static bool
800 DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
801  bool Changed = false;
802 
803  while (!DeadInsts.empty()) {
804  Value *V = DeadInsts.pop_back_val();
805  Instruction *I = dyn_cast_or_null<Instruction>(V);
806 
807  if (!I || !isInstructionTriviallyDead(I))
808  continue;
809 
810  for (Use &O : I->operands())
811  if (Instruction *U = dyn_cast<Instruction>(O)) {
812  O = nullptr;
813  if (U->use_empty())
814  DeadInsts.emplace_back(U);
815  }
816 
817  I->eraseFromParent();
818  Changed = true;
819  }
820 
821  return Changed;
822 }
823 
824 namespace {
825 class LSRUse;
826 }
827 
828 /// \brief Check if the addressing mode defined by \p F is completely
829 /// folded in \p LU at isel time.
830 /// This includes address-mode folding and special icmp tricks.
831 /// This function returns true if \p LU can accommodate what \p F
832 /// defines and up to 1 base + 1 scaled + offset.
833 /// In other words, if \p F has several base registers, this function may
834 /// still return true. Therefore, users still need to account for
835 /// additional base registers and/or unfolded offsets to derive an
836 /// accurate cost model.
837 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
838  const LSRUse &LU, const Formula &F);
839 // Get the cost of the scaling factor used in F for LU.
840 static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
841  const LSRUse &LU, const Formula &F);
842 
843 namespace {
844 
845 /// Cost - This class is used to measure and compare candidate formulae.
846 class Cost {
847  /// TODO: Some of these could be merged. Also, a lexical ordering
848  /// isn't always optimal.
849  unsigned NumRegs;
850  unsigned AddRecCost;
851  unsigned NumIVMuls;
852  unsigned NumBaseAdds;
853  unsigned ImmCost;
854  unsigned SetupCost;
855  unsigned ScaleCost;
856 
857 public:
858  Cost()
859  : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
860  SetupCost(0), ScaleCost(0) {}
861 
862  bool operator<(const Cost &Other) const;
863 
864  void Lose();
865 
866 #ifndef NDEBUG
867  // Once any of the metrics loses, they must all remain losers.
868  bool isValid() {
869  return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
870  | ImmCost | SetupCost | ScaleCost) != ~0u)
871  || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
872  & ImmCost & SetupCost & ScaleCost) == ~0u);
873  }
874 #endif
875 
876  bool isLoser() {
877  assert(isValid() && "invalid cost");
878  return NumRegs == ~0u;
879  }
880 
881  void RateFormula(const TargetTransformInfo &TTI,
882  const Formula &F,
883  SmallPtrSetImpl<const SCEV *> &Regs,
884  const DenseSet<const SCEV *> &VisitedRegs,
885  const Loop *L,
886  const SmallVectorImpl<int64_t> &Offsets,
887  ScalarEvolution &SE, DominatorTree &DT,
888  const LSRUse &LU,
889  SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
890 
891  void print(raw_ostream &OS) const;
892  void dump() const;
893 
894 private:
895  void RateRegister(const SCEV *Reg,
896  SmallPtrSetImpl<const SCEV *> &Regs,
897  const Loop *L,
898  ScalarEvolution &SE, DominatorTree &DT);
899  void RatePrimaryRegister(const SCEV *Reg,
900  SmallPtrSetImpl<const SCEV *> &Regs,
901  const Loop *L,
902  ScalarEvolution &SE, DominatorTree &DT,
903  SmallPtrSetImpl<const SCEV *> *LoserRegs);
904 };
905 
906 }
907 
908 /// RateRegister - Tally up interesting quantities from the given register.
909 void Cost::RateRegister(const SCEV *Reg,
910  SmallPtrSetImpl<const SCEV *> &Regs,
911  const Loop *L,
912  ScalarEvolution &SE, DominatorTree &DT) {
913  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
914  // If this is an addrec for another loop, don't second-guess its addrec phi
915  // nodes. LSR isn't currently smart enough to reason about more than one
916  // loop at a time. LSR has already run on inner loops, will not run on outer
917  // loops, and cannot be expected to change sibling loops.
918  if (AR->getLoop() != L) {
919  // If the AddRec exists, consider its register to be free and leave it alone.
920  if (isExistingPhi(AR, SE))
921  return;
922 
923  // Otherwise, do not consider this formula at all.
924  Lose();
925  return;
926  }
927  AddRecCost += 1; /// TODO: This should be a function of the stride.
928 
929  // Add the step value register, if it needs one.
930  // TODO: The non-affine case isn't precisely modeled here.
931  if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
932  if (!Regs.count(AR->getOperand(1))) {
933  RateRegister(AR->getOperand(1), Regs, L, SE, DT);
934  if (isLoser())
935  return;
936  }
937  }
938  }
939  ++NumRegs;
940 
941  // Rough heuristic; favor registers which don't require extra setup
942  // instructions in the preheader.
943  if (!isa<SCEVUnknown>(Reg) &&
944  !isa<SCEVConstant>(Reg) &&
945  !(isa<SCEVAddRecExpr>(Reg) &&
946  (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
947  isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
948  ++SetupCost;
949 
950  NumIVMuls += isa<SCEVMulExpr>(Reg) &&
951  SE.hasComputableLoopEvolution(Reg, L);
952 }
953 
954 /// RatePrimaryRegister - Record this register in the set. If we haven't seen it
955 /// before, rate it. Optional LoserRegs provides a way to declare any formula
956 /// that refers to one of those regs an instant loser.
957 void Cost::RatePrimaryRegister(const SCEV *Reg,
958  SmallPtrSetImpl<const SCEV *> &Regs,
959  const Loop *L,
960  ScalarEvolution &SE, DominatorTree &DT,
961  SmallPtrSetImpl<const SCEV *> *LoserRegs) {
962  if (LoserRegs && LoserRegs->count(Reg)) {
963  Lose();
964  return;
965  }
966  if (Regs.insert(Reg).second) {
967  RateRegister(Reg, Regs, L, SE, DT);
968  if (LoserRegs && isLoser())
969  LoserRegs->insert(Reg);
970  }
971 }
972 
973 void Cost::RateFormula(const TargetTransformInfo &TTI,
974  const Formula &F,
975  SmallPtrSetImpl<const SCEV *> &Regs,
976  const DenseSet<const SCEV *> &VisitedRegs,
977  const Loop *L,
978  const SmallVectorImpl<int64_t> &Offsets,
979  ScalarEvolution &SE, DominatorTree &DT,
980  const LSRUse &LU,
981  SmallPtrSetImpl<const SCEV *> *LoserRegs) {
982  assert(F.isCanonical() && "Cost is accurate only for canonical formula");
983  // Tally up the registers.
984  if (const SCEV *ScaledReg = F.ScaledReg) {
985  if (VisitedRegs.count(ScaledReg)) {
986  Lose();
987  return;
988  }
989  RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
990  if (isLoser())
991  return;
992  }
993  for (const SCEV *BaseReg : F.BaseRegs) {
994  if (VisitedRegs.count(BaseReg)) {
995  Lose();
996  return;
997  }
998  RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
999  if (isLoser())
1000  return;
1001  }
1002 
1003  // Determine how many (unfolded) adds we'll need inside the loop.
1004  size_t NumBaseParts = F.getNumRegs();
1005  if (NumBaseParts > 1)
1006  // Do not count the base and a possible second register if the target
1007  // allows folding two registers.
1008  NumBaseAdds +=
1009  NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
1010  NumBaseAdds += (F.UnfoldedOffset != 0);
1011 
1012  // Accumulate non-free scaling amounts.
1013  ScaleCost += getScalingFactorCost(TTI, LU, F);
1014 
1015  // Tally up the non-zero immediates.
1016  for (int64_t O : Offsets) {
1017  int64_t Offset = (uint64_t)O + F.BaseOffset;
1018  if (F.BaseGV)
1019  ImmCost += 64; // Handle symbolic values conservatively.
1020  // TODO: This should probably be the pointer size.
1021  else if (Offset != 0)
1022  ImmCost += APInt(64, Offset, true).getMinSignedBits();
1023  }
1024  assert(isValid() && "invalid cost");
1025 }
1026 
1027 /// Lose - Set this cost to a losing value.
1028 void Cost::Lose() {
1029  NumRegs = ~0u;
1030  AddRecCost = ~0u;
1031  NumIVMuls = ~0u;
1032  NumBaseAdds = ~0u;
1033  ImmCost = ~0u;
1034  SetupCost = ~0u;
1035  ScaleCost = ~0u;
1036 }
1037 
1038 /// operator< - Choose the lower cost.
1039 bool Cost::operator<(const Cost &Other) const {
1040  return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
1041  ImmCost, SetupCost) <
1042  std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
1043  Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
1044  Other.SetupCost);
1045 }
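// Note: std::tie yields a strict lexicographic comparison, so a formula that
// needs fewer registers always wins regardless of the other metrics, and each
// later field only breaks ties among the earlier ones, in the order listed
// above (NumRegs, then AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost, ImmCost,
// SetupCost).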
1046 
1047 void Cost::print(raw_ostream &OS) const {
1048  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
1049  if (AddRecCost != 0)
1050  OS << ", with addrec cost " << AddRecCost;
1051  if (NumIVMuls != 0)
1052  OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
1053  if (NumBaseAdds != 0)
1054  OS << ", plus " << NumBaseAdds << " base add"
1055  << (NumBaseAdds == 1 ? "" : "s");
1056  if (ScaleCost != 0)
1057  OS << ", plus " << ScaleCost << " scale cost";
1058  if (ImmCost != 0)
1059  OS << ", plus " << ImmCost << " imm cost";
1060  if (SetupCost != 0)
1061  OS << ", plus " << SetupCost << " setup cost";
1062 }
1063 
1064 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1065 void Cost::dump() const {
1066  print(errs()); errs() << '\n';
1067 }
1068 #endif
1069 
1070 namespace {
1071 
1072 /// LSRFixup - An operand value in an instruction which is to be replaced
1073 /// with some equivalent, possibly strength-reduced, replacement.
1074 struct LSRFixup {
1075  /// UserInst - The instruction which will be updated.
1076  Instruction *UserInst;
1077 
1078  /// OperandValToReplace - The operand of the instruction which will
1079  /// be replaced. The operand may be used more than once; every instance
1080  /// will be replaced.
1081  Value *OperandValToReplace;
1082 
1083  /// PostIncLoops - If this user is to use the post-incremented value of an
1084  /// induction variable, this variable is non-null and holds the loop
1085  /// associated with the induction variable.
1086  PostIncLoopSet PostIncLoops;
1087 
1088  /// LUIdx - The index of the LSRUse describing the expression which
1089  /// this fixup needs, minus an offset (below).
1090  size_t LUIdx;
1091 
1092  /// Offset - A constant offset to be added to the LSRUse expression.
1093  /// This allows multiple fixups to share the same LSRUse with different
1094  /// offsets, for example in an unrolled loop.
1095  int64_t Offset;
1096 
1097  bool isUseFullyOutsideLoop(const Loop *L) const;
1098 
1099  LSRFixup();
1100 
1101  void print(raw_ostream &OS) const;
1102  void dump() const;
1103 };
1104 
1105 }
1106 
1107 LSRFixup::LSRFixup()
1108  : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
1109  Offset(0) {}
1110 
1111 /// isUseFullyOutsideLoop - Test whether this fixup always uses its
1112 /// value outside of the given loop.
1113 bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
1114  // PHI nodes use their value in their incoming blocks.
1115  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1116  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1117  if (PN->getIncomingValue(i) == OperandValToReplace &&
1118  L->contains(PN->getIncomingBlock(i)))
1119  return false;
1120  return true;
1121  }
1122 
1123  return !L->contains(UserInst);
1124 }
1125 
1126 void LSRFixup::print(raw_ostream &OS) const {
1127  OS << "UserInst=";
1128  // Store is common and interesting enough to be worth special-casing.
1129  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1130  OS << "store ";
1131  Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
1132  } else if (UserInst->getType()->isVoidTy())
1133  OS << UserInst->getOpcodeName();
1134  else
1135  UserInst->printAsOperand(OS, /*PrintType=*/false);
1136 
1137  OS << ", OperandValToReplace=";
1138  OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
1139 
1140  for (const Loop *PIL : PostIncLoops) {
1141  OS << ", PostIncLoop=";
1142  PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
1143  }
1144 
1145  if (LUIdx != ~size_t(0))
1146  OS << ", LUIdx=" << LUIdx;
1147 
1148  if (Offset != 0)
1149  OS << ", Offset=" << Offset;
1150 }
1151 
1152 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1153 void LSRFixup::dump() const {
1154  print(errs()); errs() << '\n';
1155 }
1156 #endif
1157 
1158 namespace {
1159 
1160 /// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
1161 /// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
1162 struct UniquifierDenseMapInfo {
1163  static SmallVector<const SCEV *, 4> getEmptyKey() {
1164  SmallVector<const SCEV *, 4> V;
1165  V.push_back(reinterpret_cast<const SCEV *>(-1));
1166  return V;
1167  }
1168 
1169  static SmallVector<const SCEV *, 4> getTombstoneKey() {
1170  SmallVector<const SCEV *, 4> V;
1171  V.push_back(reinterpret_cast<const SCEV *>(-2));
1172  return V;
1173  }
1174 
1175  static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
1176  return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
1177  }
1178 
1179  static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
1180  const SmallVector<const SCEV *, 4> &RHS) {
1181  return LHS == RHS;
1182  }
1183 };
1184 
1185 /// LSRUse - This class holds the state that LSR keeps for each use in
1186 /// IVUsers, as well as uses invented by LSR itself. It includes information
1187 /// about what kinds of things can be folded into the user, information about
1188 /// the user itself, and information about how the use may be satisfied.
1189 /// TODO: Represent multiple users of the same expression in common?
1190 class LSRUse {
1191  DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
1192 
1193 public:
1194  /// KindType - An enum for a kind of use, indicating what types of
1195  /// scaled and immediate operands it might support.
1196  enum KindType {
1197  Basic, ///< A normal use, with no folding.
1198  Special, ///< A special case of basic, allowing -1 scales.
1199  Address, ///< An address use; folding according to TargetLowering
1200  ICmpZero ///< An equality icmp with both operands folded into one.
1201  // TODO: Add a generic icmp too?
1202  };
1203 
1204  typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
1205 
1206  KindType Kind;
1207  Type *AccessTy;
1208 
1210  int64_t MinOffset;
1211  int64_t MaxOffset;
1212 
1213  /// AllFixupsOutsideLoop - This records whether all of the fixups using this
1214  /// LSRUse are outside of the loop, in which case some special-case heuristics
1215  /// may be used.
1216  bool AllFixupsOutsideLoop;
1217 
1218  /// RigidFormula is set to true to guarantee that this use will be associated
1219  /// with a single formula--the one that initially matched. Some SCEV
1220  /// expressions cannot be expanded. This allows LSR to consider the registers
1221  /// used by those expressions without the need to expand them later after
1222  /// changing the formula.
1223  bool RigidFormula;
1224 
1225  /// WidestFixupType - This records the widest use type for any fixup using
1226  /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
1227  /// max fixup widths to be equivalent, because the narrower one may be relying
1228  /// on the implicit truncation to truncate away bogus bits.
1229  Type *WidestFixupType;
1230 
1231  /// Formulae - A list of ways to build a value that can satisfy this user.
1232  /// After the list is populated, one of these is selected heuristically and
1233  /// used to formulate a replacement for OperandValToReplace in UserInst.
1234  SmallVector<Formula, 12> Formulae;
1235 
1236  /// Regs - The set of register candidates used by all formulae in this LSRUse.
1237  SmallPtrSet<const SCEV *, 4> Regs;
1238 
1239  LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
1240  MinOffset(INT64_MAX),
1241  MaxOffset(INT64_MIN),
1242  AllFixupsOutsideLoop(true),
1243  RigidFormula(false),
1244  WidestFixupType(nullptr) {}
1245 
1246  bool HasFormulaWithSameRegs(const Formula &F) const;
1247  bool InsertFormula(const Formula &F);
1248  void DeleteFormula(Formula &F);
1249  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1250 
1251  void print(raw_ostream &OS) const;
1252  void dump() const;
1253 };
1254 
1255 }
1256 
1257 /// HasFormulaWithSameRegs - Test whether this use has a formula with the same
1258 /// registers as the given formula.
1259 bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1260  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1261  if (F.ScaledReg) Key.push_back(F.ScaledReg);
1262  // Unstable sort by host order ok, because this is only used for uniquifying.
1263  std::sort(Key.begin(), Key.end());
1264  return Uniquifier.count(Key);
1265 }
1266 
1267 /// InsertFormula - If the given formula has not yet been inserted, add it to
1268 /// the list, and return true. Return false otherwise.
1269 /// The formula must be in canonical form.
1270 bool LSRUse::InsertFormula(const Formula &F) {
1271  assert(F.isCanonical() && "Invalid canonical representation");
1272 
1273  if (!Formulae.empty() && RigidFormula)
1274  return false;
1275 
1276  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1277  if (F.ScaledReg) Key.push_back(F.ScaledReg);
1278  // Unstable sort by host order ok, because this is only used for uniquifying.
1279  std::sort(Key.begin(), Key.end());
1280 
1281  if (!Uniquifier.insert(Key).second)
1282  return false;
1283 
1284  // Using a register to hold the value of 0 is not profitable.
1285  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
1286  "Zero allocated in a scaled register!");
1287 #ifndef NDEBUG
1288  for (const SCEV *BaseReg : F.BaseRegs)
1289  assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1290 #endif
1291 
1292  // Add the formula to the list.
1293  Formulae.push_back(F);
1294 
1295  // Record registers now being used by this use.
1296  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1297  if (F.ScaledReg)
1298  Regs.insert(F.ScaledReg);
1299 
1300  return true;
1301 }
1302 
1303 /// DeleteFormula - Remove the given formula from this use's list.
1304 void LSRUse::DeleteFormula(Formula &F) {
1305  if (&F != &Formulae.back())
1306  std::swap(F, Formulae.back());
1307  Formulae.pop_back();
1308 }
1309 
1310 /// RecomputeRegs - Recompute the Regs field, and update RegUses.
1311 void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1312  // Now that we've filtered out some formulae, recompute the Regs set.
1313  SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1314  Regs.clear();
1315  for (const Formula &F : Formulae) {
1316  if (F.ScaledReg) Regs.insert(F.ScaledReg);
1317  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1318  }
1319 
1320  // Update the RegTracker.
1321  for (const SCEV *S : OldRegs)
1322  if (!Regs.count(S))
1323  RegUses.DropRegister(S, LUIdx);
1324 }
1325 
1326 void LSRUse::print(raw_ostream &OS) const {
1327  OS << "LSR Use: Kind=";
1328  switch (Kind) {
1329  case Basic: OS << "Basic"; break;
1330  case Special: OS << "Special"; break;
1331  case ICmpZero: OS << "ICmpZero"; break;
1332  case Address:
1333  OS << "Address of ";
1334  if (AccessTy->isPointerTy())
1335  OS << "pointer"; // the full pointer type could be really verbose
1336  else
1337  OS << *AccessTy;
1338  }
1339 
1340  OS << ", Offsets={";
1341  bool NeedComma = false;
1342  for (int64_t O : Offsets) {
1343  if (NeedComma) OS << ',';
1344  OS << O;
1345  NeedComma = true;
1346  }
1347  OS << '}';
1348 
1349  if (AllFixupsOutsideLoop)
1350  OS << ", all-fixups-outside-loop";
1351 
1352  if (WidestFixupType)
1353  OS << ", widest fixup type: " << *WidestFixupType;
1354 }
1355 
1356 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1357 void LSRUse::dump() const {
1358  print(errs()); errs() << '\n';
1359 }
1360 #endif
1361 
1362 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1363  LSRUse::KindType Kind, Type *AccessTy,
1364  GlobalValue *BaseGV, int64_t BaseOffset,
1365  bool HasBaseReg, int64_t Scale) {
1366  switch (Kind) {
1367  case LSRUse::Address:
1368  return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
1369 
1370  case LSRUse::ICmpZero:
1371  // There's not even a target hook for querying whether it would be legal to
1372  // fold a GV into an ICmp.
1373  if (BaseGV)
1374  return false;
1375 
1376  // ICmp only has two operands; don't allow more than two non-trivial parts.
1377  if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1378  return false;
1379 
1380  // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1381  // putting the scaled register in the other operand of the icmp.
1382  if (Scale != 0 && Scale != -1)
1383  return false;
1384 
1385  // If we have low-level target information, ask the target if it can fold an
1386  // integer immediate on an icmp.
1387  if (BaseOffset != 0) {
1388  // We have one of:
1389  // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1390  // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
1391  // Offs is the ICmp immediate.
1392  if (Scale == 0)
1393  // The cast does the right thing with INT64_MIN.
1394  BaseOffset = -(uint64_t)BaseOffset;
1395  return TTI.isLegalICmpImmediate(BaseOffset);
1396  }
1397 
1398  // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
1399  return true;
1400 
1401  case LSRUse::Basic:
1402  // Only handle single-register values.
1403  return !BaseGV && Scale == 0 && BaseOffset == 0;
1404 
1405  case LSRUse::Special:
1406  // Special case Basic to handle -1 scales.
1407  return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1408  }
1409 
1410  llvm_unreachable("Invalid LSRUse Kind!");
1411 }
1412 
1413 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1414  int64_t MinOffset, int64_t MaxOffset,
1415  LSRUse::KindType Kind, Type *AccessTy,
1416  GlobalValue *BaseGV, int64_t BaseOffset,
1417  bool HasBaseReg, int64_t Scale) {
1418  // Check for overflow.
1419  if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1420  (MinOffset > 0))
1421  return false;
1422  MinOffset = (uint64_t)BaseOffset + MinOffset;
1423  if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1424  (MaxOffset > 0))
1425  return false;
1426  MaxOffset = (uint64_t)BaseOffset + MaxOffset;
1427 
1428  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
1429  HasBaseReg, Scale) &&
1430  isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
1431  HasBaseReg, Scale);
1432 }
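// Worked example of the overflow check above: with BaseOffset = INT64_MAX and
// MinOffset = 1, the wrapped sum is negative, so "sum > BaseOffset" disagrees
// with "MinOffset > 0" and the combination is rejected instead of being folded
// with a bogus offset range; adding an offset must move the result in the
// direction of the offset's sign for the range to be considered.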
1433 
1434 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1435  int64_t MinOffset, int64_t MaxOffset,
1436  LSRUse::KindType Kind, Type *AccessTy,
1437  const Formula &F) {
1438  // For the purpose of isAMCompletelyFolded either having a canonical formula
1439  // or a scale not equal to zero is correct.
1440  // Problems may arise from non-canonical formulae having a scale == 0.
1441  // Strictly speaking it would be best to just rely on canonical formulae.
1442  // However, when we generate the scaled formulae, we first check that the
1443  // scaling factor is profitable before computing the actual ScaledReg for
1444  // compile time's sake.
1445  assert((F.isCanonical() || F.Scale != 0));
1446  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1447  F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
1448 }
1449 
1450 /// isLegalUse - Test whether we know how to expand the current formula.
1451 static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1452  int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
1453  GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
1454  int64_t Scale) {
1455  // We know how to expand completely foldable formulae.
1456  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1457  BaseOffset, HasBaseReg, Scale) ||
1458  // Or formulae that use a base register produced by a sum of base
1459  // registers.
1460  (Scale == 1 &&
1461  isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1462  BaseGV, BaseOffset, true, 0));
1463 }
1464 
1465 static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1466  int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
1467  const Formula &F) {
1468  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
1469  F.BaseOffset, F.HasBaseReg, F.Scale);
1470 }
1471 
1472 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1473  const LSRUse &LU, const Formula &F) {
1474  return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1475  LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
1476  F.Scale);
1477 }
1478 
1479 static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
1480  const LSRUse &LU, const Formula &F) {
1481  if (!F.Scale)
1482  return 0;
1483 
1484  // If the use is not completely folded in that instruction, we will have to
1485  // pay an extra cost only for scale != 1.
1486  if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1487  LU.AccessTy, F))
1488  return F.Scale != 1;
1489 
1490  switch (LU.Kind) {
1491  case LSRUse::Address: {
1492  // Check the scaling factor cost with both the min and max offsets.
1493  int ScaleCostMinOffset =
1494  TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
1495  F.BaseOffset + LU.MinOffset,
1496  F.HasBaseReg, F.Scale);
1497  int ScaleCostMaxOffset =
1498  TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
1499  F.BaseOffset + LU.MaxOffset,
1500  F.HasBaseReg, F.Scale);
1501 
1502  assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
1503  "Legal addressing mode has an illegal cost!");
1504  return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1505  }
1506  case LSRUse::ICmpZero:
1507  case LSRUse::Basic:
1508  case LSRUse::Special:
1509  // The use is completely folded, i.e., everything is folded into the
1510  // instruction.
1511  return 0;
1512  }
1513 
1514  llvm_unreachable("Invalid LSRUse Kind!");
1515 }
1516 
1517 static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1518  LSRUse::KindType Kind, Type *AccessTy,
1519  GlobalValue *BaseGV, int64_t BaseOffset,
1520  bool HasBaseReg) {
1521  // Fast-path: zero is always foldable.
1522  if (BaseOffset == 0 && !BaseGV) return true;
1523 
1524  // Conservatively, create an address with an immediate and a
1525  // base and a scale.
1526  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1527 
1528  // Canonicalize a scale of 1 to a base register if the formula doesn't
1529  // already have a base register.
1530  if (!HasBaseReg && Scale == 1) {
1531  Scale = 0;
1532  HasBaseReg = true;
1533  }
1534 
1535  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
1536  HasBaseReg, Scale);
1537 }
1538 
1539 static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1540  ScalarEvolution &SE, int64_t MinOffset,
1541  int64_t MaxOffset, LSRUse::KindType Kind,
1542  Type *AccessTy, const SCEV *S, bool HasBaseReg) {
1543  // Fast-path: zero is always foldable.
1544  if (S->isZero()) return true;
1545 
1546  // Conservatively, create an address with an immediate and a
1547  // base and a scale.
1548  int64_t BaseOffset = ExtractImmediate(S, SE);
1549  GlobalValue *BaseGV = ExtractSymbol(S, SE);
1550 
1551  // If there's anything else involved, it's not foldable.
1552  if (!S->isZero()) return false;
1553 
1554  // Fast-path: zero is always foldable.
1555  if (BaseOffset == 0 && !BaseGV) return true;
1556 
1557  // Conservatively, create an address with an immediate and a
1558  // base and a scale.
1559  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1560 
1561  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1562  BaseOffset, HasBaseReg, Scale);
1563 }
1564 
1565 namespace {
1566 
1567 /// IVInc - An individual increment in a Chain of IV increments.
1568 /// Relate an IV user to an expression that computes the IV it uses from the IV
1569 /// used by the previous link in the Chain.
1570 ///
1571 /// For the head of a chain, IncExpr holds the absolute SCEV expression for the
1572 /// original IVOperand. The head of the chain's IVOperand is only valid during
1573 /// chain collection, before LSR replaces IV users. During chain generation,
1574 /// IncExpr can be used to find the new IVOperand that computes the same
1575 /// expression.
1576 struct IVInc {
1577  Instruction *UserInst;
1578  Value* IVOperand;
1579  const SCEV *IncExpr;
1580 
1581  IVInc(Instruction *U, Value *O, const SCEV *E):
1582  UserInst(U), IVOperand(O), IncExpr(E) {}
1583 };
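// Illustrative sketch of a chain: for a pointer IV bumped by 4 between two
// loads in the loop body,
//
//   %p1 = phi i8* [ %base, %entry ], [ %p2, %latch ]
//   %v1 = load i8, i8* %p1
//   %p2 = getelementptr i8, i8* %p1, i64 4
//   %v2 = load i8, i8* %p2
//
// the head IVInc records the first load with IncExpr holding the SCEV of %p1
// itself, and the next link records the second load with IVOperand = %p2 and
// IncExpr = +4, i.e. how its IV is computed from the previous link's IV.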
1584 
1585 // IVChain - The list of IV increments in program order.
1586 // We typically add the head of a chain without finding subsequent links.
1587 struct IVChain {
1588  SmallVector<IVInc,1> Incs;
1589  const SCEV *ExprBase;
1590 
1591  IVChain() : ExprBase(nullptr) {}
1592 
1593  IVChain(const IVInc &Head, const SCEV *Base)
1594  : Incs(1, Head), ExprBase(Base) {}
1595 
1596  typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
1597 
1598  // begin - return the first increment in the chain, skipping the chain head.
1599  const_iterator begin() const {
1600  assert(!Incs.empty());
1601  return std::next(Incs.begin());
1602  }
1603  const_iterator end() const {
1604  return Incs.end();
1605  }
1606 
1607  // hasIncs - Returns true if this chain contains any increments beyond the head.
1608  bool hasIncs() const { return Incs.size() >= 2; }
1609 
1610  // add - Add an IVInc to the end of this chain.
1611  void add(const IVInc &X) { Incs.push_back(X); }
1612 
1613  // tailUserInst - Returns the last UserInst in the chain.
1614  Instruction *tailUserInst() const { return Incs.back().UserInst; }
1615 
1616  // isProfitableIncrement - Returns true if IncExpr can be profitably added to
1617  // this chain.
1618  bool isProfitableIncrement(const SCEV *OperExpr,
1619  const SCEV *IncExpr,
1620  ScalarEvolution&);
1621 };
1622 
1623 /// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
1624 /// Distinguish between FarUsers that definitely cross IV increments and
1625 /// NearUsers that may be used between IV increments.
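///
/// For example (illustrative): a load addressed off the chain's current IV
/// value starts out as a NearUser; once the chain is extended past another
/// nonzero increment, that load is known to cross an increment and is moved
/// into FarUsers (see ChainInstruction below).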
 1626 struct ChainUsers {
 1627  SmallPtrSet<Instruction*, 4> FarUsers;
 1628  SmallPtrSet<Instruction*, 4> NearUsers;
1629 };
1630 
1631 /// LSRInstance - This class holds state for the main loop strength reduction
1632 /// logic.
1633 class LSRInstance {
1634  IVUsers &IU;
1635  ScalarEvolution &SE;
1636  DominatorTree &DT;
1637  LoopInfo &LI;
1638  const TargetTransformInfo &TTI;
1639  Loop *const L;
1640  bool Changed;
1641 
1642  /// IVIncInsertPos - This is the insert position that the current loop's
1643  /// induction variable increment should be placed. In simple loops, this is
1644  /// the latch block's terminator. But in more complicated cases, this is a
1645  /// position which will dominate all the in-loop post-increment users.
1646  Instruction *IVIncInsertPos;
1647 
1648  /// Factors - Interesting factors between use strides.
 1649  SmallSetVector<int64_t, 8> Factors;
 1650 
1651  /// Types - Interesting use types, to facilitate truncation reuse.
 1652  SmallSetVector<Type *, 4> Types;
 1653 
1654  /// Fixups - The list of operands which are to be replaced.
 1655  SmallVector<LSRFixup, 16> Fixups;
 1656 
1657  /// Uses - The list of interesting uses.
 1658  SmallVector<LSRUse, 16> Uses;
 1659 
1660  /// RegUses - Track which uses use which register candidates.
1661  RegUseTracker RegUses;
1662 
1663  // Limit the number of chains to avoid quadratic behavior. We don't expect to
1664  // have more than a few IV increment chains in a loop. Missing a Chain falls
1665  // back to normal LSR behavior for those uses.
1666  static const unsigned MaxChains = 8;
1667 
1668  /// IVChainVec - IV users can form a chain of IV increments.
 1669  SmallVector<IVChain, 8> IVChainVec;
 1670 
1671  /// IVIncSet - IV users that belong to profitable IVChains.
 1672  SmallPtrSet<Use*, MaxChains> IVIncSet;
 1673 
1674  void OptimizeShadowIV();
1675  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
1676  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
1677  void OptimizeLoopTermCond();
1678 
1679  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
1680  SmallVectorImpl<ChainUsers> &ChainUsersVec);
1681  void FinalizeChain(IVChain &Chain);
1682  void CollectChains();
1683  void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
1684  SmallVectorImpl<WeakVH> &DeadInsts);
1685 
1686  void CollectInterestingTypesAndFactors();
1687  void CollectFixupsAndInitialFormulae();
1688 
1689  LSRFixup &getNewFixup() {
1690  Fixups.push_back(LSRFixup());
1691  return Fixups.back();
1692  }
1693 
1694  // Support for sharing of LSRUses between LSRFixups.
 1695  typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
 1696  UseMapTy UseMap;
1697 
1698  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
1699  LSRUse::KindType Kind, Type *AccessTy);
1700 
1701  std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
1702  LSRUse::KindType Kind,
1703  Type *AccessTy);
1704 
1705  void DeleteUse(LSRUse &LU, size_t LUIdx);
1706 
1707  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
1708 
1709  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1710  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1711  void CountRegisters(const Formula &F, size_t LUIdx);
1712  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
1713 
1714  void CollectLoopInvariantFixupsAndFormulae();
1715 
1716  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
1717  unsigned Depth = 0);
1718 
1719  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
1720  const Formula &Base, unsigned Depth,
1721  size_t Idx, bool IsScaledReg = false);
1722  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
1723  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1724  const Formula &Base, size_t Idx,
1725  bool IsScaledReg = false);
1726  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1727  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1728  const Formula &Base,
1729  const SmallVectorImpl<int64_t> &Worklist,
1730  size_t Idx, bool IsScaledReg = false);
1731  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1732  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1733  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1734  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
1735  void GenerateCrossUseConstantOffsets();
1736  void GenerateAllReuseFormulae();
1737 
1738  void FilterOutUndesirableDedicatedRegisters();
1739 
1740  size_t EstimateSearchSpaceComplexity() const;
1741  void NarrowSearchSpaceByDetectingSupersets();
1742  void NarrowSearchSpaceByCollapsingUnrolledCode();
1743  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
1744  void NarrowSearchSpaceByPickingWinnerRegs();
1745  void NarrowSearchSpaceUsingHeuristics();
1746 
1747  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
1748  Cost &SolutionCost,
 1749  SmallVectorImpl<const Formula *> &Workspace,
 1750  const Cost &CurCost,
1751  const SmallPtrSet<const SCEV *, 16> &CurRegs,
1752  DenseSet<const SCEV *> &VisitedRegs) const;
1753  void Solve(SmallVectorImpl<const Formula *> &Solution) const;
1754 
 1755  BasicBlock::iterator
 1756  HoistInsertPosition(BasicBlock::iterator IP,
1757  const SmallVectorImpl<Instruction *> &Inputs) const;
 1758  BasicBlock::iterator
 1759  AdjustInsertPositionForExpand(BasicBlock::iterator IP,
1760  const LSRFixup &LF,
1761  const LSRUse &LU,
1762  SCEVExpander &Rewriter) const;
1763 
1764  Value *Expand(const LSRFixup &LF,
1765  const Formula &F,
 1766  BasicBlock::iterator IP,
 1767  SCEVExpander &Rewriter,
 1768  SmallVectorImpl<WeakVH> &DeadInsts) const;
1769  void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
1770  const Formula &F,
 1771  SCEVExpander &Rewriter,
 1772  SmallVectorImpl<WeakVH> &DeadInsts,
1773  Pass *P) const;
1774  void Rewrite(const LSRFixup &LF,
1775  const Formula &F,
 1776  SCEVExpander &Rewriter,
 1777  SmallVectorImpl<WeakVH> &DeadInsts,
1778  Pass *P) const;
1779  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
1780  Pass *P);
1781 
1782 public:
1783  LSRInstance(Loop *L, Pass *P);
1784 
1785  bool getChanged() const { return Changed; }
1786 
1787  void print_factors_and_types(raw_ostream &OS) const;
1788  void print_fixups(raw_ostream &OS) const;
1789  void print_uses(raw_ostream &OS) const;
1790  void print(raw_ostream &OS) const;
1791  void dump() const;
1792 };
1793 
1794 }
1795 
 1796 /// OptimizeShadowIV - If IV is used in an int-to-float cast
1797 /// inside the loop then try to eliminate the cast operation.
1798 void LSRInstance::OptimizeShadowIV() {
1799  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1800  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1801  return;
1802 
1803  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
1804  UI != E; /* empty */) {
1805  IVUsers::const_iterator CandidateUI = UI;
1806  ++UI;
1807  Instruction *ShadowUse = CandidateUI->getUser();
1808  Type *DestTy = nullptr;
1809  bool IsSigned = false;
1810 
 1811  /* If shadow use is an int->float cast then insert a second IV
1812  to eliminate this cast.
1813 
1814  for (unsigned i = 0; i < n; ++i)
1815  foo((double)i);
1816 
1817  is transformed into
1818 
1819  double d = 0.0;
1820  for (unsigned i = 0; i < n; ++i, ++d)
1821  foo(d);
1822  */
1823  if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
1824  IsSigned = false;
1825  DestTy = UCast->getDestTy();
1826  }
1827  else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
1828  IsSigned = true;
1829  DestTy = SCast->getDestTy();
1830  }
1831  if (!DestTy) continue;
1832 
1833  // If target does not support DestTy natively then do not apply
1834  // this transformation.
1835  if (!TTI.isTypeLegal(DestTy)) continue;
1836 
1837  PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
1838  if (!PH) continue;
1839  if (PH->getNumIncomingValues() != 2) continue;
1840 
1841  Type *SrcTy = PH->getType();
1842  int Mantissa = DestTy->getFPMantissaWidth();
1843  if (Mantissa == -1) continue;
1844  if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
1845  continue;
1846 
1847  unsigned Entry, Latch;
1848  if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
1849  Entry = 0;
1850  Latch = 1;
1851  } else {
1852  Entry = 1;
1853  Latch = 0;
1854  }
1855 
 1856  ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
 1857  if (!Init) continue;
1858  Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
1859  (double)Init->getSExtValue() :
1860  (double)Init->getZExtValue());
1861 
1862  BinaryOperator *Incr =
 1863  dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
 1864  if (!Incr) continue;
1865  if (Incr->getOpcode() != Instruction::Add
1866  && Incr->getOpcode() != Instruction::Sub)
1867  continue;
1868 
1869  /* Initialize new IV, double d = 0.0 in above example. */
1870  ConstantInt *C = nullptr;
1871  if (Incr->getOperand(0) == PH)
1872  C = dyn_cast<ConstantInt>(Incr->getOperand(1));
1873  else if (Incr->getOperand(1) == PH)
1874  C = dyn_cast<ConstantInt>(Incr->getOperand(0));
1875  else
1876  continue;
1877 
1878  if (!C) continue;
1879 
1880  // Ignore negative constants, as the code below doesn't handle them
1881  // correctly. TODO: Remove this restriction.
1882  if (!C->getValue().isStrictlyPositive()) continue;
1883 
1884  /* Add new PHINode. */
1885  PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
1886 
1887  /* create new increment. '++d' in above example. */
1888  Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
1889  BinaryOperator *NewIncr =
1890  BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
1891  Instruction::FAdd : Instruction::FSub,
1892  NewPH, CFP, "IV.S.next.", Incr);
1893 
1894  NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
1895  NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
1896 
1897  /* Remove cast operation */
1898  ShadowUse->replaceAllUsesWith(NewPH);
1899  ShadowUse->eraseFromParent();
1900  Changed = true;
1901  break;
1902  }
1903 }
1904 
1905 /// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
1906 /// set the IV user and stride information and return true, otherwise return
1907 /// false.
1908 bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
1909  for (IVStrideUse &U : IU)
1910  if (U.getUser() == Cond) {
1911  // NOTE: we could handle setcc instructions with multiple uses here, but
 1912  // InstCombine does it as well for simple uses; it's not clear that it
 1913  // occurs often enough in real life to be worth handling.
1914  CondUse = &U;
1915  return true;
1916  }
1917  return false;
1918 }
1919 
1920 /// OptimizeMax - Rewrite the loop's terminating condition if it uses
1921 /// a max computation.
1922 ///
1923 /// This is a narrow solution to a specific, but acute, problem. For loops
1924 /// like this:
1925 ///
1926 /// i = 0;
1927 /// do {
1928 /// p[i] = 0.0;
1929 /// } while (++i < n);
1930 ///
1931 /// the trip count isn't just 'n', because 'n' might not be positive. And
1932 /// unfortunately this can come up even for loops where the user didn't use
1933 /// a C do-while loop. For example, seemingly well-behaved top-test loops
1934 /// will commonly be lowered like this:
 1935 ///
1936 /// if (n > 0) {
1937 /// i = 0;
1938 /// do {
1939 /// p[i] = 0.0;
1940 /// } while (++i < n);
1941 /// }
1942 ///
1943 /// and then it's possible for subsequent optimization to obscure the if
1944 /// test in such a way that indvars can't find it.
1945 ///
1946 /// When indvars can't find the if test in loops like this, it creates a
1947 /// max expression, which allows it to give the loop a canonical
1948 /// induction variable:
1949 ///
1950 /// i = 0;
1951 /// max = n < 1 ? 1 : n;
1952 /// do {
1953 /// p[i] = 0.0;
1954 /// } while (++i != max);
1955 ///
1956 /// Canonical induction variables are necessary because the loop passes
1957 /// are designed around them. The most obvious example of this is the
1958 /// LoopInfo analysis, which doesn't remember trip count values. It
1959 /// expects to be able to rediscover the trip count each time it is
1960 /// needed, and it does this using a simple analysis that only succeeds if
1961 /// the loop has a canonical induction variable.
1962 ///
1963 /// However, when it comes time to generate code, the maximum operation
1964 /// can be quite costly, especially if it's inside of an outer loop.
1965 ///
1966 /// This function solves this problem by detecting this type of loop and
1967 /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
1968 /// the instructions for the maximum computation.
1969 ///
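/// For the example above, the rewritten exit test would conceptually be
///
///   %cmp = icmp slt i32 %i.next, %n
///
/// in place of "%cmp = icmp ne i32 %i.next, %max", after which the max
/// computation feeding %max can be deleted if it has no other uses.
///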
1970 ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
1971  // Check that the loop matches the pattern we're looking for.
1972  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
1973  Cond->getPredicate() != CmpInst::ICMP_NE)
1974  return Cond;
1975 
1976  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
1977  if (!Sel || !Sel->hasOneUse()) return Cond;
1978 
1979  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1980  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1981  return Cond;
1982  const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
1983 
1984  // Add one to the backedge-taken count to get the trip count.
1985  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
1986  if (IterationCount != SE.getSCEV(Sel)) return Cond;
1987 
1988  // Check for a max calculation that matches the pattern. There's no check
1989  // for ICMP_ULE here because the comparison would be with zero, which
1990  // isn't interesting.
1991  CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1992  const SCEVNAryExpr *Max = nullptr;
1993  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
1994  Pred = ICmpInst::ICMP_SLE;
1995  Max = S;
1996  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
1997  Pred = ICmpInst::ICMP_SLT;
1998  Max = S;
1999  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2000  Pred = ICmpInst::ICMP_ULT;
2001  Max = U;
2002  } else {
2003  // No match; bail.
2004  return Cond;
2005  }
2006 
2007  // To handle a max with more than two operands, this optimization would
2008  // require additional checking and setup.
2009  if (Max->getNumOperands() != 2)
2010  return Cond;
2011 
2012  const SCEV *MaxLHS = Max->getOperand(0);
2013  const SCEV *MaxRHS = Max->getOperand(1);
2014 
2015  // ScalarEvolution canonicalizes constants to the left. For < and >, look
2016  // for a comparison with 1. For <= and >=, a comparison with zero.
2017  if (!MaxLHS ||
2018  (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2019  return Cond;
2020 
2021  // Check the relevant induction variable for conformance to
2022  // the pattern.
2023  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
2024  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
2025  if (!AR || !AR->isAffine() ||
2026  AR->getStart() != One ||
2027  AR->getStepRecurrence(SE) != One)
2028  return Cond;
2029 
2030  assert(AR->getLoop() == L &&
2031  "Loop condition operand is an addrec in a different loop!");
2032 
2033  // Check the right operand of the select, and remember it, as it will
2034  // be used in the new comparison instruction.
2035  Value *NewRHS = nullptr;
2036  if (ICmpInst::isTrueWhenEqual(Pred)) {
2037  // Look for n+1, and grab n.
2038  if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
2039  if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2040  if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2041  NewRHS = BO->getOperand(0);
2042  if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
2043  if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2044  if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2045  NewRHS = BO->getOperand(0);
2046  if (!NewRHS)
2047  return Cond;
2048  } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
2049  NewRHS = Sel->getOperand(1);
2050  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
2051  NewRHS = Sel->getOperand(2);
2052  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2053  NewRHS = SU->getValue();
2054  else
2055  // Max doesn't match expected pattern.
2056  return Cond;
2057 
2058  // Determine the new comparison opcode. It may be signed or unsigned,
2059  // and the original comparison may be either equality or inequality.
2060  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2061  Pred = CmpInst::getInversePredicate(Pred);
2062 
2063  // Ok, everything looks ok to change the condition into an SLT or SGE and
2064  // delete the max calculation.
2065  ICmpInst *NewCond =
2066  new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
2067 
2068  // Delete the max calculation instructions.
2069  Cond->replaceAllUsesWith(NewCond);
2070  CondUse->setUser(NewCond);
2071  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
2072  Cond->eraseFromParent();
2073  Sel->eraseFromParent();
2074  if (Cmp->use_empty())
2075  Cmp->eraseFromParent();
2076  return NewCond;
2077 }
2078 
2079 /// OptimizeLoopTermCond - Change loop terminating condition to use the
2080 /// postinc iv when possible.
2081 void
2082 LSRInstance::OptimizeLoopTermCond() {
 2083  SmallPtrSet<Instruction *, 4> PostIncs;
 2084 
2085  BasicBlock *LatchBlock = L->getLoopLatch();
2086  SmallVector<BasicBlock*, 8> ExitingBlocks;
2087  L->getExitingBlocks(ExitingBlocks);
2088 
2089  for (BasicBlock *ExitingBlock : ExitingBlocks) {
2090 
2091  // Get the terminating condition for the loop if possible. If we
2092  // can, we want to change it to use a post-incremented version of its
2093  // induction variable, to allow coalescing the live ranges for the IV into
2094  // one register value.
2095 
2096  BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2097  if (!TermBr)
2098  continue;
2099  // FIXME: Overly conservative, termination condition could be an 'or' etc..
2100  if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
2101  continue;
2102 
2103  // Search IVUsesByStride to find Cond's IVUse if there is one.
2104  IVStrideUse *CondUse = nullptr;
2105  ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
2106  if (!FindIVUserForCond(Cond, CondUse))
2107  continue;
2108 
2109  // If the trip count is computed in terms of a max (due to ScalarEvolution
2110  // being unable to find a sufficient guard, for example), change the loop
2111  // comparison to use SLT or ULT instead of NE.
2112  // One consequence of doing this now is that it disrupts the count-down
2113  // optimization. That's not always a bad thing though, because in such
2114  // cases it may still be worthwhile to avoid a max.
2115  Cond = OptimizeMax(Cond, CondUse);
2116 
2117  // If this exiting block dominates the latch block, it may also use
2118  // the post-inc value if it won't be shared with other uses.
2119  // Check for dominance.
2120  if (!DT.dominates(ExitingBlock, LatchBlock))
2121  continue;
2122 
2123  // Conservatively avoid trying to use the post-inc value in non-latch
2124  // exits if there may be pre-inc users in intervening blocks.
2125  if (LatchBlock != ExitingBlock)
2126  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
2127  // Test if the use is reachable from the exiting block. This dominator
2128  // query is a conservative approximation of reachability.
2129  if (&*UI != CondUse &&
2130  !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2131  // Conservatively assume there may be reuse if the quotient of their
2132  // strides could be a legal scale.
2133  const SCEV *A = IU.getStride(*CondUse, L);
2134  const SCEV *B = IU.getStride(*UI, L);
2135  if (!A || !B) continue;
2136  if (SE.getTypeSizeInBits(A->getType()) !=
2137  SE.getTypeSizeInBits(B->getType())) {
2138  if (SE.getTypeSizeInBits(A->getType()) >
2139  SE.getTypeSizeInBits(B->getType()))
2140  B = SE.getSignExtendExpr(B, A->getType());
2141  else
2142  A = SE.getSignExtendExpr(A, B->getType());
2143  }
2144  if (const SCEVConstant *D =
2145  dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
2146  const ConstantInt *C = D->getValue();
2147  // Stride of one or negative one can have reuse with non-addresses.
2148  if (C->isOne() || C->isAllOnesValue())
2149  goto decline_post_inc;
2150  // Avoid weird situations.
2151  if (C->getValue().getMinSignedBits() >= 64 ||
2152  C->getValue().isMinSignedValue())
2153  goto decline_post_inc;
2154  // Check for possible scaled-address reuse.
2155  Type *AccessTy = getAccessType(UI->getUser());
2156  int64_t Scale = C->getSExtValue();
2157  if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
2158  /*BaseOffset=*/ 0,
2159  /*HasBaseReg=*/ false, Scale))
2160  goto decline_post_inc;
2161  Scale = -Scale;
2162  if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
2163  /*BaseOffset=*/ 0,
2164  /*HasBaseReg=*/ false, Scale))
2165  goto decline_post_inc;
2166  }
2167  }
2168 
2169  DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
2170  << *Cond << '\n');
2171 
2172  // It's possible for the setcc instruction to be anywhere in the loop, and
2173  // possible for it to have multiple users. If it is not immediately before
2174  // the exiting block branch, move it.
2175  if (&*++BasicBlock::iterator(Cond) != TermBr) {
2176  if (Cond->hasOneUse()) {
2177  Cond->moveBefore(TermBr);
2178  } else {
2179  // Clone the terminating condition and insert into the loopend.
2180  ICmpInst *OldCond = Cond;
2181  Cond = cast<ICmpInst>(Cond->clone());
2182  Cond->setName(L->getHeader()->getName() + ".termcond");
2183  ExitingBlock->getInstList().insert(TermBr, Cond);
2184 
2185  // Clone the IVUse, as the old use still exists!
2186  CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
2187  TermBr->replaceUsesOfWith(OldCond, Cond);
2188  }
2189  }
2190 
2191  // If we get to here, we know that we can transform the setcc instruction to
2192  // use the post-incremented version of the IV, allowing us to coalesce the
2193  // live ranges for the IV correctly.
2194  CondUse->transformToPostInc(L);
2195  Changed = true;
2196 
2197  PostIncs.insert(Cond);
2198  decline_post_inc:;
2199  }
2200 
2201  // Determine an insertion point for the loop induction variable increment. It
2202  // must dominate all the post-inc comparisons we just set up, and it must
2203  // dominate the loop latch edge.
2204  IVIncInsertPos = L->getLoopLatch()->getTerminator();
2205  for (Instruction *Inst : PostIncs) {
2206  BasicBlock *BB =
2207  DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
2208  Inst->getParent());
2209  if (BB == Inst->getParent())
2210  IVIncInsertPos = Inst;
2211  else if (BB != IVIncInsertPos->getParent())
2212  IVIncInsertPos = BB->getTerminator();
2213  }
2214 }
2215 
2216 /// reconcileNewOffset - Determine if the given use can accommodate a fixup
2217 /// at the given offset and other details. If so, update the use and
2218 /// return true.
2219 bool
2220 LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
2221  LSRUse::KindType Kind, Type *AccessTy) {
2222  int64_t NewMinOffset = LU.MinOffset;
2223  int64_t NewMaxOffset = LU.MaxOffset;
2224  Type *NewAccessTy = AccessTy;
2225 
2226  // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2227  // something conservative, however this can pessimize in the case that one of
2228  // the uses will have all its uses outside the loop, for example.
2229  if (LU.Kind != Kind)
2230  return false;
2231 
2232  // Check for a mismatched access type, and fall back conservatively as needed.
2233  // TODO: Be less conservative when the type is similar and can use the same
2234  // addressing modes.
2235  if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
2236  NewAccessTy = Type::getVoidTy(AccessTy->getContext());
2237 
2238  // Conservatively assume HasBaseReg is true for now.
2239  if (NewOffset < LU.MinOffset) {
2240  if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2241  LU.MaxOffset - NewOffset, HasBaseReg))
2242  return false;
2243  NewMinOffset = NewOffset;
2244  } else if (NewOffset > LU.MaxOffset) {
2245  if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2246  NewOffset - LU.MinOffset, HasBaseReg))
2247  return false;
2248  NewMaxOffset = NewOffset;
2249  }
2250 
2251  // Update the use.
2252  LU.MinOffset = NewMinOffset;
2253  LU.MaxOffset = NewMaxOffset;
2254  LU.AccessTy = NewAccessTy;
2255  if (NewOffset != LU.Offsets.back())
2256  LU.Offsets.push_back(NewOffset);
2257  return true;
2258 }
2259 
2260 /// getUse - Return an LSRUse index and an offset value for a fixup which
2261 /// needs the given expression, with the given kind and optional access type.
2262 /// Either reuse an existing use or create a new one, as needed.
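/// For example (illustrative): fixups whose expressions are (%base + 4) and
/// (%base + 8) can share a single LSRUse keyed on %base, recorded with the
/// offsets 4 and 8, provided both offsets are foldable for the use's kind.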
2263 std::pair<size_t, int64_t>
2264 LSRInstance::getUse(const SCEV *&Expr,
2265  LSRUse::KindType Kind, Type *AccessTy) {
2266  const SCEV *Copy = Expr;
2267  int64_t Offset = ExtractImmediate(Expr, SE);
2268 
2269  // Basic uses can't accept any offset, for example.
2270  if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2271  Offset, /*HasBaseReg=*/ true)) {
2272  Expr = Copy;
2273  Offset = 0;
2274  }
2275 
2276  std::pair<UseMapTy::iterator, bool> P =
2277  UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
2278  if (!P.second) {
2279  // A use already existed with this base.
2280  size_t LUIdx = P.first->second;
2281  LSRUse &LU = Uses[LUIdx];
2282  if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
2283  // Reuse this use.
2284  return std::make_pair(LUIdx, Offset);
2285  }
2286 
2287  // Create a new use.
2288  size_t LUIdx = Uses.size();
2289  P.first->second = LUIdx;
2290  Uses.push_back(LSRUse(Kind, AccessTy));
2291  LSRUse &LU = Uses[LUIdx];
2292 
2293  // We don't need to track redundant offsets, but we don't need to go out
2294  // of our way here to avoid them.
2295  if (LU.Offsets.empty() || Offset != LU.Offsets.back())
2296  LU.Offsets.push_back(Offset);
2297 
2298  LU.MinOffset = Offset;
2299  LU.MaxOffset = Offset;
2300  return std::make_pair(LUIdx, Offset);
2301 }
2302 
2303 /// DeleteUse - Delete the given use from the Uses list.
2304 void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2305  if (&LU != &Uses.back())
2306  std::swap(LU, Uses.back());
2307  Uses.pop_back();
2308 
2309  // Update RegUses.
2310  RegUses.SwapAndDropUse(LUIdx, Uses.size());
2311 }
2312 
 2313 /// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
 2314 /// a formula with the same registers as the given formula.
2315 LSRUse *
2316 LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2317  const LSRUse &OrigLU) {
2318  // Search all uses for the formula. This could be more clever.
2319  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
2320  LSRUse &LU = Uses[LUIdx];
2321  // Check whether this use is close enough to OrigLU, to see whether it's
2322  // worthwhile looking through its formulae.
2323  // Ignore ICmpZero uses because they may contain formulae generated by
2324  // GenerateICmpZeroScales, in which case adding fixup offsets may
2325  // be invalid.
2326  if (&LU != &OrigLU &&
2327  LU.Kind != LSRUse::ICmpZero &&
2328  LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2329  LU.WidestFixupType == OrigLU.WidestFixupType &&
2330  LU.HasFormulaWithSameRegs(OrigF)) {
2331  // Scan through this use's formulae.
2332  for (const Formula &F : LU.Formulae) {
2333  // Check to see if this formula has the same registers and symbols
2334  // as OrigF.
2335  if (F.BaseRegs == OrigF.BaseRegs &&
2336  F.ScaledReg == OrigF.ScaledReg &&
2337  F.BaseGV == OrigF.BaseGV &&
2338  F.Scale == OrigF.Scale &&
2339  F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2340  if (F.BaseOffset == 0)
2341  return &LU;
2342  // This is the formula where all the registers and symbols matched;
2343  // there aren't going to be any others. Since we declined it, we
2344  // can skip the rest of the formulae and proceed to the next LSRUse.
2345  break;
2346  }
2347  }
2348  }
2349  }
2350 
2351  // Nothing looked good.
2352  return nullptr;
2353 }
2354 
2355 void LSRInstance::CollectInterestingTypesAndFactors() {
 2356  SmallSetVector<const SCEV *, 4> Strides;
 2357 
2358  // Collect interesting types and strides.
 2359  SmallVector<const SCEV *, 4> Worklist;
 2360  for (const IVStrideUse &U : IU) {
2361  const SCEV *Expr = IU.getExpr(U);
2362 
2363  // Collect interesting types.
2364  Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
2365 
2366  // Add strides for mentioned loops.
2367  Worklist.push_back(Expr);
2368  do {
2369  const SCEV *S = Worklist.pop_back_val();
2370  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2371  if (AR->getLoop() == L)
2372  Strides.insert(AR->getStepRecurrence(SE));
2373  Worklist.push_back(AR->getStart());
2374  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2375  Worklist.append(Add->op_begin(), Add->op_end());
2376  }
2377  } while (!Worklist.empty());
2378  }
2379 
2380  // Compute interesting factors from the set of interesting strides.
 2381  for (SmallSetVector<const SCEV *, 4>::const_iterator
 2382  I = Strides.begin(), E = Strides.end(); I != E; ++I)
 2383  for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
 2384  std::next(I); NewStrideIter != E; ++NewStrideIter) {
2385  const SCEV *OldStride = *I;
2386  const SCEV *NewStride = *NewStrideIter;
2387 
2388  if (SE.getTypeSizeInBits(OldStride->getType()) !=
2389  SE.getTypeSizeInBits(NewStride->getType())) {
2390  if (SE.getTypeSizeInBits(OldStride->getType()) >
2391  SE.getTypeSizeInBits(NewStride->getType()))
2392  NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
2393  else
2394  OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
2395  }
2396  if (const SCEVConstant *Factor =
2397  dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
2398  SE, true))) {
2399  if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
2400  Factors.insert(Factor->getValue()->getValue().getSExtValue());
2401  } else if (const SCEVConstant *Factor =
2402  dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
2403  NewStride,
2404  SE, true))) {
2405  if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
2406  Factors.insert(Factor->getValue()->getValue().getSExtValue());
2407  }
2408  }
2409 
2410  // If all uses use the same type, don't bother looking for truncation-based
2411  // reuse.
2412  if (Types.size() == 1)
2413  Types.clear();
2414 
2415  DEBUG(print_factors_and_types(dbgs()));
2416 }
2417 
2418 /// findIVOperand - Helper for CollectChains that finds an IV operand (computed
2419 /// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
2420 /// Instructions to IVStrideUses, we could partially skip this.
2421 static User::op_iterator
 2422 findIVOperand(User::op_iterator OI, User::op_iterator OE,
 2423  Loop *L, ScalarEvolution &SE) {
2424  for(; OI != OE; ++OI) {
2425  if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2426  if (!SE.isSCEVable(Oper->getType()))
2427  continue;
2428 
2429  if (const SCEVAddRecExpr *AR =
2430  dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
2431  if (AR->getLoop() == L)
2432  break;
2433  }
2434  }
2435  }
2436  return OI;
2437 }
2438 
 2439 /// getWideOperand - IVChain logic must consistently peek base TruncInst
2440 /// operands, so wrap it in a convenient helper.
2441 static Value *getWideOperand(Value *Oper) {
2442  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2443  return Trunc->getOperand(0);
2444  return Oper;
2445 }
2446 
2447 /// isCompatibleIVType - Return true if we allow an IV chain to include both
2448 /// types.
2449 static bool isCompatibleIVType(Value *LVal, Value *RVal) {
2450  Type *LType = LVal->getType();
2451  Type *RType = RVal->getType();
2452  return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
2453 }
2454 
2455 /// getExprBase - Return an approximation of this SCEV expression's "base", or
2456 /// NULL for any constant. Returning the expression itself is
2457 /// conservative. Returning a deeper subexpression is more precise and valid as
2458 /// long as it isn't less complex than another subexpression. For expressions
2459 /// involving multiple unscaled values, we need to return the pointer-type
2460 /// SCEVUnknown. This avoids forming chains across objects, such as:
2461 /// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
2462 ///
2463 /// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2464 /// SCEVUnknown, we simply return the rightmost SCEV operand.
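/// For example (illustrative): for the SCEV (4 + (4 * %i) + %base), where
/// %base is a pointer-typed SCEVUnknown, the reverse walk below yields %base
/// immediately, so two accesses off the same %base share a base even when
/// their scaled index parts differ.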
2465 static const SCEV *getExprBase(const SCEV *S) {
2466  switch (S->getSCEVType()) {
 2467  default: // including scUnknown.
2468  return S;
2469  case scConstant:
2470  return nullptr;
2471  case scTruncate:
2472  return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2473  case scZeroExtend:
2474  return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2475  case scSignExtend:
2476  return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2477  case scAddExpr: {
2478  // Skip over scaled operands (scMulExpr) to follow add operands as long as
2479  // there's nothing more complex.
2480  // FIXME: not sure if we want to recognize negation.
2481  const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
2482  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
2483  E(Add->op_begin()); I != E; ++I) {
2484  const SCEV *SubExpr = *I;
2485  if (SubExpr->getSCEVType() == scAddExpr)
2486  return getExprBase(SubExpr);
2487 
2488  if (SubExpr->getSCEVType() != scMulExpr)
2489  return SubExpr;
2490  }
2491  return S; // all operands are scaled, be conservative.
2492  }
2493  case scAddRecExpr:
2494  return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2495  }
2496 }
2497 
2498 /// Return true if the chain increment is profitable to expand into a loop
2499 /// invariant value, which may require its own register. A profitable chain
2500 /// increment will be an offset relative to the same base. We allow such offsets
 2501 /// to potentially be used as a chain increment as long as they are not obviously
2502 /// expensive to expand using real instructions.
2503 bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
2504  const SCEV *IncExpr,
2505  ScalarEvolution &SE) {
2506  // Aggressively form chains when -stress-ivchain.
2507  if (StressIVChain)
2508  return true;
2509 
2510  // Do not replace a constant offset from IV head with a nonconstant IV
2511  // increment.
2512  if (!isa<SCEVConstant>(IncExpr)) {
2513  const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
2514  if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
 2515  return false;
2516  }
2517 
2518  SmallPtrSet<const SCEV*, 8> Processed;
2519  return !isHighCostExpansion(IncExpr, Processed, SE);
2520 }
2521 
2522 /// Return true if the number of registers needed for the chain is estimated to
2523 /// be less than the number required for the individual IV users. First prohibit
2524 /// any IV users that keep the IV live across increments (the Users set should
2525 /// be empty). Next count the number and type of increments in the chain.
2526 ///
2527 /// Chaining IVs can lead to considerable code bloat if ISEL doesn't
 2528 /// effectively use postinc addressing modes. Only consider it profitable if the
2529 /// increments can be computed in fewer registers when chained.
2530 ///
 2531 /// TODO: Consider an IVInc free if it's already used in other chains.
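///
/// As a worked example of the cost model below (illustrative): a chain that
/// completes the header phi (the tail is the phi and its SCEV matches the
/// head's IncExpr) and whose other links are constant-offset increments starts
/// at cost 1, loses 1 for completing the phi and 1 for having multiple
/// constant increments, ending at -1, which counts as profitable.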
2532 static bool
 2533 isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
 2534  ScalarEvolution &SE, const TargetTransformInfo &TTI) {
2535  if (StressIVChain)
2536  return true;
2537 
2538  if (!Chain.hasIncs())
2539  return false;
2540 
2541  if (!Users.empty()) {
2542  DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
2543  for (Instruction *Inst : Users) {
2544  dbgs() << " " << *Inst << "\n";
2545  });
2546  return false;
2547  }
2548  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2549 
 2550  // The chain itself may require a register, so initialize cost to 1.
2551  int cost = 1;
2552 
2553  // A complete chain likely eliminates the need for keeping the original IV in
2554  // a register. LSR does not currently know how to form a complete chain unless
2555  // the header phi already exists.
2556  if (isa<PHINode>(Chain.tailUserInst())
2557  && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2558  --cost;
2559  }
2560  const SCEV *LastIncExpr = nullptr;
2561  unsigned NumConstIncrements = 0;
2562  unsigned NumVarIncrements = 0;
2563  unsigned NumReusedIncrements = 0;
2564  for (const IVInc &Inc : Chain) {
2565  if (Inc.IncExpr->isZero())
2566  continue;
2567 
2568  // Incrementing by zero or some constant is neutral. We assume constants can
2569  // be folded into an addressing mode or an add's immediate operand.
2570  if (isa<SCEVConstant>(Inc.IncExpr)) {
2571  ++NumConstIncrements;
2572  continue;
2573  }
2574 
2575  if (Inc.IncExpr == LastIncExpr)
2576  ++NumReusedIncrements;
2577  else
2578  ++NumVarIncrements;
2579 
2580  LastIncExpr = Inc.IncExpr;
2581  }
2582  // An IV chain with a single increment is handled by LSR's postinc
2583  // uses. However, a chain with multiple increments requires keeping the IV's
2584  // value live longer than it needs to be if chained.
2585  if (NumConstIncrements > 1)
2586  --cost;
2587 
2588  // Materializing increment expressions in the preheader that didn't exist in
2589  // the original code may cost a register. For example, sign-extended array
2590  // indices can produce ridiculous increments like this:
2591  // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
2592  cost += NumVarIncrements;
2593 
2594  // Reusing variable increments likely saves a register to hold the multiple of
2595  // the stride.
2596  cost -= NumReusedIncrements;
2597 
2598  DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
2599  << "\n");
2600 
2601  return cost < 0;
2602 }
2603 
2604 /// ChainInstruction - Add this IV user to an existing chain or make it the head
2605 /// of a new chain.
2606 void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2607  SmallVectorImpl<ChainUsers> &ChainUsersVec) {
2608  // When IVs are used as types of varying widths, they are generally converted
2609  // to a wider type with some uses remaining narrow under a (free) trunc.
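  // For example (illustrative), a loop may be driven by a 64-bit %iv.wide
  // while one user still consumes %iv.narrow = trunc i64 %iv.wide to i32;
  // getWideOperand peeks through that trunc so both users key off %iv.wide.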
2610  Value *const NextIV = getWideOperand(IVOper);
2611  const SCEV *const OperExpr = SE.getSCEV(NextIV);
2612  const SCEV *const OperExprBase = getExprBase(OperExpr);
2613 
2614  // Visit all existing chains. Check if its IVOper can be computed as a
2615  // profitable loop invariant increment from the last link in the Chain.
2616  unsigned ChainIdx = 0, NChains = IVChainVec.size();
2617  const SCEV *LastIncExpr = nullptr;
2618  for (; ChainIdx < NChains; ++ChainIdx) {
2619  IVChain &Chain = IVChainVec[ChainIdx];
2620 
2621  // Prune the solution space aggressively by checking that both IV operands
2622  // are expressions that operate on the same unscaled SCEVUnknown. This
2623  // "base" will be canceled by the subsequent getMinusSCEV call. Checking
2624  // first avoids creating extra SCEV expressions.
2625  if (!StressIVChain && Chain.ExprBase != OperExprBase)
2626  continue;
2627 
2628  Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
2629  if (!isCompatibleIVType(PrevIV, NextIV))
2630  continue;
2631 
2632  // A phi node terminates a chain.
2633  if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2634  continue;
2635 
2636  // The increment must be loop-invariant so it can be kept in a register.
2637  const SCEV *PrevExpr = SE.getSCEV(PrevIV);
2638  const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
2639  if (!SE.isLoopInvariant(IncExpr, L))
2640  continue;
2641 
2642  if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2643  LastIncExpr = IncExpr;
2644  break;
2645  }
2646  }
2647  // If we haven't found a chain, create a new one, unless we hit the max. Don't
2648  // bother for phi nodes, because they must be last in the chain.
2649  if (ChainIdx == NChains) {
2650  if (isa<PHINode>(UserInst))
2651  return;
2652  if (NChains >= MaxChains && !StressIVChain) {
2653  DEBUG(dbgs() << "IV Chain Limit\n");
2654  return;
2655  }
2656  LastIncExpr = OperExpr;
2657  // IVUsers may have skipped over sign/zero extensions. We don't currently
2658  // attempt to form chains involving extensions unless they can be hoisted
2659  // into this loop's AddRec.
2660  if (!isa<SCEVAddRecExpr>(LastIncExpr))
2661  return;
2662  ++NChains;
2663  IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
2664  OperExprBase));
2665  ChainUsersVec.resize(NChains);
2666  DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
2667  << ") IV=" << *LastIncExpr << "\n");
2668  } else {
2669  DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
2670  << ") IV+" << *LastIncExpr << "\n");
2671  // Add this IV user to the end of the chain.
2672  IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
2673  }
2674  IVChain &Chain = IVChainVec[ChainIdx];
2675 
2676  SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
2677  // This chain's NearUsers become FarUsers.
2678  if (!LastIncExpr->isZero()) {
2679  ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
2680  NearUsers.end());
2681  NearUsers.clear();
2682  }
2683 
2684  // All other uses of IVOperand become near uses of the chain.
2685  // We currently ignore intermediate values within SCEV expressions, assuming
 2686  // they will eventually be used by the current chain, or can be computed
2687  // from one of the chain increments. To be more precise we could
2688  // transitively follow its user and only add leaf IV users to the set.
2689  for (User *U : IVOper->users()) {
2690  Instruction *OtherUse = dyn_cast<Instruction>(U);
2691  if (!OtherUse)
2692  continue;
2693  // Uses in the chain will no longer be uses if the chain is formed.
2694  // Include the head of the chain in this iteration (not Chain.begin()).
2695  IVChain::const_iterator IncIter = Chain.Incs.begin();
2696  IVChain::const_iterator IncEnd = Chain.Incs.end();
2697  for( ; IncIter != IncEnd; ++IncIter) {
2698  if (IncIter->UserInst == OtherUse)
2699  break;
2700  }
2701  if (IncIter != IncEnd)
2702  continue;
2703 
2704  if (SE.isSCEVable(OtherUse->getType())
2705  && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
2706  && IU.isIVUserOrOperand(OtherUse)) {
2707  continue;
2708  }
2709  NearUsers.insert(OtherUse);
2710  }
2711 
2712  // Since this user is part of the chain, it's no longer considered a use
2713  // of the chain.
2714  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
2715 }
2716 
2717 /// CollectChains - Populate the vector of Chains.
2718 ///
2719 /// This decreases ILP at the architecture level. Targets with ample registers,
2720 /// multiple memory ports, and no register renaming probably don't want
2721 /// this. However, such targets should probably disable LSR altogether.
2722 ///
2723 /// The job of LSR is to make a reasonable choice of induction variables across
2724 /// the loop. Subsequent passes can easily "unchain" computation exposing more
2725 /// ILP *within the loop* if the target wants it.
2726 ///
2727 /// Finding the best IV chain is potentially a scheduling problem. Since LSR
2728 /// will not reorder memory operations, it will recognize this as a chain, but
2729 /// will generate redundant IV increments. Ideally this would be corrected later
2730 /// by a smart scheduler:
2731 /// = A[i]
2732 /// = A[i+x]
2733 /// A[i] =
2734 /// A[i+x] =
2735 ///
2736 /// TODO: Walk the entire domtree within this loop, not just the path to the
2737 /// loop latch. This will discover chains on side paths, but requires
2738 /// maintaining multiple copies of the Chains state.
2739 void LSRInstance::CollectChains() {
2740  DEBUG(dbgs() << "Collecting IV Chains.\n");
2741  SmallVector<ChainUsers, 8> ChainUsersVec;
2742 
2743  SmallVector<BasicBlock *,8> LatchPath;
2744  BasicBlock *LoopHeader = L->getHeader();
2745  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
2746  Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
2747  LatchPath.push_back(Rung->getBlock());
2748  }
2749  LatchPath.push_back(LoopHeader);
2750 
2751  // Walk the instruction stream from the loop header to the loop latch.
 2752  for (SmallVectorImpl<BasicBlock *>::reverse_iterator
 2753  BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
2754  BBIter != BBEnd; ++BBIter) {
2755  for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
2756  I != E; ++I) {
2757  // Skip instructions that weren't seen by IVUsers analysis.
2758  if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
2759  continue;
2760 
2761  // Ignore users that are part of a SCEV expression. This way we only
2762  // consider leaf IV Users. This effectively rediscovers a portion of
2763  // IVUsers analysis but in program order this time.
2764  if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
2765  continue;
2766 
2767  // Remove this instruction from any NearUsers set it may be in.
2768  for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
2769  ChainIdx < NChains; ++ChainIdx) {
2770  ChainUsersVec[ChainIdx].NearUsers.erase(I);
2771  }
2772  // Search for operands that can be chained.
2773  SmallPtrSet<Instruction*, 4> UniqueOperands;
2774  User::op_iterator IVOpEnd = I->op_end();
2775  User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
2776  while (IVOpIter != IVOpEnd) {
2777  Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
2778  if (UniqueOperands.insert(IVOpInst).second)
2779  ChainInstruction(I, IVOpInst, ChainUsersVec);
2780  IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
2781  }
2782  } // Continue walking down the instructions.
2783  } // Continue walking down the domtree.
2784  // Visit phi backedges to determine if the chain can generate the IV postinc.
2785  for (BasicBlock::iterator I = L->getHeader()->begin();
2786  PHINode *PN = dyn_cast<PHINode>(I); ++I) {
2787  if (!SE.isSCEVable(PN->getType()))
2788  continue;
2789 
2790  Instruction *IncV =
2791  dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
2792  if (IncV)
2793  ChainInstruction(PN, IncV, ChainUsersVec);
2794  }
2795  // Remove any unprofitable chains.
2796  unsigned ChainIdx = 0;
2797  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
2798  UsersIdx < NChains; ++UsersIdx) {
2799  if (!isProfitableChain(IVChainVec[UsersIdx],
2800  ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
2801  continue;
 2802  // Preserve the chain at UsersIdx.
2803  if (ChainIdx != UsersIdx)
2804  IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
2805  FinalizeChain(IVChainVec[ChainIdx]);
2806  ++ChainIdx;
2807  }
2808  IVChainVec.resize(ChainIdx);
2809 }
2810 
2811 void LSRInstance::FinalizeChain(IVChain &Chain) {
2812  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2813  DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
2814 
2815  for (const IVInc &Inc : Chain) {
2816  DEBUG(dbgs() << " Inc: " << Inc.UserInst << "\n");
2817  auto UseI = std::find(Inc.UserInst->op_begin(), Inc.UserInst->op_end(),
2818  Inc.IVOperand);
2819  assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
2820  IVIncSet.insert(UseI);
2821  }
2822 }
2823 
2824 /// Return true if the IVInc can be folded into an addressing mode.
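/// For example (illustrative), a constant increment of +8 feeding a load can
/// often be folded as a reg+8 addressing mode when the target reports that
/// offset as legal for the access type.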
2825 static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
2826  Value *Operand, const TargetTransformInfo &TTI) {
2827  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
2828  if (!IncConst || !isAddressUse(UserInst, Operand))
2829  return false;
2830 
2831  if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
2832  return false;
2833 
2834  int64_t IncOffset = IncConst->getValue()->getSExtValue();
 2835  if (!isAlwaysFoldable(TTI, LSRUse::Address,
 2836  getAccessType(UserInst), /*BaseGV=*/ nullptr,
 2837  IncOffset, /*HasBaseReg=*/ false))
2838  return false;
2839 
2840  return true;
2841 }
2842 
 2843 /// GenerateIVChain - Generate an add or subtract for each IVInc in a chain to
2844 /// materialize the IV user's operand from the previous IV user's operand.
2845 void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
2846  SmallVectorImpl<WeakVH> &DeadInsts) {
2847  // Find the new IVOperand for the head of the chain. It may have been replaced
2848  // by LSR.
2849  const IVInc &Head = Chain.Incs[0];
2850  User::op_iterator IVOpEnd = Head.UserInst->op_end();
2851  // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
2852  User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
2853  IVOpEnd, L, SE);
2854  Value *IVSrc = nullptr;
2855  while (IVOpIter != IVOpEnd) {
2856  IVSrc = getWideOperand(*IVOpIter);
2857 
2858  // If this operand computes the expression that the chain needs, we may use
2859  // it. (Check this after setting IVSrc which is used below.)
2860  //
2861  // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
2862  // narrow for the chain, so we can no longer use it. We do allow using a
 2863  // wider phi, assuming LSR checked for free truncation. In that case we
2864  // should already have a truncate on this operand such that
2865  // getSCEV(IVSrc) == IncExpr.
2866  if (SE.getSCEV(*IVOpIter) == Head.IncExpr
2867  || SE.getSCEV(IVSrc) == Head.IncExpr) {
2868  break;
2869  }
2870  IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
2871  }
2872  if (IVOpIter == IVOpEnd) {
2873  // Gracefully give up on this chain.
2874  DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
2875  return;
2876  }
2877 
2878  DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
2879  Type *IVTy = IVSrc->getType();
2880  Type *IntTy = SE.getEffectiveSCEVType(IVTy);
2881  const SCEV *LeftOverExpr = nullptr;
2882  for (const IVInc &Inc : Chain) {
2883  Instruction *InsertPt = Inc.UserInst;
2884  if (isa<PHINode>(InsertPt))
2885  InsertPt = L->getLoopLatch()->getTerminator();
2886 
2887  // IVOper will replace the current IV User's operand. IVSrc is the IV
2888  // value currently held in a register.
2889  Value *IVOper = IVSrc;
2890  if (!Inc.IncExpr->isZero()) {
2891  // IncExpr was the result of subtraction of two narrow values, so must
2892  // be signed.
2893  const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
2894  LeftOverExpr = LeftOverExpr ?
2895  SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
2896  }
2897  if (LeftOverExpr && !LeftOverExpr->isZero()) {
2898  // Expand the IV increment.
2899  Rewriter.clearPostInc();
2900  Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
2901  const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
2902  SE.getUnknown(IncV));
2903  IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
2904 
2905  // If an IV increment can't be folded, use it as the next IV value.
2906  if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
2907  assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
2908  IVSrc = IVOper;
2909  LeftOverExpr = nullptr;
2910  }
2911  }
2912  Type *OperTy = Inc.IVOperand->getType();
2913  if (IVTy != OperTy) {
2914  assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
2915  "cannot extend a chained IV");
2916  IRBuilder<> Builder(InsertPt);
2917  IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
2918  }
2919  Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
2920  DeadInsts.emplace_back(Inc.IVOperand);
2921  }
2922  // If LSR created a new, wider phi, we may also replace its postinc. We only
2923  // do this if we also found a wide value for the head of the chain.
2924  if (isa<PHINode>(Chain.tailUserInst())) {
2925  for (BasicBlock::iterator I = L->getHeader()->begin();
2926  PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
2927  if (!isCompatibleIVType(Phi, IVSrc))
2928  continue;
2929  Instruction *PostIncV = dyn_cast<Instruction>(
2930  Phi->getIncomingValueForBlock(L->getLoopLatch()));
2931  if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
2932  continue;
2933  Value *IVOper = IVSrc;
2934  Type *PostIncTy = PostIncV->getType();
2935  if (IVTy != PostIncTy) {
2936  assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
2937  IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
2938  Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
2939  IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
2940  }
2941  Phi->replaceUsesOfWith(PostIncV, IVOper);
2942  DeadInsts.emplace_back(PostIncV);
2943  }
2944  }
2945 }
2946 
2947 void LSRInstance::CollectFixupsAndInitialFormulae() {
2948  for (const IVStrideUse &U : IU) {
2949  Instruction *UserInst = U.getUser();
2950  // Skip IV users that are part of profitable IV Chains.
2951  User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
2952  U.getOperandValToReplace());
2953  assert(UseI != UserInst->op_end() && "cannot find IV operand");
2954  if (IVIncSet.count(UseI))
2955  continue;
2956 
2957  // Record the uses.
2958  LSRFixup &LF = getNewFixup();
2959  LF.UserInst = UserInst;
2960  LF.OperandValToReplace = U.getOperandValToReplace();
2961  LF.PostIncLoops = U.getPostIncLoops();
2962 
2963  LSRUse::KindType Kind = LSRUse::Basic;
2964  Type *AccessTy = nullptr;
2965  if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
2966  Kind = LSRUse::Address;
2967  AccessTy = getAccessType(LF.UserInst);
2968  }
2969 
2970  const SCEV *S = IU.getExpr(U);
2971 
2972  // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
2973  // (N - i == 0), and this allows (N - i) to be the expression that we work
2974  // with rather than just N or i, so we can consider the register
2975  // requirements for both N and i at the same time. Limiting this code to
2976  // equality icmps is not a problem because all interesting loops use
2977  // equality icmps, thanks to IndVarSimplify.
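  // For example (illustrative), "icmp eq i64 %i, %n" is handled below as an
  // ICmpZero use on the expression (%n - %i), so the solver can weigh the
  // register requirements for %n and %i together rather than separately.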
2978  if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
2979  if (CI->isEquality()) {
2980  // Swap the operands if needed to put the OperandValToReplace on the
2981  // left, for consistency.
2982  Value *NV = CI->getOperand(1);
2983  if (NV == LF.OperandValToReplace) {
2984  CI->setOperand(1, CI->getOperand(0));
2985  CI->setOperand(0, NV);
2986  NV = CI->getOperand(1);
2987  Changed = true;
2988  }
2989 
2990  // x == y --> x - y == 0
2991  const SCEV *N = SE.getSCEV(NV);
2992  if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
2993  // S is normalized, so normalize N before folding it into S
2994  // to keep the result normalized.
2995  N = TransformForPostIncUse(Normalize, N, CI, nullptr,
2996  LF.PostIncLoops, SE, DT);
2997  Kind = LSRUse::ICmpZero;
2998  S = SE.getMinusSCEV(N, S);
2999  }
3000 
3001  // -1 and the negations of all interesting strides (except the negation
3002  // of -1) are now also interesting.
3003  for (size_t i = 0, e = Factors.size(); i != e; ++i)
3004  if (Factors[i] != -1)
3005  Factors.insert(-(uint64_t)Factors[i]);
3006  Factors.insert(-1);
3007  }
3008 
3009  // Set up the initial formula for this use.
3010  std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
3011  LF.LUIdx = P.first;
3012  LF.Offset = P.second;
3013  LSRUse &LU = Uses[LF.LUIdx];
3014  LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3015  if (!LU.WidestFixupType ||
3016  SE.getTypeSizeInBits(LU.WidestFixupType) <
3017  SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3018  LU.WidestFixupType = LF.OperandValToReplace->getType();
3019 
3020  // If this is the first use of this LSRUse, give it a formula.
3021  if (LU.Formulae.empty()) {
3022  InsertInitialFormula(S, LU, LF.LUIdx);
3023  CountRegisters(LU.Formulae.back(), LF.LUIdx);
3024  }
3025  }
3026 
3027  DEBUG(print_fixups(dbgs()));
3028 }
3029 
3030 /// InsertInitialFormula - Insert a formula for the given expression into
3031 /// the given use, separating out loop-variant portions from loop-invariant
3032 /// and loop-computable portions.
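///
/// For example (illustrative): for S = {%start,+,4}<%L> with loop-invariant
/// %start, the initial formula would typically carry %start and the
/// recurrence {0,+,4}<%L> as separate base registers.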
3033 void
3034 LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
3035  // Mark uses whose expressions cannot be expanded.
3036  if (!isSafeToExpand(S, SE))
3037  LU.RigidFormula = true;
3038 
3039  Formula F;
3040  F.InitialMatch(S, L, SE);
3041  bool Inserted = InsertFormula(LU, LUIdx, F);
3042  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3043 }
3044 
3045 /// InsertSupplementalFormula - Insert a simple single-register formula for
3046 /// the given expression into the given use.
3047 void
3048 LSRInstance::InsertSupplementalFormula(const SCEV *S,
3049  LSRUse &LU, size_t LUIdx) {
3050  Formula F;
3051  F.BaseRegs.push_back(S);
3052  F.HasBaseReg = true;
3053  bool Inserted = InsertFormula(LU, LUIdx, F);
3054  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3055 }
3056 
3057 /// CountRegisters - Note which registers are used by the given formula,
3058 /// updating RegUses.
3059 void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3060  if (F.ScaledReg)
3061  RegUses.CountRegister(F.ScaledReg, LUIdx);
3062  for (const SCEV *BaseReg : F.BaseRegs)
3063  RegUses.CountRegister(BaseReg, LUIdx);
3064 }
3065 
3066 /// InsertFormula - If the given formula has not yet been inserted, add it to
3067 /// the list, and return true. Return false otherwise.
3068 bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3069  // Do not insert formula that we will not be able to expand.
3070  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3071  "Formula is illegal");
3072  if (!LU.InsertFormula(F))
3073  return false;
3074 
3075  CountRegisters(F, LUIdx);
3076  return true;
3077 }
3078 
3079 /// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
3080 /// loop-invariant values which we're tracking. These other uses will pin these
3081 /// values in registers, making them less profitable for elimination.
3082 /// TODO: This currently misses non-constant addrec step registers.
3083 /// TODO: Should this give more weight to users inside the loop?
3084 void
3085 LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3086  SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
3087  SmallPtrSet<const SCEV *, 32> Visited;
3088 
3089  while (!Worklist.empty()) {
3090  const SCEV *S = Worklist.pop_back_val();
3091 
3092  // Don't process the same SCEV twice
3093  if (!Visited.insert(S).second)
3094  continue;
3095 
3096  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
3097  Worklist.append(N->op_begin(), N->op_end());
3098  else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
3099  Worklist.push_back(C->getOperand());
3100  else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
3101  Worklist.push_back(D->getLHS());
3102  Worklist.push_back(D->getRHS());
3103  } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3104  const Value *V = US->getValue();
3105  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
3106  // Look for instructions defined outside the loop.
3107  if (L->contains(Inst)) continue;
3108  } else if (isa<UndefValue>(V))
3109  // Undef doesn't have a live range, so it doesn't matter.
3110  continue;
3111  for (const Use &U : V->uses()) {
3112  const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
3113  // Ignore non-instructions.
3114  if (!UserInst)
3115  continue;
3116  // Ignore instructions in other functions (as can happen with
3117  // Constants).
3118  if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3119  continue;
3120  // Ignore instructions not dominated by the loop.
3121  const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3122  UserInst->getParent() :
3123  cast<PHINode>(UserInst)->getIncomingBlock(
3124  PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
3125  if (!DT.dominates(L->getHeader(), UseBB))
3126  continue;
3127  // Ignore uses which are part of other SCEV expressions, to avoid
3128  // analyzing them multiple times.
3129  if (SE.isSCEVable(UserInst->getType())) {
3130  const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
3131  // If the user is a no-op, look through to its uses.
3132  if (!isa<SCEVUnknown>(UserS))
3133  continue;
3134  if (UserS == US) {
3135  Worklist.push_back(
3136  SE.getUnknown(const_cast<Instruction *>(UserInst)));
3137  continue;
3138  }
3139  }
3140  // Ignore icmp instructions which are already being analyzed.
3141  if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3142  unsigned OtherIdx = !U.getOperandNo();
3143  Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
3144  if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
3145  continue;
3146  }
3147 
3148  LSRFixup &LF = getNewFixup();
3149  LF.UserInst = const_cast<Instruction *>(UserInst);
3150  LF.OperandValToReplace = U;
3151  std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
3152  LF.LUIdx = P.first;
3153  LF.Offset = P.second;
3154  LSRUse &LU = Uses[LF.LUIdx];
3155  LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3156  if (!LU.WidestFixupType ||
3157  SE.getTypeSizeInBits(LU.WidestFixupType) <
3158  SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3159  LU.WidestFixupType = LF.OperandValToReplace->getType();
3160  InsertSupplementalFormula(US, LU, LF.LUIdx);
3161  CountRegisters(LU.Formulae.back(), Uses.size() - 1);
3162  break;
3163  }
3164  }
3165  }
3166 }
3167 
3168 /// CollectSubexprs - Split S into subexpressions which can be pulled out into
3169 /// separate registers. If C is non-null, multiply each subexpression by C.
3170 ///
3171 /// Return remainder expression after factoring the subexpressions captured by
3172 /// Ops. If Ops is complete, return NULL.
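///
/// For illustration: with a top-level call where C is null and
/// S = ((4 * %x) + {16,+,8}<%L>), Ops receives (4 * %x), 16, and the rebased
/// addrec {0,+,8}<%L>, and the call returns null to signal that Ops captures
/// the whole expression.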
3173 static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
3174  SmallVectorImpl<const SCEV *> &Ops,
3175  const Loop *L,
3176  ScalarEvolution &SE,
3177  unsigned Depth = 0) {
3178  // Arbitrarily cap recursion to protect compile time.
3179  if (Depth >= 3)
3180  return S;
3181 
3182  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
3183  // Break out add operands.
3184  for (const SCEV *S : Add->operands()) {
3185  const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
3186  if (Remainder)
3187  Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3188  }
3189  return nullptr;
3190  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3191  // Split a non-zero base out of an addrec.
3192  if (AR->getStart()->isZero())
3193  return S;
3194 
3195  const SCEV *Remainder = CollectSubexprs(AR->getStart(),
3196  C, Ops, L, SE, Depth+1);
3197  // Split the non-zero AddRec unless it is part of a nested recurrence that
3198  // does not pertain to this loop.
3199  if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3200  Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3201  Remainder = nullptr;
3202  }
3203  if (Remainder != AR->getStart()) {
3204  if (!Remainder)
3205  Remainder = SE.getConstant(AR->getType(), 0);
3206  return SE.getAddRecExpr(Remainder,
3207  AR->getStepRecurrence(SE),
3208  AR->getLoop(),
3209  //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
3210  SCEV::FlagAnyWrap);
3211  }
3212  } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
3213  // Break (C * (a + b + c)) into C*a + C*b + C*c.
3214  if (Mul->getNumOperands() != 2)
3215  return S;
3216  if (const SCEVConstant *Op0 =
3217  dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
3218  C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
3219  const SCEV *Remainder =
3220  CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
3221  if (Remainder)
3222  Ops.push_back(SE.getMulExpr(C, Remainder));
3223  return nullptr;
3224  }
3225  }
3226  return S;
3227 }
3228 
3229 /// \brief Helper function for LSRInstance::GenerateReassociations.
3230 void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3231  const Formula &Base,
3232  unsigned Depth, size_t Idx,
3233  bool IsScaledReg) {
3234  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3235  SmallVector<const SCEV *, 8> AddOps;
3236  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
3237  if (Remainder)
3238  AddOps.push_back(Remainder);
3239 
3240  if (AddOps.size() == 1)
3241  return;
3242 
3243  for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
3244  JE = AddOps.end();
3245  J != JE; ++J) {
3246 
3247  // Loop-variant "unknown" values are uninteresting; we won't be able to
3248  // do anything meaningful with them.
3249  if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
3250  continue;
3251 
3252  // Don't pull a constant into a register if the constant could be folded
3253  // into an immediate field.
3254  if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3255  LU.AccessTy, *J, Base.getNumRegs() > 1))
3256  continue;
3257 
3258  // Collect all operands except *J.
3259  SmallVector<const SCEV *, 8> InnerAddOps(
3260  ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
3261  InnerAddOps.append(std::next(J),
3262  ((const SmallVector<const SCEV *, 8> &)AddOps).end());
3263 
3264  // Don't leave just a constant behind in a register if the constant could
3265  // be folded into an immediate field.
3266  if (InnerAddOps.size() == 1 &&
3267  isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3268  LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3269  continue;
3270 
3271  const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3272  if (InnerSum->isZero())
3273  continue;
3274  Formula F = Base;
3275 
3276  // Add the remaining pieces of the add back into the new formula.
3277  const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3278  if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3279  TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3280  InnerSumSC->getValue()->getZExtValue())) {
3281  F.UnfoldedOffset =
3282  (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
3283  if (IsScaledReg)
3284  F.ScaledReg = nullptr;
3285  else
3286  F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
3287  } else if (IsScaledReg)
3288  F.ScaledReg = InnerSum;
3289  else
3290  F.BaseRegs[Idx] = InnerSum;
3291 
3292  // Add J as its own register, or an unfolded immediate.
3293  const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
3294  if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3295  TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3296  SC->getValue()->getZExtValue()))
3297  F.UnfoldedOffset =
3298  (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
3299  else
3300  F.BaseRegs.push_back(*J);
3301  // We may have changed the number of registers in base regs, adjust the
3302  // formula accordingly.
3303  F.Canonicalize();
3304 
3305  if (InsertFormula(LU, LUIdx, F))
3306  // If that formula hadn't been seen before, recurse to find more like
3307  // it.
3308  GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
3309  }
3310 }
3311 
3312 /// GenerateReassociations - Split out subexpressions from adds and the bases of
3313 /// addrecs.
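///
/// For illustration: a base register {A,+,1}<%L> may be split into
/// reg(A) + reg({0,+,1}<%L>), so the loop-invariant start A can be shared
/// with other uses.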
3314 void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3315  Formula Base, unsigned Depth) {
3316  assert(Base.isCanonical() && "Input must be in the canonical form");
3317  // Arbitrarily cap recursion to protect compile time.
3318  if (Depth >= 3)
3319  return;
3320 
3321  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3322  GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
3323 
3324  if (Base.Scale == 1)
3325  GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
3326  /* Idx */ -1, /* IsScaledReg */ true);
3327 }
3328 
3329 /// GenerateCombinations - Generate a formula consisting of all of the
3330 /// loop-dominating registers added into a single register.
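///
/// For illustration: reg(A) + reg(B) + reg({0,+,1}<%L>) may be collapsed to
/// reg(A + B) + reg({0,+,1}<%L>) when A and B dominate the loop header and
/// have no evolution in the loop, saving a register at the cost of formula
/// flexibility.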
3331 void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
3332  Formula Base) {
3333  // This method is only interesting on a plurality of registers.
3334  if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
3335  return;
3336 
3337  // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
3338  // processing the formula.
3339  Base.Unscale();
3340  Formula F = Base;
3341  F.BaseRegs.clear();
3342  SmallVector<const SCEV *, 4> Ops;
3343  for (const SCEV *BaseReg : Base.BaseRegs) {
3344  if (SE.properlyDominates(BaseReg, L->getHeader()) &&
3345  !SE.hasComputableLoopEvolution(BaseReg, L))
3346  Ops.push_back(BaseReg);
3347  else
3348  F.BaseRegs.push_back(BaseReg);
3349  }
3350  if (Ops.size() > 1) {
3351  const SCEV *Sum = SE.getAddExpr(Ops);
3352  // TODO: If Sum is zero, it probably means ScalarEvolution missed an
3353  // opportunity to fold something. For now, just ignore such cases
3354  // rather than proceed with zero in a register.
3355  if (!Sum->isZero()) {
3356  F.BaseRegs.push_back(Sum);
3357  F.Canonicalize();
3358  (void)InsertFormula(LU, LUIdx, F);
3359  }
3360  }
3361 }
3362 
3363 /// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
3364 void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
3365  const Formula &Base, size_t Idx,
3366  bool IsScaledReg) {
3367  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3368  GlobalValue *GV = ExtractSymbol(G, SE);
3369  if (G->isZero() || !GV)
3370  return;
3371  Formula F = Base;
3372  F.BaseGV = GV;
3373  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3374  return;
3375  if (IsScaledReg)
3376  F.ScaledReg = G;
3377  else
3378  F.BaseRegs[Idx] = G;
3379  (void)InsertFormula(LU, LUIdx, F);
3380 }
3381 
3382 /// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
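///
/// For illustration: a base register (@g + %x) can be rewritten as a formula
/// with BaseGV = @g and base register %x, exposing addressing modes that fold
/// the symbol into the address.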
3383 void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
3384  Formula Base) {
3385  // We can't add a symbolic offset if the address already contains one.
3386  if (Base.BaseGV) return;
3387 
3388  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3389  GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
3390  if (Base.Scale == 1)
3391  GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
3392  /* IsScaledReg */ true);
3393 }
3394 
3395 /// \brief Helper function for LSRInstance::GenerateConstantOffsets.
3396 void LSRInstance::GenerateConstantOffsetsImpl(
3397  LSRUse &LU, unsigned LUIdx, const Formula &Base,
3398  const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
3399  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3400  for (int64_t Offset : Worklist) {
3401  Formula F = Base;
3402  F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
3403  if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
3404  LU.AccessTy, F)) {
3405  // Add the offset to the base register.
3406  const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
3407  // If it cancelled out, drop the base register, otherwise update it.
3408  if (NewG->isZero()) {
3409  if (IsScaledReg) {
3410  F.Scale = 0;
3411  F.ScaledReg = nullptr;
3412  } else
3413  F.DeleteBaseReg(F.BaseRegs[Idx]);
3414  F.Canonicalize();
3415  } else if (IsScaledReg)
3416  F.ScaledReg = NewG;
3417  else
3418  F.BaseRegs[Idx] = NewG;
3419 
3420  (void)InsertFormula(LU, LUIdx, F);
3421  }
3422  }
3423 
3424  int64_t Imm = ExtractImmediate(G, SE);
3425  if (G->isZero() || Imm == 0)
3426  return;
3427  Formula F = Base;
3428  F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
3429  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3430  return;
3431  if (IsScaledReg)
3432  F.ScaledReg = G;
3433  else
3434  F.BaseRegs[Idx] = G;
3435  (void)InsertFormula(LU, LUIdx, F);
3436 }
3437 
3438 /// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
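///
/// For illustration: if a use spans fixup offsets 0 and 64 over the register
/// {A,+,4}<%L>, a formula based on the rebased register {(A + 64),+,4}<%L>
/// with BaseOffset -64 is generated, letting both fixups share one register.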
3439 void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
3440  Formula Base) {
3441  // TODO: For now, just add the min and max offset, because it usually isn't
3442  // worthwhile looking at everything in between.
3443  SmallVector<int64_t, 2> Worklist;
3444  Worklist.push_back(LU.MinOffset);
3445  if (LU.MaxOffset != LU.MinOffset)
3446  Worklist.push_back(LU.MaxOffset);
3447 
3448  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3449  GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
3450  if (Base.Scale == 1)
3451  GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
3452  /* IsScaledReg */ true);
3453 }
3454 
3455 /// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
3456 /// the comparison. For example, x == y -> x*c == y*c.
3457 void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
3458  Formula Base) {
3459  if (LU.Kind != LSRUse::ICmpZero) return;
3460 
3461  // Determine the integer type for the base formula.
3462  Type *IntTy = Base.getType();
3463  if (!IntTy) return;
3464  if (SE.getTypeSizeInBits(IntTy) > 64) return;
3465 
3466  // Don't do this if there is more than one offset.
3467  if (LU.MinOffset != LU.MaxOffset) return;
3468 
3469  assert(!Base.BaseGV && "ICmpZero use is not legal!");
3470 
3471  // Check each interesting stride.
3472  for (int64_t Factor : Factors) {
3473  // Check that the multiplication doesn't overflow.
3474  if (Base.BaseOffset == INT64_MIN && Factor == -1)
3475  continue;
3476  int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
3477  if (NewBaseOffset / Factor != Base.BaseOffset)
3478  continue;
3479  // If the offset will be truncated at this use, check that it is in bounds.
3480  if (!IntTy->isPointerTy() &&
3481  !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
3482  continue;
3483 
3484  // Check that multiplying with the use offset doesn't overflow.
3485  int64_t Offset = LU.MinOffset;
3486  if (Offset == INT64_MIN && Factor == -1)
3487  continue;
3488  Offset = (uint64_t)Offset * Factor;
3489  if (Offset / Factor != LU.MinOffset)
3490  continue;
3491  // If the offset will be truncated at this use, check that it is in bounds.
3492  if (!IntTy->isPointerTy() &&
3493  !ConstantInt::isValueValidForType(IntTy, Offset))
3494  continue;
3495 
3496  Formula F = Base;
3497  F.BaseOffset = NewBaseOffset;
3498 
3499  // Check that this scale is legal.
3500  if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
3501  continue;
3502 
3503  // Compensate for the use having MinOffset built into it.
3504  F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
3505 
3506  const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3507 
3508  // Check that multiplying with each base register doesn't overflow.
3509  for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
3510  F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
3511  if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
3512  goto next;
3513  }
3514 
3515  // Check that multiplying with the scaled register doesn't overflow.
3516  if (F.ScaledReg) {
3517  F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
3518  if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
3519  continue;
3520  }
3521 
3522  // Check that multiplying with the unfolded offset doesn't overflow.
3523  if (F.UnfoldedOffset != 0) {
3524  if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
3525  continue;
3526  F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
3527  if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
3528  continue;
3529  // If the offset will be truncated, check that it is in bounds.
3530  if (!IntTy->isPointerTy() &&
3531  !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
3532  continue;
3533  }
3534 
3535  // If we make it here and it's legal, add it.
3536  (void)InsertFormula(LU, LUIdx, F);
3537  next:;
3538  }
3539 }
3540 
3541 /// GenerateScales - Generate stride factor reuse formulae by making use of
3542 /// scaled-offset address modes, for example.
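///
/// For illustration: with an interesting factor of 4, a base register
/// {0,+,4}<%L> can be re-expressed as the scaled register {0,+,1}<%L> with
/// Scale = 4, which a target with scaled-index addressing can fold for free.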
3543 void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
3544  // Determine the integer type for the base formula.
3545  Type *IntTy = Base.getType();
3546  if (!IntTy) return;
3547 
3548  // If this Formula already has a scaled register, we can't add another one.
3549  // Try to unscale the formula to generate a better scale.
3550  if (Base.Scale != 0 && !Base.Unscale())
3551  return;
3552 
3553  assert(Base.Scale == 0 && "Unscale did not do its job!");
3554 
3555  // Check each interesting stride.
3556  for (int64_t Factor : Factors) {
3557  Base.Scale = Factor;
3558  Base.HasBaseReg = Base.BaseRegs.size() > 1;
3559  // Check whether this scale is going to be legal.
3560  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3561  Base)) {
3562  // As a special case, handle out-of-loop Basic users as Special users.
3563  // TODO: Reconsider this special case.
3564  if (LU.Kind == LSRUse::Basic &&
3565  isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
3566  LU.AccessTy, Base) &&
3567  LU.AllFixupsOutsideLoop)
3568  LU.Kind = LSRUse::Special;
3569  else
3570  continue;
3571  }
3572  // For an ICmpZero, negating a solitary base register won't lead to
3573  // new solutions.
3574  if (LU.Kind == LSRUse::ICmpZero &&
3575  !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
3576  continue;
3577  // For each addrec base reg, apply the scale, if possible.
3578  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3579  if (const SCEVAddRecExpr *AR =
3580  dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
3581  const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3582  if (FactorS->isZero())
3583  continue;
3584  // Divide out the factor, ignoring high bits, since we'll be
3585  // scaling the value back up in the end.
3586  if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
3587  // TODO: This could be optimized to avoid all the copying.
3588  Formula F = Base;
3589  F.ScaledReg = Quotient;
3590  F.DeleteBaseReg(F.BaseRegs[i]);
3591  // The canonical representation of 1*reg is reg, which is already in
3592  // Base. In that case, do not try to insert the formula, it will be
3593  // rejected anyway.
3594  if (F.Scale == 1 && F.BaseRegs.empty())
3595  continue;
3596  (void)InsertFormula(LU, LUIdx, F);
3597  }
3598  }
3599  }
3600 }
3601 
3602 /// GenerateTruncates - Generate reuse formulae from different IV types.
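///
/// For illustration: if truncating i64 to i32 is free on the target, an i32
/// use can be given a formula whose registers are any-extended to i64, so it
/// can share the wider induction variable and truncate at the point of use.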
3603 void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
3604  // Don't bother truncating symbolic values.
3605  if (Base.BaseGV) return;
3606 
3607  // Determine the integer type for the base formula.
3608  Type *DstTy = Base.getType();
3609  if (!DstTy) return;
3610  DstTy = SE.getEffectiveSCEVType(DstTy);
3611 
3612  for (Type *SrcTy : Types) {
3613  if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
3614  Formula F = Base;
3615 
3616  if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
3617  for (const SCEV *&BaseReg : F.BaseRegs)
3618  BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
3619 
3620  // TODO: This assumes we've done basic processing on all uses and
3621  // have an idea what the register usage is.
3622  if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
3623  continue;
3624 
3625  (void)InsertFormula(LU, LUIdx, F);
3626  }
3627  }
3628 }
3629 
3630 namespace {
3631 
3632 /// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
3633 /// defer modifications so that the search phase doesn't have to worry about
3634 /// the data structures moving underneath it.
3635 struct WorkItem {
3636  size_t LUIdx;
3637  int64_t Imm;
3638  const SCEV *OrigReg;
3639 
3640  WorkItem(size_t LI, int64_t I, const SCEV *R)
3641  : LUIdx(LI), Imm(I), OrigReg(R) {}
3642 
3643  void print(raw_ostream &OS) const;
3644  void dump() const;
3645 };
3646 
3647 }
3648 
3649 void WorkItem::print(raw_ostream &OS) const {
3650  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
3651  << " , add offset " << Imm;
3652 }
3653 
3654 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3655 void WorkItem::dump() const {
3656  print(errs()); errs() << '\n';
3657 }
3658 #endif
3659 
3660 /// GenerateCrossUseConstantOffsets - Look for registers which are a constant
3661 /// distance apart and try to form reuse opportunities between them.
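///
/// For illustration: if one use works with reg({A,+,4}<%L>) and another with
/// reg({(A + 256),+,4}<%L>), the two registers differ by the constant 256, so
/// formulae are added that express the second use in terms of the first
/// register plus an immediate offset of 256.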
3662 void LSRInstance::GenerateCrossUseConstantOffsets() {
3663  // Group the registers by their value without any added constant offset.
3664  typedef std::map<int64_t, const SCEV *> ImmMapTy;
3665  DenseMap<const SCEV *, ImmMapTy> Map;
3666  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
3667  SmallVector<const SCEV *, 8> Sequence;
3668  for (const SCEV *Use : RegUses) {
3669  const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
3670  int64_t Imm = ExtractImmediate(Reg, SE);
3671  auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
3672  if (Pair.second)
3673  Sequence.push_back(Reg);
3674  Pair.first->second.insert(std::make_pair(Imm, Use));
3675  UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
3676  }
3677 
3678  // Now examine each set of registers with the same base value. Build up
3679  // a list of work to do and do the work in a separate step so that we're
3680  // not adding formulae and register counts while we're searching.
3681  SmallVector<WorkItem, 32> WorkItems;
3682  SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
3683  for (const SCEV *Reg : Sequence) {
3684  const ImmMapTy &Imms = Map.find(Reg)->second;
3685 
3686  // It's not worthwhile looking for reuse if there's only one offset.
3687  if (Imms.size() == 1)
3688  continue;
3689 
3690  DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
3691  for (const auto &Entry : Imms)
3692  dbgs() << ' ' << Entry.first;
3693  dbgs() << '\n');
3694 
3695  // Examine each offset.
3696  for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
3697  J != JE; ++J) {
3698  const SCEV *OrigReg = J->second;
3699 
3700  int64_t JImm = J->first;
3701  const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
3702 
3703  if (!isa<SCEVConstant>(OrigReg) &&
3704  UsedByIndicesMap[Reg].count() == 1) {
3705  DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
3706  continue;
3707  }
3708 
3709  // Conservatively examine offsets between this orig reg and a few selected
3710  // other orig regs.
3711  ImmMapTy::const_iterator OtherImms[] = {
3712  Imms.begin(), std::prev(Imms.end()),
3713  Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
3714  2)
3715  };
3716  for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
3717  ImmMapTy::const_iterator M = OtherImms[i];
3718  if (M == J || M == JE) continue;
3719 
3720  // Compute the difference between the two.
3721  int64_t Imm = (uint64_t)JImm - M->first;
3722  for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
3723  LUIdx = UsedByIndices.find_next(LUIdx))
3724  // Make a memo of this use, offset, and register tuple.
3725  if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
3726  WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
3727  }
3728  }
3729  }
3730 
3731  Map.clear();
3732  Sequence.clear();
3733  UsedByIndicesMap.clear();
3734  UniqueItems.clear();
3735 
3736  // Now iterate through the worklist and add new formulae.
3737  for (const WorkItem &WI : WorkItems) {
3738  size_t LUIdx = WI.LUIdx;
3739  LSRUse &LU = Uses[LUIdx];
3740  int64_t Imm = WI.Imm;
3741  const SCEV *OrigReg = WI.OrigReg;
3742 
3743  Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
3744  const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
3745  unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
3746 
3747  // TODO: Use a more targeted data structure.
3748  for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
3749  Formula F = LU.Formulae[L];
3750  // FIXME: The code for the scaled and unscaled registers looks
3751  // very similar but slightly different. Investigate if they
3752  // could be merged. That way, we would not have to unscale the
3753  // Formula.
3754  F.Unscale();
3755  // Use the immediate in the scaled register.
3756  if (F.ScaledReg == OrigReg) {
3757  int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
3758  // Don't create 50 + reg(-50).
3759  if (F.referencesReg(SE.getSCEV(
3760  ConstantInt::get(IntTy, -(uint64_t)Offset))))
3761  continue;
3762  Formula NewF = F;
3763  NewF.BaseOffset = Offset;
3764  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3765  NewF))
3766  continue;
3767  NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
3768 
3769  // If the new scale is a constant in a register, and adding the constant
3770  // value to the immediate would produce a value closer to zero than the
3771  // immediate itself, then the formula isn't worthwhile.
3772  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
3773  if (C->getValue()->isNegative() !=
3774  (NewF.BaseOffset < 0) &&
3775  (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
3776  .ule(std::abs(NewF.BaseOffset)))
3777  continue;
3778 
3779  // OK, looks good.
3780  NewF.Canonicalize();
3781  (void)InsertFormula(LU, LUIdx, NewF);
3782  } else {
3783  // Use the immediate in a base register.
3784  for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
3785  const SCEV *BaseReg = F.BaseRegs[N];
3786  if (BaseReg != OrigReg)
3787  continue;
3788  Formula NewF = F;
3789  NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
3790  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
3791  LU.Kind, LU.AccessTy, NewF)) {
3792  if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
3793  continue;
3794  NewF = F;
3795  NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
3796  }
3797  NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
3798 
3799  // If the new formula has a constant in a register, and adding the
3800  // constant value to the immediate would produce a value closer to
3801  // zero than the immediate itself, then the formula isn't worthwhile.
3802  for (const SCEV *NewReg : NewF.BaseRegs)
3803  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
3804  if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
3805  std::abs(NewF.BaseOffset)) &&
3806  (C->getValue()->getValue() +
3807  NewF.BaseOffset).countTrailingZeros() >=
3808  countTrailingZeros<uint64_t>(NewF.BaseOffset))
3809  goto skip_formula;
3810 
3811  // Ok, looks good.
3812  NewF.Canonicalize();
3813  (void)InsertFormula(LU, LUIdx, NewF);
3814  break;
3815  skip_formula:;
3816  }
3817  }
3818  }
3819  }
3820 }
3821 
3822 /// GenerateAllReuseFormulae - Generate formulae for each use.
3823 void
3824 LSRInstance::GenerateAllReuseFormulae() {
3825  // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
3826  // queries are more precise.
3827  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3828  LSRUse &LU = Uses[LUIdx];
3829  for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3830  GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
3831  for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3832  GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
3833  }
3834  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3835  LSRUse &LU = Uses[LUIdx];
3836  for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3837  GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
3838  for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3839  GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
3840  for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3841  GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
3842  for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3843  GenerateScales(LU, LUIdx, LU.Formulae[i]);
3844  }
3845  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3846  LSRUse &LU = Uses[LUIdx];
3847  for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3848  GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
3849  }
3850 
3851  GenerateCrossUseConstantOffsets();
3852 
3853  DEBUG(dbgs() << "\n"
3854  "After generating reuse formulae:\n";
3855  print_uses(dbgs()));
3856 }
3857 
3858 /// If there are multiple formulae with the same set of registers used
3859 /// by other uses, pick the best one and delete the others.
3860 void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
3861  DenseSet<const SCEV *> VisitedRegs;
3862  SmallPtrSet<const SCEV *, 16> Regs;
3863  SmallPtrSet<const SCEV *, 16> LoserRegs;
3864 #ifndef NDEBUG
3865  bool ChangedFormulae = false;
3866 #endif
3867 
3868  // Collect the best formula for each unique set of shared registers. This
3869  // is reset for each use.
3870  typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
3871  BestFormulaeTy;
3872  BestFormulaeTy BestFormulae;
3873 
3874  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3875  LSRUse &LU = Uses[LUIdx];
3876  DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
3877 
3878  bool Any = false;
3879  for (size_t FIdx = 0, NumForms = LU.Formulae.size();
3880  FIdx != NumForms; ++FIdx) {
3881  Formula &F = LU.Formulae[FIdx];
3882 
3883  // Some formulas are instant losers. For example, they may depend on
3884  // nonexistent AddRecs from other loops. These need to be filtered
3885  // immediately, otherwise heuristics could choose them over others leading
3886  // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
3887  // avoids the need to recompute this information across formulae using the
3888  // same bad AddRec. Passing LoserRegs is also essential unless we remove
3889  // the corresponding bad register from the Regs set.
3890  Cost CostF;
3891  Regs.clear();
3892  CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
3893  &LoserRegs);
3894  if (CostF.isLoser()) {
3895  // During initial formula generation, undesirable formulae are generated
3896  // by uses within other loops that have some non-trivial address mode or
3897  // use the postinc form of the IV. LSR needs to provide these formulae
3898  // as the basis of rediscovering the desired formula that uses an AddRec
3899  // corresponding to the existing phi. Once all formulae have been
3900  // generated, these initial losers may be pruned.
3901  DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
3902  dbgs() << "\n");
3903  }
3904  else {
3905  SmallVector<const SCEV *, 4> Key;
3906  for (const SCEV *Reg : F.BaseRegs) {
3907  if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
3908  Key.push_back(Reg);
3909  }
3910  if (F.ScaledReg &&
3911  RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
3912  Key.push_back(F.ScaledReg);
3913  // Unstable sort by host order ok, because this is only used for
3914  // uniquifying.
3915  std::sort(Key.begin(), Key.end());
3916 
3917  std::pair<BestFormulaeTy::const_iterator, bool> P =
3918  BestFormulae.insert(std::make_pair(Key, FIdx));
3919  if (P.second)
3920  continue;
3921 
3922  Formula &Best = LU.Formulae[P.first->second];
3923 
3924  Cost CostBest;
3925  Regs.clear();
3926  CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
3927  DT, LU);
3928  if (CostF < CostBest)
3929  std::swap(F, Best);
3930  DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
3931  dbgs() << "\n"
3932  " in favor of formula "; Best.print(dbgs());
3933  dbgs() << '\n');
3934  }
3935 #ifndef NDEBUG
3936  ChangedFormulae = true;
3937 #endif
3938  LU.DeleteFormula(F);
3939  --FIdx;
3940  --NumForms;
3941  Any = true;
3942  }
3943 
3944  // Now that we've filtered out some formulae, recompute the Regs set.
3945  if (Any)
3946  LU.RecomputeRegs(LUIdx, RegUses);
3947 
3948  // Reset this to prepare for the next use.
3949  BestFormulae.clear();
3950  }
3951 
3952  DEBUG(if (ChangedFormulae) {
3953  dbgs() << "\n"
3954  "After filtering out undesirable candidates:\n";
3955  print_uses(dbgs());
3956  });
3957 }
3958 
3959 // This is a rough guess that seems to work fairly well.
3960 static const size_t ComplexityLimit = UINT16_MAX;
3961 
3962 /// EstimateSearchSpaceComplexity - Estimate the worst-case number of
3963 /// solutions the solver might have to consider. It almost never considers
3964 /// this many solutions because it prunes the search space, but the pruning
3965 /// isn't always sufficient.
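///
/// For illustration: three uses with 4, 6, and 10 candidate formulae give a
/// worst case of 4 * 6 * 10 = 240 solutions; the estimate saturates at
/// ComplexityLimit rather than growing without bound.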
3966 size_t LSRInstance::EstimateSearchSpaceComplexity() const {
3967  size_t Power = 1;
3968  for (const LSRUse &LU : Uses) {
3969  size_t FSize = LU.Formulae.size();
3970  if (FSize >= ComplexityLimit) {
3971  Power = ComplexityLimit;
3972  break;
3973  }
3974  Power *= FSize;
3975  if (Power >= ComplexityLimit)
3976  break;
3977  }
3978  return Power;
3979 }
3980 
3981 /// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
3982 /// of the registers of another formula, it won't help reduce register
3983 /// pressure (though it may not necessarily hurt register pressure); remove
3984 /// it to simplify the system.
3985 void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
3986  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
3987  DEBUG(dbgs() << "The search space is too complex.\n");
3988 
3989  DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
3990  "which use a superset of registers used by other "
3991  "formulae.\n");
3992 
3993  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3994  LSRUse &LU = Uses[LUIdx];
3995  bool Any = false;
3996  for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
3997  Formula &F = LU.Formulae[i];
3998  // Look for a formula with a constant or GV in a register. If the use
3999  // also has a formula with that same value in an immediate field,
4000  // delete the one that uses a register.
4001  for (SmallVectorImpl<const SCEV *>::const_iterator
4002  I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4003  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
4004  Formula NewF = F;
4005  NewF.BaseOffset += C->getValue()->getSExtValue();
4006  NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4007  (I - F.BaseRegs.begin()));
4008  if (LU.HasFormulaWithSameRegs(NewF)) {
4009  DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
4010  LU.DeleteFormula(F);
4011  --i;
4012  --e;
4013  Any = true;
4014  break;
4015  }
4016  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
4017  if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
4018  if (!F.BaseGV) {
4019  Formula NewF = F;
4020  NewF.BaseGV = GV;
4021  NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4022  (I - F.BaseRegs.begin()));
4023  if (LU.HasFormulaWithSameRegs(NewF)) {
4024  DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4025  dbgs() << '\n');
4026  LU.DeleteFormula(F);
4027  --i;
4028  --e;
4029  Any = true;
4030  break;
4031  }
4032  }
4033  }
4034  }
4035  }
4036  if (Any)
4037  LU.RecomputeRegs(LUIdx, RegUses);
4038  }
4039 
4040  DEBUG(dbgs() << "After pre-selection:\n";
4041  print_uses(dbgs()));
4042  }
4043 }
4044 
4045 /// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
4046 /// for expressions like A, A+1, A+2, etc., allocate a single register for
4047 /// them.
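///
/// For illustration: after 4x unrolling, uses at A, A+8, A+16, and A+24 can
/// often be folded into the use at A, carrying the constant differences as
/// fixup offsets rather than keeping four separate registers live.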
4048 void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4049  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4050  return;
4051 
4052  DEBUG(dbgs() << "The search space is too complex.\n"
4053  "Narrowing the search space by assuming that uses separated "
4054  "by a constant offset will use the same registers.\n");
4055 
4056  // This is especially useful for unrolled loops.
4057 
4058  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4059  LSRUse &LU = Uses[LUIdx];
4060  for (const Formula &F : LU.Formulae) {
4061  if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
4062  continue;
4063 
4064  LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
4065  if (!LUThatHas)
4066  continue;
4067 
4068  if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
4069  LU.Kind, LU.AccessTy))
4070  continue;
4071 
4072  DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
4073 
4074  LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4075 
4076  // Update the relocs to reference the new use.
4077  for (LSRFixup &Fixup : Fixups) {
4078  if (Fixup.LUIdx == LUIdx) {
4079  Fixup.LUIdx = LUThatHas - &Uses.front();
4080  Fixup.Offset += F.BaseOffset;
4081  // Add the new offset to LUThatHas' offset list.
4082  if (LUThatHas->Offsets.back() != Fixup.Offset) {
4083  LUThatHas->Offsets.push_back(Fixup.Offset);
4084  if (Fixup.Offset > LUThatHas->MaxOffset)
4085  LUThatHas->MaxOffset = Fixup.Offset;
4086  if (Fixup.Offset < LUThatHas->MinOffset)
4087  LUThatHas->MinOffset = Fixup.Offset;
4088  }
4089  DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
4090  }
4091  if (Fixup.LUIdx == NumUses-1)
4092  Fixup.LUIdx = LUIdx;
4093  }
4094 
4095  // Delete formulae from the new use which are no longer legal.
4096  bool Any = false;
4097  for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4098  Formula &F = LUThatHas->Formulae[i];
4099  if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4100  LUThatHas->Kind, LUThatHas->AccessTy, F)) {
4101  DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4102  dbgs() << '\n');
4103  LUThatHas->DeleteFormula(F);
4104  --i;
4105  --e;
4106  Any = true;
4107  }
4108  }
4109 
4110  if (Any)
4111  LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
4112 
4113  // Delete the old use.
4114  DeleteUse(LU, LUIdx);
4115  --LUIdx;
4116  --NumUses;
4117  break;
4118  }
4119  }
4120 
4121  DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4122 }
4123 
4124 /// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
4125 /// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4126 /// we've done more filtering, as it may be able to find more formulae to
4127 /// eliminate.
4128 void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4129  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4130  DEBUG(dbgs() << "The search space is too complex.\n");
4131 
4132  DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4133  "undesirable dedicated registers.\n");
4134 
4135  FilterOutUndesirableDedicatedRegisters();
4136 
4137  DEBUG(dbgs() << "After pre-selection:\n";
4138  print_uses(dbgs()));
4139  }
4140 }
4141 
4142 /// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
4143 /// to be profitable, and then in any use which has any reference to that
4144 /// register, delete all formulae which do not reference that register.
4145 void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
4146  // With all other options exhausted, loop until the system is simple
4147  // enough to handle.
4148  SmallPtrSet<const SCEV *, 4> Taken;
4149  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4150  // Ok, we have too many formulae on our hands to conveniently handle.
4151  // Use a rough heuristic to thin out the list.
4152  DEBUG(dbgs() << "The search space is too complex.\n");
4153 
4154  // Pick the register which is used by the most LSRUses, which is likely
4155  // to be a good reuse register candidate.
4156  const SCEV *Best = nullptr;
4157  unsigned BestNum = 0;
4158  for (const SCEV *Reg : RegUses) {
4159  if (Taken.count(Reg))
4160  continue;
4161  if (!Best)
4162  Best = Reg;
4163  else {
4164  unsigned Count = RegUses.getUsedByIndices(Reg).count();
4165  if (Count > BestNum) {
4166  Best = Reg;
4167  BestNum = Count;
4168  }
4169  }
4170  }
4171 
4172  DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
4173  << " will yield profitable reuse.\n");
4174  Taken.insert(Best);
4175 
4176  // In any use with formulae which reference this register, delete formulae
4177  // which don't reference it.
4178  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4179  LSRUse &LU = Uses[LUIdx];
4180  if (!LU.Regs.count(Best)) continue;
4181 
4182  bool Any = false;
4183  for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4184  Formula &F = LU.Formulae[i];
4185  if (!F.referencesReg(Best)) {
4186  DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
4187  LU.DeleteFormula(F);
4188  --e;
4189  --i;
4190  Any = true;
4191  assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
4192  continue;
4193  }
4194  }
4195 
4196  if (Any)
4197  LU.RecomputeRegs(LUIdx, RegUses);
4198  }
4199 
4200  DEBUG(dbgs() << "After pre-selection:\n";
4201  print_uses(dbgs()));
4202  }
4203 }
4204 
4205 /// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
4206 /// formulae to choose from, use some rough heuristics to prune down the number
4207 /// of formulae. This keeps the main solver from taking an extraordinary amount
4208 /// of time in some worst-case scenarios.
4209 void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
4210  NarrowSearchSpaceByDetectingSupersets();
4211  NarrowSearchSpaceByCollapsingUnrolledCode();
4212  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
4213  NarrowSearchSpaceByPickingWinnerRegs();
4214 }
4215 
4216 /// SolveRecurse - This is the recursive solver.
4217 void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
4218  Cost &SolutionCost,
4219  SmallVectorImpl<const Formula *> &Workspace,
4220  const Cost &CurCost,
4221  const SmallPtrSet<const SCEV *, 16> &CurRegs,
4222  DenseSet<const SCEV *> &VisitedRegs) const {
4223  // Some ideas:
4224  // - prune more:
4225  // - use more aggressive filtering
4226  // - sort the formula so that the most profitable solutions are found first
4227  // - sort the uses too
4228  // - search faster:
4229  // - don't compute a cost, and then compare. compare while computing a cost
4230  // and bail early.
4231  // - track register sets with SmallBitVector
4232 
4233  const LSRUse &LU = Uses[Workspace.size()];
4234 
4235  // If this use references any register that's already a part of the
4236  // in-progress solution, consider it a requirement that a formula must
4237  // reference that register in order to be considered. This prunes out
4238  // unprofitable searching.
4239  SmallSetVector<const SCEV *, 4> ReqRegs;
4240  for (const SCEV *S : CurRegs)
4241  if (LU.Regs.count(S))
4242  ReqRegs.insert(S);
4243 
4244  SmallPtrSet<const SCEV *, 16> NewRegs;
4245  Cost NewCost;
4246  for (const Formula &F : LU.Formulae) {
4247  // Ignore formulae which may not be ideal in terms of register reuse of
4248  // ReqRegs. The formula should use all required registers before
4249  // introducing new ones.
4250  int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
4251  for (const SCEV *Reg : ReqRegs) {
4252  if ((F.ScaledReg && F.ScaledReg == Reg) ||
4253  std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
4254  F.BaseRegs.end()) {
4255  --NumReqRegsToFind;
4256  if (NumReqRegsToFind == 0)
4257  break;
4258  }
4259  }
4260  if (NumReqRegsToFind != 0) {
4261  // If none of the formulae satisfied the required registers, then we could
4262  // clear ReqRegs and try again. Currently, we simply give up in this case.
4263  continue;
4264  }
4265 
4266  // Evaluate the cost of the current formula. If it's already worse than
4267  // the current best, prune the search at that point.
4268  NewCost = CurCost;
4269  NewRegs = CurRegs;
4270  NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
4271  LU);
4272  if (NewCost < SolutionCost) {
4273  Workspace.push_back(&F);
4274  if (Workspace.size() != Uses.size()) {
4275  SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
4276  NewRegs, VisitedRegs);
4277  if (F.getNumRegs() == 1 && Workspace.size() == 1)
4278  VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
4279  } else {
4280  DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
4281  dbgs() << ".\n Regs:";
4282  for (const SCEV *S : NewRegs)
4283  dbgs() << ' ' << *S;
4284  dbgs() << '\n');
4285 
4286  SolutionCost = NewCost;
4287  Solution = Workspace;
4288  }
4289  Workspace.pop_back();
4290  }
4291  }
4292 }
4293 
4294 /// Solve - Choose one formula from each use. Return the results in the given
4295 /// Solution vector.
4296 void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
4297  SmallVector<const Formula *, 8> Workspace;
4298  Cost SolutionCost;
4299  SolutionCost.Lose();
4300  Cost CurCost;
4301  SmallPtrSet<const SCEV *, 16> CurRegs;
4302  DenseSet<const SCEV *> VisitedRegs;
4303  Workspace.reserve(Uses.size());
4304 
4305  // SolveRecurse does all the work.
4306  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
4307  CurRegs, VisitedRegs);
4308  if (Solution.empty()) {
4309  DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
4310  return;
4311  }
4312 
4313  // Ok, we've now made all our decisions.
4314  DEBUG(dbgs() << "\n"
4315  "The chosen solution requires "; SolutionCost.print(dbgs());
4316  dbgs() << ":\n";
4317  for (size_t i = 0, e = Uses.size(); i != e; ++i) {
4318  dbgs() << " ";
4319  Uses[i].print(dbgs());
4320  dbgs() << "\n"
4321  " ";
4322  Solution[i]->print(dbgs());
4323  dbgs() << '\n';
4324  });
4325 
4326  assert(Solution.size() == Uses.size() && "Malformed solution!");
4327 }
4328 
4329 /// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
4330 /// the dominator tree as far as we can go while still being dominated by the
4331 /// input positions. This helps canonicalize the insert position, which
4332 /// encourages sharing.
4333 BasicBlock::iterator
4334 LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
4335  const SmallVectorImpl<Instruction *> &Inputs)
4336  const {
4337  for (;;) {
4338  const Loop *IPLoop = LI.getLoopFor(IP->getParent());
4339  unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
4340 
4341  BasicBlock *IDom;
4342  for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
4343  if (!Rung) return IP;
4344  Rung = Rung->getIDom();
4345  if (!Rung) return IP;
4346  IDom = Rung->getBlock();
4347 
4348  // Don't climb into a loop though.
4349  const Loop *IDomLoop = LI.getLoopFor(IDom);
4350  unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
4351  if (IDomDepth <= IPLoopDepth &&
4352  (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
4353  break;
4354  }
4355 
4356  bool AllDominate = true;
4357  Instruction *BetterPos = nullptr;
4358  Instruction *Tentative = IDom->getTerminator();
4359  for (Instruction *Inst : Inputs) {
4360  if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
4361  AllDominate = false;
4362  break;
4363  }
4364  // Attempt to find an insert position in the middle of the block,
4365  // instead of at the end, so that it can be used for other expansions.
4366  if (IDom == Inst->getParent() &&
4367  (!BetterPos || !DT.dominates(Inst, BetterPos)))
4368  BetterPos = std::next(BasicBlock::iterator(Inst));
4369  }
4370  if (!AllDominate)
4371  break;
4372  if (BetterPos)
4373  IP = BetterPos;
4374  else
4375  IP = Tentative;
4376  }
4377 
4378  return IP;
4379 }
4380 
4381 /// AdjustInsertPositionForExpand - Determine an input position which will be
4382 /// dominated by the operands and which will dominate the result.
4383 BasicBlock::iterator
4384 LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
4385  const LSRFixup &LF,
4386  const LSRUse &LU,
4387  SCEVExpander &Rewriter) const {
4388  // Collect some instructions which must be dominated by the
4389  // expanding replacement. These must be dominated by any operands that
4390  // will be required in the expansion.
4391  SmallVector<Instruction *, 4> Inputs;
4392  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
4393  Inputs.push_back(I);
4394  if (LU.Kind == LSRUse::ICmpZero)
4395  if (Instruction *I =
4396  dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
4397  Inputs.push_back(I);
4398  if (LF.PostIncLoops.count(L)) {
4399  if (LF.isUseFullyOutsideLoop(L))
4400  Inputs.push_back(L->getLoopLatch()->getTerminator());
4401  else
4402  Inputs.push_back(IVIncInsertPos);
4403  }
4404  // The expansion must also be dominated by the increment positions of any
4405  // loops for which it is using post-inc mode.
4406  for (const Loop *PIL : LF.PostIncLoops) {
4407  if (PIL == L) continue;
4408 
4409  // Be dominated by the loop exit.
4410  SmallVector<BasicBlock *, 4> ExitingBlocks;
4411  PIL->getExitingBlocks(ExitingBlocks);
4412  if (!ExitingBlocks.empty()) {
4413  BasicBlock *BB = ExitingBlocks[0];
4414  for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
4415  BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
4416  Inputs.push_back(BB->getTerminator());
4417  }
4418  }
4419 
4420  assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
4421  && !isa<DbgInfoIntrinsic>(LowestIP) &&
4422  "Insertion point must be a normal instruction");
4423 
4424  // Then, climb up the immediate dominator tree as far as we can go while
4425  // still being dominated by the input positions.
4426  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
4427 
4428  // Don't insert instructions before PHI nodes.
4429  while (isa<PHINode>(IP)) ++IP;
4430 
4431  // Ignore landingpad instructions.
4432  while (isa<LandingPadInst>(IP)) ++IP;
4433 
4434  // Ignore debug intrinsics.
4435  while (isa<DbgInfoIntrinsic>(IP)) ++IP;
4436 
4437  // Set IP below instructions recently inserted by SCEVExpander. This keeps the
4438  // IP consistent across expansions and allows the previously inserted
4439  // instructions to be reused by subsequent expansion.
4440  while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
4441 
4442  return IP;
4443 }
4444 
4445 /// Expand - Emit instructions for the leading candidate expression for this
4446 /// LSRUse (this is called "expanding").
4447 Value *LSRInstance::Expand(const LSRFixup &LF,
4448  const Formula &F,
4449  BasicBlock::iterator IP,
4450  SCEVExpander &Rewriter,
4451  SmallVectorImpl<WeakVH> &DeadInsts) const {
4452  const LSRUse &LU = Uses[LF.LUIdx];
4453  if (LU.RigidFormula)
4454  return LF.OperandValToReplace;
4455 
4456  // Determine an input position which will be dominated by the operands and
4457  // which will dominate the result.
4458  IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
4459 
4460  // Inform the Rewriter if we have a post-increment use, so that it can
4461  // perform an advantageous expansion.
4462  Rewriter.setPostInc(LF.PostIncLoops);
4463 
4464  // This is the type that the user actually needs.
4465  Type *OpTy = LF.OperandValToReplace->getType();
4466  // This will be the type that we'll initially expand to.
4467  Type *Ty = F.getType();
4468  if (!Ty)
4469  // No type known; just expand directly to the ultimate type.
4470  Ty = OpTy;
4471  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
4472  // Expand directly to the ultimate type if it's the right size.
4473  Ty = OpTy;
4474  // This is the type to do integer arithmetic in.
4475  Type *IntTy = SE.getEffectiveSCEVType(Ty);
4476 
4477  // Build up a list of operands to add together to form the full base.
4478  SmallVector<const SCEV *, 8> Ops;
4479 
4480  // Expand the BaseRegs portion.
4481  for (const SCEV *Reg : F.BaseRegs) {
4482  assert(!Reg->isZero() && "Zero allocated in a base register!");
4483 
4484  // If we're expanding for a post-inc user, make the post-inc adjustment.
4485  PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4486  Reg = TransformForPostIncUse(Denormalize, Reg,
4487  LF.UserInst, LF.OperandValToReplace,
4488  Loops, SE, DT);
4489 
4490  Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
4491  }
4492 
4493  // Expand the ScaledReg portion.
4494  Value *ICmpScaledV = nullptr;
4495  if (F.Scale != 0) {
4496  const SCEV *ScaledS = F.ScaledReg;
4497 
4498  // If we're expanding for a post-inc user, make the post-inc adjustment.
4499  PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4500  ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
4501  LF.UserInst, LF.OperandValToReplace,
4502  Loops, SE, DT);
4503 
4504  if (LU.Kind == LSRUse::ICmpZero) {
4505  // Expand ScaleReg as if it was part of the base regs.
4506  if (F.Scale == 1)
4507  Ops.push_back(
4508  SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
4509  else {
4510  // An interesting way of "folding" with an icmp is to use a negated
4511  // scale, which we'll implement by inserting it into the other operand
4512  // of the icmp.
4513  assert(F.Scale == -1 &&
4514  "The only scale supported by ICmpZero uses is -1!");
4515  ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
4516  }
4517  } else {
4518  // Otherwise just expand the scaled register and an explicit scale,
4519  // which is expected to be matched as part of the address.
4520 
4521  // Flush the operand list to suppress SCEVExpander hoisting address modes.
4522  // Unless the addressing mode will not be folded.
4523  if (!Ops.empty() && LU.Kind == LSRUse::Address &&
4524  isAMCompletelyFolded(TTI, LU, F)) {
4525  Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
4526  Ops.clear();
4527  Ops.push_back(SE.getUnknown(FullV));
4528  }
4529  ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
4530  if (F.Scale != 1)
4531  ScaledS =
4532  SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
4533  Ops.push_back(ScaledS);
4534  }
4535  }
4536 
4537  // Expand the GV portion.
4538  if (F.BaseGV) {
4539  // Flush the operand list to suppress SCEVExpander hoisting.
4540  if (!Ops.empty()) {
4541  Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
4542  Ops.clear();
4543  Ops.push_back(SE.getUnknown(FullV));
4544  }
4545  Ops.push_back(SE.getUnknown(F.BaseGV));
4546  }
4547 
4548  // Flush the operand list to suppress SCEVExpander hoisting of both folded and
4549  // unfolded offsets. LSR assumes they both live next to their uses.
4550  if (!Ops.empty()) {
4551  Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
4552  Ops.clear();
4553  Ops.push_back(SE.getUnknown(FullV));
4554  }
4555 
4556  // Expand the immediate portion.
4557  int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
4558  if (Offset != 0) {
4559  if (LU.Kind == LSRUse::ICmpZero) {
4560  // The other interesting way of "folding" with an ICmpZero is to use a
4561  // negated immediate.
4562  if (!ICmpScaledV)
4563  ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
4564  else {
4565  Ops.push_back(SE.getUnknown(ICmpScaledV));
4566  ICmpScaledV = ConstantInt::get(IntTy, Offset);
4567  }
4568  } else {
4569  // Just add the immediate values. These again are expected to be matched
4570  // as part of the address.
4571  Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
4572  }
4573  }
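  // For example (an illustrative sketch): an ICmpZero use whose formula
  // carries an immediate of 4, i.e. a comparison of %x + 4 against zero, can
  // be emitted as
  //   %c = icmp eq i64 %x, -4
  // which is why the immediate is negated onto the compare rather than being
  // added into the operand list.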
4574 
4575  // Expand the unfolded offset portion.
4576  int64_t UnfoldedOffset = F.UnfoldedOffset;
4577  if (UnfoldedOffset != 0) {
4578  // Just add the immediate values.
4579  Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
4580  UnfoldedOffset)));
4581  }
4582 
4583  // Emit instructions summing all the operands.
4584  const SCEV *FullS = Ops.empty() ?
4585  SE.getConstant(IntTy, 0) :
4586  SE.getAddExpr(Ops);
4587  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
4588 
4589  // We're done expanding now, so reset the rewriter.
4590  Rewriter.clearPostInc();
4591 
4592  // An ICmpZero Formula represents an ICmp which we're handling as a
4593  // comparison against zero. Now that we've expanded an expression for that
4594  // form, update the ICmp's other operand.
4595  if (LU.Kind == LSRUse::ICmpZero) {
4596  ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
4597  DeadInsts.emplace_back(CI->getOperand(1));
4598  assert(!F.BaseGV && "ICmp does not support folding a global value and "
4599  "a scale at the same time!");
4600  if (F.Scale == -1) {
4601  if (ICmpScaledV->getType() != OpTy) {
4602  Instruction *Cast =
4603  CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
4604  OpTy, false),
4605  ICmpScaledV, OpTy, "tmp", CI);
4606  ICmpScaledV = Cast;
4607  }
4608  CI->setOperand(1, ICmpScaledV);
4609  } else {
4610  // A scale of 1 means that the scale has been expanded as part of the
4611  // base regs.
4612  assert((F.Scale == 0 || F.Scale == 1) &&
4613  "ICmp does not support folding a global value and "
4614  "a scale at the same time!");
4615  Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
4616  -(uint64_t)Offset);
4617  if (C->getType() != OpTy)
4618  C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
4619  OpTy, false),
4620  C, OpTy);
4621 
4622  CI->setOperand(1, C);
4623  }
4624  }
4625 
4626  return FullV;
4627 }
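// For example (an illustrative sketch for an Address use): a formula with
// base register {0,+,8}<%L>, base GV @A and offset 16 is expanded as a sum of
// those pieces, with the operand list flushed around the global so that
// SCEVExpander does not hoist them apart; the backend is then expected to
// fold the sum into a single [@A + index*8 + 16] style addressing mode where
// the target supports one.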
4628 
4629 /// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
4630 /// of their operands effectively happens in their predecessor blocks, so the
4631 /// expression may need to be expanded in multiple places.
4632 void LSRInstance::RewriteForPHI(PHINode *PN,
4633  const LSRFixup &LF,
4634  const Formula &F,
4635  SCEVExpander &Rewriter,
4636  SmallVectorImpl<WeakVH> &DeadInsts,
4637  Pass *P) const {
4638  DenseMap<BasicBlock *, Value *> Inserted;
4639  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
4640  if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
4641  BasicBlock *BB = PN->getIncomingBlock(i);
4642 
4643  // If this is a critical edge, split the edge so that we do not insert
4644  // the code on all predecessor/successor paths. We do this unless this
4645  // is the canonical backedge for this loop, which complicates post-inc
4646  // users.
4647  if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
4648  !isa<IndirectBrInst>(BB->getTerminator())) {
4649  BasicBlock *Parent = PN->getParent();
4650  Loop *PNLoop = LI.getLoopFor(Parent);
4651  if (!PNLoop || Parent != PNLoop->getHeader()) {
4652  // Split the critical edge.
4653  BasicBlock *NewBB = nullptr;
4654  if (!Parent->isLandingPad()) {
4655  NewBB = SplitCriticalEdge(BB, Parent,
4656  CriticalEdgeSplittingOptions(&DT, &LI)
4657  .setMergeIdenticalEdges()
4658  .setDontDeleteUselessPHIs());
4659  } else {
4660  SmallVector<BasicBlock*, 2> NewBBs;
4661  SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs,
4662  /*AliasAnalysis*/ nullptr, &DT, &LI);
4663  NewBB = NewBBs[0];
4664  }
4665  // If NewBB==NULL, then SplitCriticalEdge refused to split because all
4666  // phi predecessors are identical. The simple thing to do is skip
4667  // splitting in this case rather than complicate the API.
4668  if (NewBB) {
4669  // If PN is outside of the loop and BB is in the loop, we want to
4670  // move the block to be immediately before the PHI block, not
4671  // immediately after BB.
4672  if (L->contains(BB) && !L->contains(PN))
4673  NewBB->moveBefore(PN->getParent());
4674 
4675  // Splitting the edge can reduce the number of PHI entries we have.
4676  e = PN->getNumIncomingValues();
4677  BB = NewBB;
4678  i = PN->getBasicBlockIndex(BB);
4679  }
4680  }
4681  }
4682 
4683  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
4684  Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
4685  if (!Pair.second)
4686  PN->setIncomingValue(i, Pair.first->second);
4687  else {
4688  Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
4689 
4690  // If this is reuse-by-noop-cast, insert the noop cast.
4691  Type *OpTy = LF.OperandValToReplace->getType();
4692  if (FullV->getType() != OpTy)
4693  FullV =
4694  CastInst::Create(CastInst::getCastOpcode(FullV, false,
4695  OpTy, false),
4696  FullV, LF.OperandValToReplace->getType(),
4697  "tmp", BB->getTerminator());
4698 
4699  PN->setIncomingValue(i, FullV);
4700  Pair.first->second = FullV;
4701  }
4702  }
4703 }
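// For example (an illustrative sketch): if the rewritten operand feeds a PHI
// in an exit block and the corresponding predecessor ends in a conditional
// branch, that incoming edge is critical; splitting it above lets the
// expansion land in a small new block on that single path, rather than in the
// predecessor where it would also execute on the path that stays in the loop.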
4704 
4705 /// Rewrite - Emit instructions for the leading candidate expression for this
4706 /// LSRUse (this is called "expanding"), and update the UserInst to reference
4707 /// the newly expanded value.
4708 void LSRInstance::Rewrite(const LSRFixup &LF,
4709  const Formula &F,
4710  SCEVExpander &Rewriter,
4711  SmallVectorImpl<WeakVH> &DeadInsts,
4712  Pass *P) const {
4713  // First, find an insertion point that dominates UserInst. For PHI nodes,
4714  // find the nearest block which dominates all the relevant uses.
4715  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
4716  RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
4717  } else {
4718  Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
4719 
4720  // If this is reuse-by-noop-cast, insert the noop cast.
4721  Type *OpTy = LF.OperandValToReplace->getType();
4722  if (FullV->getType() != OpTy) {
4723  Instruction *Cast =
4724  CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
4725  FullV, OpTy, "tmp", LF.UserInst);
4726  FullV = Cast;
4727  }
4728 
4729  // Update the user. ICmpZero is handled specially here (for now) because
4730  // Expand may have updated one of the operands of the icmp already, and
4731  // its new value may happen to be equal to LF.OperandValToReplace, in
4732  // which case doing replaceUsesOfWith leads to replacing both operands
4733  // with the same value. TODO: Reorganize this.
4734  if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
4735  LF.UserInst->setOperand(0, FullV);
4736  else
4737  LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
4738  }
4739 
4740  DeadInsts.emplace_back(LF.OperandValToReplace);
4741 }
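// For example (an illustrative sketch): if the user expects an i8* operand
// but the expression was expanded as an i64, the "reuse-by-noop-cast" path
// above inserts an inttoptr immediately before the user so the expanded value
// can stand in for the original operand without changing its type.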
4742 
4743 /// ImplementSolution - Rewrite all the fixup locations with new values,
4744 /// following the chosen solution.
4745 void
4746 LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
4747  Pass *P) {
4748  // Keep track of instructions we may have made dead, so that
4749  // we can remove them after we are done working.
4750  SmallVector<WeakVH, 16> DeadInsts;
4751 
4752  SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
4753  "lsr");
4754 #ifndef NDEBUG
4755  Rewriter.setDebugType(DEBUG_TYPE);
4756 #endif
4757  Rewriter.disableCanonicalMode();
4758  Rewriter.enableLSRMode();
4759  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
4760 
4761  // Mark phi nodes that terminate chains so the expander tries to reuse them.
4762  for (const IVChain &Chain : IVChainVec) {
4763  if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
4764  Rewriter.setChainedPhi(PN);
4765  }
4766 
4767  // Expand the new value definitions and update the users.
4768  for (const LSRFixup &Fixup : Fixups) {
4769  Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
4770 
4771  Changed = true;
4772  }
4773 
4774  for (const IVChain &Chain : IVChainVec) {
4775  GenerateIVChain(Chain, Rewriter, DeadInsts);
4776  Changed = true;
4777  }
4778  // Clean up after ourselves. This must be done before deleting any
4779  // instructions.
4780  Rewriter.clear();
4781 
4782  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
4783 }
4784 
4785 LSRInstance::LSRInstance(Loop *L, Pass *P)
4786  : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
4787  DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
4788  LI(P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo()),
4789  TTI(P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
4790  *L->getHeader()->getParent())),
4791  L(L), Changed(false), IVIncInsertPos(nullptr) {
4792  // If LoopSimplify form is not available, stay out of trouble.
4793  if (!L->isLoopSimplifyForm())
4794  return;
4795 
4796  // If there's no interesting work to be done, bail early.
4797  if (IU.empty()) return;
4798 
4799  // If there's too much analysis to be done, bail early. We won't be able to
4800  // model the problem anyway.
4801  unsigned NumUsers = 0;
4802  for (const IVStrideUse &U : IU) {
4803  if (++NumUsers > MaxIVUsers) {
4804  (void)U;
4805  DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
4806  return;
4807  }
4808  }
4809 
4810 #ifndef NDEBUG
4811  // All dominating loops must have preheaders, or SCEVExpander may not be able
4812  // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
4813  //
4814  // IVUsers analysis should only create users that are dominated by simple loop
4815  // headers. Since this loop should dominate all of its users, its user list
4816  // should be empty if this loop itself is not within a simple loop nest.
4817  for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
4818  Rung; Rung = Rung->getIDom()) {
4819  BasicBlock *BB = Rung->getBlock();
4820  const Loop *DomLoop = LI.getLoopFor(BB);
4821  if (DomLoop && DomLoop->getHeader() == BB) {
4822  assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
4823  }
4824  }
4825 #endif // DEBUG
4826 
4827  DEBUG(dbgs() << "\nLSR on loop ";
4828  L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
4829  dbgs() << ":\n");
4830 
4831  // First, perform some low-level loop optimizations.
4832  OptimizeShadowIV();
4833  OptimizeLoopTermCond();
4834 
4835  // If loop preparation eliminates all interesting IV users, bail.
4836  if (IU.empty()) return;
4837 
4838  // Skip nested loops until we can model them better with formulae.
4839  if (!L->empty()) {
4840  DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
4841  return;
4842  }
4843 
4844  // Start collecting data and preparing for the solver.
4845  CollectChains();
4846  CollectInterestingTypesAndFactors();
4847  CollectFixupsAndInitialFormulae();
4848  CollectLoopInvariantFixupsAndFormulae();
4849 
4850  assert(!Uses.empty() && "IVUsers reported at least one use");
4851  DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
4852  print_uses(dbgs()));
4853 
4854  // Now use the reuse data to generate a bunch of interesting ways
4855  // to formulate the values needed for the uses.
4856  GenerateAllReuseFormulae();
4857 
4858  FilterOutUndesirableDedicatedRegisters();
4859  NarrowSearchSpaceUsingHeuristics();
4860 
4861  SmallVector<const Formula *, 8> Solution;
4862  Solve(Solution);
4863 
4864  // Release memory that is no longer needed.
4865  Factors.clear();
4866  Types.clear();
4867  RegUses.clear();
4868 
4869  if (Solution.empty())
4870  return;
4871 
4872 #ifndef NDEBUG
4873  // Formulae should be legal.
4874  for (const LSRUse &LU : Uses) {
4875  for (const Formula &F : LU.Formulae)
4876  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4877  F) && "Illegal formula generated!");
4878  };
4879 #endif
4880 
4881  // Now that we've decided what we want, make it so.
4882  ImplementSolution(Solution, P);
4883 }
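// For example (an illustrative sketch of the overall effect): a loop that
// recomputes &a[0] + i*8 to address a[i] on every iteration can, after the
// collection, formula generation, solving and rewriting steps above, be left
// with a single pointer induction variable that is simply advanced by 8 each
// iteration, with the exit compare rewritten against a precomputed end
// pointer.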
4884 
4885 void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
4886  if (Factors.empty() && Types.empty()) return;
4887 
4888  OS << "LSR has identified the following interesting factors and types: ";
4889  bool First = true;
4890 
4891  for (int64_t Factor : Factors) {
4892  if (!First) OS << ", ";
4893  First = false;
4894  OS << '*' << Factor;
4895  }
4896 
4897  for (Type *Ty : Types) {
4898  if (!First) OS << ", ";
4899  First = false;
4900  OS << '(' << *Ty << ')';
4901  }
4902  OS << '\n';
4903 }
4904 
4905 void LSRInstance::print_fixups(raw_ostream &OS) const {
4906  OS << "LSR is examining the following fixup sites:\n";
4907  for (const LSRFixup &LF : Fixups) {
4908  OS << " ";
4909  LF.print(OS);
4910  OS << '\n';
4911  }
4912 }
4913 
4914 void LSRInstance::print_uses(raw_ostream &OS) const {
4915  OS << "LSR is examining the following uses:\n";
4916  for (const LSRUse &LU : Uses) {
4917  OS << " ";
4918  LU.print(OS);
4919  OS << '\n';
4920  for (const Formula &F : LU.Formulae) {
4921  OS << " ";
4922  F.print(OS);
4923  OS << '\n';
4924  }
4925  }
4926 }
4927 
4928 void LSRInstance::print(raw_ostream &OS) const {
4929  print_factors_and_types(OS);
4930  print_fixups(OS);
4931  print_uses(OS);
4932 }
4933 
4934 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4935 void LSRInstance::dump() const {
4936  print(errs()); errs() << '\n';
4937 }
4938 #endif
4939 
4940 namespace {
4941 
4942 class LoopStrengthReduce : public LoopPass {
4943 public:
4944  static char ID; // Pass ID, replacement for typeid
4945  LoopStrengthReduce();
4946 
4947 private:
4948  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
4949  void getAnalysisUsage(AnalysisUsage &AU) const override;
4950 };
4951 
4952 }
4953 
4954 char LoopStrengthReduce::ID = 0;
4955 INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
4956  "Loop Strength Reduction", false, false)
4957 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
4958 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
4959 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
4960 INITIALIZE_PASS_DEPENDENCY(IVUsers)
4961 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
4962 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
4963 INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
4964  "Loop Strength Reduction", false, false)
4965 
4966 
4967 Pass *llvm::createLoopStrengthReducePass() {
4968  return new LoopStrengthReduce();
4969 }
4970 
4971 LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
4972  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
4973 }
4974 
4975 void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
4976  // We split critical edges, so we change the CFG. However, we do update
4977  // many analyses if they are around.
4978  AU.addPreservedID(LoopSimplifyID);
4979 
4980  AU.addRequired<LoopInfoWrapperPass>();
4981  AU.addPreserved<LoopInfoWrapperPass>();
4982  AU.addRequiredID(LoopSimplifyID);
4983  AU.addRequired<DominatorTreeWrapperPass>();
4984  AU.addPreserved<DominatorTreeWrapperPass>();
4985  AU.addRequired<ScalarEvolution>();
4986  AU.addPreserved<ScalarEvolution>();
4987  // Requiring LoopSimplify a second time here prevents IVUsers from running
4988  // twice, since LoopSimplify was invalidated by running ScalarEvolution.
4989  AU.addRequiredID(LoopSimplifyID);
4990  AU.addRequired<IVUsers>();
4991  AU.addPreserved<IVUsers>();
4992  AU.addRequired<TargetTransformInfoWrapperPass>();
4993 }
4994 
4995 bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
4996  if (skipOptnoneFunction(L))
4997  return false;
4998 
4999  bool Changed = false;
5000 
5001  // Run the main LSR transformation.
5002  Changed |= LSRInstance(L, this).getChanged();
5003 
5004  // Remove any extra phis created by processing inner loops.
5005  Changed |= DeleteDeadPHIs(L->getHeader());
5006  if (EnablePhiElim && L->isLoopSimplifyForm()) {
5007  SmallVector<WeakVH, 16> DeadInsts;
5008  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
5009  SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr");
5010 #ifndef NDEBUG
5011  Rewriter.setDebugType(DEBUG_TYPE);
5012 #endif
5013  unsigned numFolded = Rewriter.replaceCongruentIVs(
5014  L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
5015  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
5016  *L->getHeader()->getParent()));
5017  if (numFolded) {
5018  Changed = true;
5019  DeleteTriviallyDeadInstructions(DeadInsts);
5020  DeleteDeadPHIs(L->getHeader());
5021  }
5022  }
5023  return Changed;
5024 }
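// For example (an illustrative usage sketch): clients of the legacy pass
// manager typically schedule this transformation as
//   legacy::PassManager PM;
//   PM.add(createLoopStrengthReducePass());
// which is how the standard codegen pipeline requests LSR shortly before
// instruction selection.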