Bug Summary

File: lib/Transforms/Scalar/LoopStrengthReduce.cpp
Warning: line 4153, column 5
Forming reference to null pointer

Annotated Source Code

1//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This transformation analyzes and transforms the induction variables (and
11// computations derived from them) into forms suitable for efficient execution
12// on the target.
13//
14// This pass performs a strength reduction on array references inside loops that
15// have as one or more of their components the loop induction variable; it
16// rewrites expressions to take advantage of scaled-index addressing modes
17// available on the target, and it performs a variety of other optimizations
18// related to loop induction variables.
19//
20// Terminology note: this code has a lot of handling for "post-increment" or
21// "post-inc" users. This is not talking about post-increment addressing modes;
22// it is instead talking about code like this:
23//
24// %i = phi [ 0, %entry ], [ %i.next, %latch ]
25// ...
26// %i.next = add %i, 1
27// %c = icmp eq %i.next, %n
28//
29// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
30// it's useful to think about these as the same register, with some uses using
31// the value of the register before the add and some using it after. In this
32// example, the icmp is a post-increment user, since it uses %i.next, which is
33// the value of the induction variable after the increment. The other common
34// case of post-increment users is users outside the loop.
35//
36// TODO: More sophistication in the way Formulae are generated and filtered.
37//
38// TODO: Handle multiple loops at a time.
39//
40// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
41// of a GlobalValue?
42//
43// TODO: When truncation is free, truncate ICmp users' operands to make it a
44// smaller encoding (on x86 at least).
45//
46// TODO: When a negated register is used by an add (such as in a list of
47// multiple base registers, or as the increment expression in an addrec),
48// we may not actually need both reg and (-1 * reg) in registers; the
49// negation can be implemented by using a sub instead of an add. The
50// lack of support for taking this into consideration when making
51// register pressure decisions is partly worked around by the "Special"
52// use kind.
53//
54//===----------------------------------------------------------------------===//
55
56#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
57#include "llvm/ADT/DenseSet.h"
58#include "llvm/ADT/Hashing.h"
59#include "llvm/ADT/STLExtras.h"
60#include "llvm/ADT/SetVector.h"
61#include "llvm/ADT/SmallBitVector.h"
62#include "llvm/Analysis/IVUsers.h"
63#include "llvm/Analysis/LoopPass.h"
64#include "llvm/Analysis/LoopPassManager.h"
65#include "llvm/Analysis/ScalarEvolutionExpander.h"
66#include "llvm/Analysis/TargetTransformInfo.h"
67#include "llvm/IR/Constants.h"
68#include "llvm/IR/DerivedTypes.h"
69#include "llvm/IR/Dominators.h"
70#include "llvm/IR/Instructions.h"
71#include "llvm/IR/IntrinsicInst.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/ValueHandle.h"
74#include "llvm/Support/CommandLine.h"
75#include "llvm/Support/Debug.h"
76#include "llvm/Support/raw_ostream.h"
77#include "llvm/Transforms/Scalar.h"
78#include "llvm/Transforms/Utils/BasicBlockUtils.h"
79#include "llvm/Transforms/Utils/Local.h"
80#include <algorithm>
81using namespace llvm;
82
83#define DEBUG_TYPE "loop-reduce"
84
85/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
86/// bail out. This threshold is far beyond the number of users that LSR can
87/// conceivably solve, so it should not affect generated code, but catches the
88/// worst cases before LSR burns too much compile time and stack space.
89static const unsigned MaxIVUsers = 200;
90
91// Temporary flag to cleanup congruent phis after LSR phi expansion.
92// It's currently disabled until we can determine whether it's truly useful or
93// not. The flag should be removed after the v3.0 release.
94// This is now needed for ivchains.
95static cl::opt<bool> EnablePhiElim(
96 "enable-lsr-phielim", cl::Hidden, cl::init(true),
97 cl::desc("Enable LSR phi elimination"));
98
99#ifndef NDEBUG
100// Stress test IV chain generation.
101static cl::opt<bool> StressIVChain(
102 "stress-ivchain", cl::Hidden, cl::init(false),
103 cl::desc("Stress test LSR IV chains"));
104#else
105static bool StressIVChain = false;
106#endif
107
108namespace {
109
110struct MemAccessTy {
111 /// Used in situations where the accessed memory type is unknown.
112 static const unsigned UnknownAddressSpace = ~0u;
113
114 Type *MemTy;
115 unsigned AddrSpace;
116
117 MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}
118
119 MemAccessTy(Type *Ty, unsigned AS) :
120 MemTy(Ty), AddrSpace(AS) {}
121
122 bool operator==(MemAccessTy Other) const {
123 return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
124 }
125
126 bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
127
128 static MemAccessTy getUnknown(LLVMContext &Ctx) {
129 return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace);
130 }
131};
132
133/// This class holds data which is used to order reuse candidates.
134class RegSortData {
135public:
136 /// This represents the set of LSRUse indices which reference
137 /// a particular register.
138 SmallBitVector UsedByIndices;
139
140 void print(raw_ostream &OS) const;
141 void dump() const;
142};
143
144}
145
146void RegSortData::print(raw_ostream &OS) const {
147 OS << "[NumUses=" << UsedByIndices.count() << ']';
148}
149
150LLVM_DUMP_METHOD
151void RegSortData::dump() const {
152 print(errs()); errs() << '\n';
153}
154
155namespace {
156
157/// Map register candidates to information about how they are used.
158class RegUseTracker {
159 typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
160
161 RegUsesTy RegUsesMap;
162 SmallVector<const SCEV *, 16> RegSequence;
163
164public:
165 void countRegister(const SCEV *Reg, size_t LUIdx);
166 void dropRegister(const SCEV *Reg, size_t LUIdx);
167 void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
168
169 bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
170
171 const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
172
173 void clear();
174
175 typedef SmallVectorImpl<const SCEV *>::iterator iterator;
176 typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
177 iterator begin() { return RegSequence.begin(); }
178 iterator end() { return RegSequence.end(); }
179 const_iterator begin() const { return RegSequence.begin(); }
180 const_iterator end() const { return RegSequence.end(); }
181};
182
183}
184
185void
186RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
187 std::pair<RegUsesTy::iterator, bool> Pair =
188 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
189 RegSortData &RSD = Pair.first->second;
190 if (Pair.second)
191 RegSequence.push_back(Reg);
192 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
193 RSD.UsedByIndices.set(LUIdx);
194}
195
196void
197RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
198 RegUsesTy::iterator It = RegUsesMap.find(Reg);
199 assert(It != RegUsesMap.end());
200 RegSortData &RSD = It->second;
201 assert(RSD.UsedByIndices.size() > LUIdx);
202 RSD.UsedByIndices.reset(LUIdx);
203}
204
205void
206RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
207 assert(LUIdx <= LastLUIdx);
208
209 // Update RegUses. The data structure is not optimized for this purpose;
210 // we must iterate through it and update each of the bit vectors.
211 for (auto &Pair : RegUsesMap) {
212 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
213 if (LUIdx < UsedByIndices.size())
214 UsedByIndices[LUIdx] =
215 LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
216 UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
217 }
218}
219
220bool
221RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
222 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
223 if (I == RegUsesMap.end())
224 return false;
225 const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
226 int i = UsedByIndices.find_first();
227 if (i == -1) return false;
228 if ((size_t)i != LUIdx) return true;
229 return UsedByIndices.find_next(i) != -1;
230}
231
232const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
233 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
234 assert(I != RegUsesMap.end() && "Unknown register!");
235 return I->second.UsedByIndices;
236}
237
238void RegUseTracker::clear() {
239 RegUsesMap.clear();
240 RegSequence.clear();
241}
242
243namespace {
244
245/// This class holds information that describes a formula for computing a value
246/// satisfying a use. It may include broken-out immediates and scaled registers.
247struct Formula {
248 /// Global base address used for complex addressing.
249 GlobalValue *BaseGV;
250
251 /// Base offset for complex addressing.
252 int64_t BaseOffset;
253
254 /// Whether any complex addressing has a base register.
255 bool HasBaseReg;
256
257 /// The scale of any complex addressing.
258 int64_t Scale;
259
260 /// The list of "base" registers for this use. When this is non-empty, the
261 /// canonical representation of a formula is
262 /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
263 /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
264 /// #1 enforces that the scaled register is always used when at least two
265 /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
266 /// #2 enforces that 1 * reg is reg.
267 /// This invariant can be temporarily broken while building a formula.
268 /// However, every formula inserted into the LSRInstance must be in canonical
269 /// form.
270 SmallVector<const SCEV *, 4> BaseRegs;
271
272 /// The 'scaled' register for this use. This should be non-null when Scale is
273 /// not zero.
274 const SCEV *ScaledReg;
275
276 /// An additional constant offset which is added near the use. This requires a
277 /// temporary register, but the offset itself can live in an add immediate
278 /// field rather than a register.
279 int64_t UnfoldedOffset;
280
281 Formula()
282 : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
283 ScaledReg(nullptr), UnfoldedOffset(0) {}
284
285 void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
286
287 bool isCanonical() const;
288
289 void canonicalize();
290
291 bool unscale();
292
293 size_t getNumRegs() const;
294 Type *getType() const;
295
296 void deleteBaseReg(const SCEV *&S);
297
298 bool referencesReg(const SCEV *S) const;
299 bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
300 const RegUseTracker &RegUses) const;
301
302 void print(raw_ostream &OS) const;
303 void dump() const;
304};
305
306}
307
308/// Recursion helper for initialMatch.
309static void DoInitialMatch(const SCEV *S, Loop *L,
310 SmallVectorImpl<const SCEV *> &Good,
311 SmallVectorImpl<const SCEV *> &Bad,
312 ScalarEvolution &SE) {
313 // Collect expressions which properly dominate the loop header.
314 if (SE.properlyDominates(S, L->getHeader())) {
315 Good.push_back(S);
316 return;
317 }
318
319 // Look at add operands.
320 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
321 for (const SCEV *S : Add->operands())
322 DoInitialMatch(S, L, Good, Bad, SE);
323 return;
324 }
325
326 // Look at addrec operands.
327 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
328 if (!AR->getStart()->isZero()) {
329 DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
330 DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
331 AR->getStepRecurrence(SE),
332 // FIXME: AR->getNoWrapFlags()
333 AR->getLoop(), SCEV::FlagAnyWrap),
334 L, Good, Bad, SE);
335 return;
336 }
337
338 // Handle a multiplication by -1 (negation) if it didn't fold.
339 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
340 if (Mul->getOperand(0)->isAllOnesValue()) {
341 SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
342 const SCEV *NewMul = SE.getMulExpr(Ops);
343
344 SmallVector<const SCEV *, 4> MyGood;
345 SmallVector<const SCEV *, 4> MyBad;
346 DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
347 const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
348 SE.getEffectiveSCEVType(NewMul->getType())));
349 for (const SCEV *S : MyGood)
350 Good.push_back(SE.getMulExpr(NegOne, S));
351 for (const SCEV *S : MyBad)
352 Bad.push_back(SE.getMulExpr(NegOne, S));
353 return;
354 }
355
356 // Ok, we can't do anything interesting. Just stuff the whole thing into a
357 // register and hope for the best.
358 Bad.push_back(S);
359}
360
361/// Incorporate loop-variant parts of S into this Formula, attempting to keep
362/// all loop-invariant and loop-computable values in a single base register.
363void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
364 SmallVector<const SCEV *, 4> Good;
365 SmallVector<const SCEV *, 4> Bad;
366 DoInitialMatch(S, L, Good, Bad, SE);
367 if (!Good.empty()) {
368 const SCEV *Sum = SE.getAddExpr(Good);
369 if (!Sum->isZero())
370 BaseRegs.push_back(Sum);
371 HasBaseReg = true;
372 }
373 if (!Bad.empty()) {
374 const SCEV *Sum = SE.getAddExpr(Bad);
375 if (!Sum->isZero())
376 BaseRegs.push_back(Sum);
377 HasBaseReg = true;
378 }
379 canonicalize();
380}
381
382/// \brief Check whether or not this formula satisfies the canonical
383/// representation.
384/// \see Formula::BaseRegs.
385bool Formula::isCanonical() const {
386 if (ScaledReg)
387 return Scale != 1 || !BaseRegs.empty();
388 return BaseRegs.size() <= 1;
389}
390
391/// \brief Helper method to morph a formula into its canonical representation.
392/// \see Formula::BaseRegs.
393/// Every formula having more than one base register must use the ScaledReg
394/// field. Otherwise, we would have to do special cases everywhere in LSR
395/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
396/// On the other hand, 1*reg should be canonicalized into reg.
397void Formula::canonicalize() {
398 if (isCanonical())
399 return;
400 // So far we did not need this case. This is easy to implement but it is
401 // useless to maintain dead code. Besides, it could hurt compile time.
402 assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
403 // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg.
404 ScaledReg = BaseRegs.back();
405 BaseRegs.pop_back();
406 Scale = 1;
407 size_t BaseRegsSize = BaseRegs.size();
408 size_t Try = 0;
409 // If ScaledReg is an invariant, try to find a variant expression.
410 while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
411 std::swap(ScaledReg, BaseRegs[Try++]);
412}
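
// Editor's sketch (not part of the analyzed file): a minimal standalone model of
// the canonical-form rule described above, using plain strings in place of SCEV
// registers. ToyFormula and its members are hypothetical names.
#include <cstdint>
#include <string>
#include <vector>

struct ToyFormula {
  std::vector<std::string> BaseRegs; // "base" registers
  std::string ScaledReg;             // empty means no scaled register
  int64_t Scale = 0;

  // Mirrors Formula::isCanonical() above.
  bool isCanonical() const {
    if (!ScaledReg.empty())
      return Scale != 1 || !BaseRegs.empty();
    return BaseRegs.size() <= 1;
  }

  // Mirrors the core of Formula::canonicalize(): once more than one base
  // register is present, one of them moves into ScaledReg with Scale = 1.
  // (The real pass additionally prefers a loop-variant register for ScaledReg.)
  void canonicalize() {
    if (isCanonical())
      return;
    ScaledReg = BaseRegs.back();
    BaseRegs.pop_back();
    Scale = 1;
  }
};

// Example: BaseRegs = {reg1, reg2, reg3} canonicalizes to
// BaseRegs = {reg1, reg2}, ScaledReg = reg3, Scale = 1,
// i.e. reg1 + reg2 + reg3 is kept as reg1 + reg2 + 1*reg3; unscale() undoes it.
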
413
414/// \brief Get rid of the scale in the formula.
415/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
416/// \return true if it was possible to get rid of the scale, false otherwise.
417/// \note After this operation the formula may not be in the canonical form.
418bool Formula::unscale() {
419 if (Scale != 1)
420 return false;
421 Scale = 0;
422 BaseRegs.push_back(ScaledReg);
423 ScaledReg = nullptr;
424 return true;
425}
426
427/// Return the total number of register operands used by this formula. This does
428/// not include register uses implied by non-constant addrec strides.
429size_t Formula::getNumRegs() const {
430 return !!ScaledReg + BaseRegs.size();
431}
432
433/// Return the type of this formula, if it has one, or null otherwise. This type
434/// is meaningless except for the bit size.
435Type *Formula::getType() const {
436 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
437 ScaledReg ? ScaledReg->getType() :
438 BaseGV ? BaseGV->getType() :
439 nullptr;
440}
441
442/// Delete the given base reg from the BaseRegs list.
443void Formula::deleteBaseReg(const SCEV *&S) {
444 if (&S != &BaseRegs.back())
445 std::swap(S, BaseRegs.back());
446 BaseRegs.pop_back();
447}
448
449/// Test if this formula references the given register.
450bool Formula::referencesReg(const SCEV *S) const {
451 return S == ScaledReg || is_contained(BaseRegs, S);
452}
453
454/// Test whether this formula uses registers which are used by uses other than
455/// the use with the given index.
456bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
457 const RegUseTracker &RegUses) const {
458 if (ScaledReg)
459 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
460 return true;
461 for (const SCEV *BaseReg : BaseRegs)
462 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
463 return true;
464 return false;
465}
466
467void Formula::print(raw_ostream &OS) const {
468 bool First = true;
469 if (BaseGV) {
470 if (!First) OS << " + "; else First = false;
471 BaseGV->printAsOperand(OS, /*PrintType=*/false);
472 }
473 if (BaseOffset != 0) {
474 if (!First) OS << " + "; else First = false;
475 OS << BaseOffset;
476 }
477 for (const SCEV *BaseReg : BaseRegs) {
478 if (!First) OS << " + "; else First = false;
479 OS << "reg(" << *BaseReg << ')';
480 }
481 if (HasBaseReg && BaseRegs.empty()) {
482 if (!First) OS << " + "; else First = false;
483 OS << "**error: HasBaseReg**";
484 } else if (!HasBaseReg && !BaseRegs.empty()) {
485 if (!First) OS << " + "; else First = false;
486 OS << "**error: !HasBaseReg**";
487 }
488 if (Scale != 0) {
489 if (!First) OS << " + "; else First = false;
490 OS << Scale << "*reg(";
491 if (ScaledReg)
492 OS << *ScaledReg;
493 else
494 OS << "<unknown>";
495 OS << ')';
496 }
497 if (UnfoldedOffset != 0) {
498 if (!First) OS << " + ";
499 OS << "imm(" << UnfoldedOffset << ')';
500 }
501}
502
503LLVM_DUMP_METHOD
504void Formula::dump() const {
505 print(errs()); errs() << '\n';
506}
507
508/// Return true if the given addrec can be sign-extended without changing its
509/// value.
510static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
511 Type *WideTy =
512 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
513 return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
514}
515
516/// Return true if the given add can be sign-extended without changing its
517/// value.
518static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
519 Type *WideTy =
520 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
521 return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
522}
523
524/// Return true if the given mul can be sign-extended without changing its
525/// value.
526static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
527 Type *WideTy =
528 IntegerType::get(SE.getContext(),
529 SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
530 return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
531}
532
533/// Return an expression for LHS /s RHS, if it can be determined and if the
534/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
535/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that
536/// the multiplication may overflow, which is useful when the result will be
537/// used in a context where the most significant bits are ignored.
538static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
539 ScalarEvolution &SE,
540 bool IgnoreSignificantBits = false) {
541 // Handle the trivial case, which works for any SCEV type.
542 if (LHS == RHS)
543 return SE.getConstant(LHS->getType(), 1);
544
545 // Handle a few RHS special cases.
546 const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
547 if (RC) {
548 const APInt &RA = RC->getAPInt();
549 // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
550 // some folding.
551 if (RA.isAllOnesValue())
552 return SE.getMulExpr(LHS, RC);
553 // Handle x /s 1 as x.
554 if (RA == 1)
555 return LHS;
556 }
557
558 // Check for a division of a constant by a constant.
559 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
560 if (!RC)
561 return nullptr;
562 const APInt &LA = C->getAPInt();
563 const APInt &RA = RC->getAPInt();
564 if (LA.srem(RA) != 0)
565 return nullptr;
566 return SE.getConstant(LA.sdiv(RA));
567 }
568
569 // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
570 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
571 if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
572 const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
573 IgnoreSignificantBits);
574 if (!Step) return nullptr;
575 const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
576 IgnoreSignificantBits);
577 if (!Start) return nullptr;
578 // FlagNW is independent of the start value, step direction, and is
579 // preserved with smaller magnitude steps.
580 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
581 return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
582 }
583 return nullptr;
584 }
585
586 // Distribute the sdiv over add operands, if the add doesn't overflow.
587 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
588 if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
589 SmallVector<const SCEV *, 8> Ops;
590 for (const SCEV *S : Add->operands()) {
591 const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
592 if (!Op) return nullptr;
593 Ops.push_back(Op);
594 }
595 return SE.getAddExpr(Ops);
596 }
597 return nullptr;
598 }
599
600 // Check for a multiply operand that we can pull RHS out of.
601 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
602 if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
603 SmallVector<const SCEV *, 4> Ops;
604 bool Found = false;
605 for (const SCEV *S : Mul->operands()) {
606 if (!Found)
607 if (const SCEV *Q = getExactSDiv(S, RHS, SE,
608 IgnoreSignificantBits)) {
609 S = Q;
610 Found = true;
611 }
612 Ops.push_back(S);
613 }
614 return Found ? SE.getMulExpr(Ops) : nullptr;
615 }
616 return nullptr;
617 }
618
619 // Otherwise we don't know.
620 return nullptr;
621}
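
// Editor's sketch (not part of the analyzed file): the constant-by-constant case
// of getExactSDiv above, reduced to plain integers. exactSDiv is a hypothetical
// name; it returns a quotient only when the remainder is provably zero,
// mirroring the LA.srem(RA) != 0 bail-out.
#include <cstdint>
#include <optional>

static std::optional<int64_t> exactSDiv(int64_t LHS, int64_t RHS) {
  if (RHS == 0)
    return std::nullopt;                      // give up on division by zero
  if (RHS == 1)
    return LHS;                               // x /s 1 == x
  if (RHS == -1)                              // x /s -1 == x * -1
    return static_cast<int64_t>(0 - static_cast<uint64_t>(LHS));
  if (LHS % RHS != 0)
    return std::nullopt;                      // remainder not known to be zero
  return LHS / RHS;
}

// For SCEV expressions the same idea distributes over the operands, e.g.
//   {0,+,4}<%L> /s 4  ==  {0,+,1}<%L>   (divide the start and the step)
//   (8 + 4*%x) /s 4   ==  2 + %x        (divide each add operand)
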
622
623/// If S involves the addition of a constant integer value, return that integer
624/// value, and mutate S to point to a new SCEV with that value excluded.
625static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
626 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
627 if (C->getAPInt().getMinSignedBits() <= 64) {
628 S = SE.getConstant(C->getType(), 0);
629 return C->getValue()->getSExtValue();
630 }
631 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
632 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
633 int64_t Result = ExtractImmediate(NewOps.front(), SE);
634 if (Result != 0)
635 S = SE.getAddExpr(NewOps);
636 return Result;
637 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
638 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
639 int64_t Result = ExtractImmediate(NewOps.front(), SE);
640 if (Result != 0)
641 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
642 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
643 SCEV::FlagAnyWrap);
644 return Result;
645 }
646 return 0;
647}
648
649/// If S involves the addition of a GlobalValue address, return that symbol, and
650/// mutate S to point to a new SCEV with that value excluded.
651static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
652 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
653 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
654 S = SE.getConstant(GV->getType(), 0);
655 return GV;
656 }
657 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
658 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
659 GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
660 if (Result)
661 S = SE.getAddExpr(NewOps);
662 return Result;
663 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
664 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
665 GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
666 if (Result)
667 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
668 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
669 SCEV::FlagAnyWrap);
670 return Result;
671 }
672 return nullptr;
673}
674
675/// Returns true if the specified instruction is using the specified value as an
676/// address.
677static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
678 bool isAddress = isa<LoadInst>(Inst);
679 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
680 if (SI->getOperand(1) == OperandVal)
681 isAddress = true;
682 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
683 // Addressing modes can also be folded into prefetches and a variety
684 // of intrinsics.
685 switch (II->getIntrinsicID()) {
686 default: break;
687 case Intrinsic::prefetch:
688 if (II->getArgOperand(0) == OperandVal)
689 isAddress = true;
690 break;
691 }
692 }
693 return isAddress;
694}
695
696/// Return the type of the memory being accessed.
697static MemAccessTy getAccessType(const Instruction *Inst) {
698 MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
699 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
700 AccessTy.MemTy = SI->getOperand(0)->getType();
701 AccessTy.AddrSpace = SI->getPointerAddressSpace();
702 } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
703 AccessTy.AddrSpace = LI->getPointerAddressSpace();
704 }
705
706 // All pointers have the same requirements, so canonicalize them to an
707 // arbitrary pointer type to minimize variation.
708 if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
709 AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
710 PTy->getAddressSpace());
711
712 return AccessTy;
713}
714
715/// Return true if this AddRec is already a phi in its loop.
716static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
717 for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
718 PHINode *PN = dyn_cast<PHINode>(I); ++I) {
719 if (SE.isSCEVable(PN->getType()) &&
720 (SE.getEffectiveSCEVType(PN->getType()) ==
721 SE.getEffectiveSCEVType(AR->getType())) &&
722 SE.getSCEV(PN) == AR)
723 return true;
724 }
725 return false;
726}
727
728/// Check if expanding this expression is likely to incur significant cost. This
729/// is tricky because SCEV doesn't track which expressions are actually computed
730/// by the current IR.
731///
732/// We currently allow expansion of IV increments that involve adds,
733/// multiplication by constants, and AddRecs from existing phis.
734///
735/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
736/// obvious multiple of the UDivExpr.
737static bool isHighCostExpansion(const SCEV *S,
738 SmallPtrSetImpl<const SCEV*> &Processed,
739 ScalarEvolution &SE) {
740 // Zero/One operand expressions
741 switch (S->getSCEVType()) {
742 case scUnknown:
743 case scConstant:
744 return false;
745 case scTruncate:
746 return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
747 Processed, SE);
748 case scZeroExtend:
749 return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
750 Processed, SE);
751 case scSignExtend:
752 return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
753 Processed, SE);
754 }
755
756 if (!Processed.insert(S).second)
757 return false;
758
759 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
760 for (const SCEV *S : Add->operands()) {
761 if (isHighCostExpansion(S, Processed, SE))
762 return true;
763 }
764 return false;
765 }
766
767 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
768 if (Mul->getNumOperands() == 2) {
769 // Multiplication by a constant is ok
770 if (isa<SCEVConstant>(Mul->getOperand(0)))
771 return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
772
773 // If we have the value of one operand, check if an existing
774 // multiplication already generates this expression.
775 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
776 Value *UVal = U->getValue();
777 for (User *UR : UVal->users()) {
778 // If U is a constant, it may be used by a ConstantExpr.
779 Instruction *UI = dyn_cast<Instruction>(UR);
780 if (UI && UI->getOpcode() == Instruction::Mul &&
781 SE.isSCEVable(UI->getType())) {
782 return SE.getSCEV(UI) == Mul;
783 }
784 }
785 }
786 }
787 }
788
789 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
790 if (isExistingPhi(AR, SE))
791 return false;
792 }
793
794 // For now, consider any other type of expression (div/mul/min/max) high cost.
795 return true;
796}
797
798/// If any of the instructions in the specified set are trivially dead, delete
799/// them and see if this makes any of their operands subsequently dead.
800static bool
801DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
802 bool Changed = false;
803
804 while (!DeadInsts.empty()) {
805 Value *V = DeadInsts.pop_back_val();
806 Instruction *I = dyn_cast_or_null<Instruction>(V);
807
808 if (!I || !isInstructionTriviallyDead(I))
809 continue;
810
811 for (Use &O : I->operands())
812 if (Instruction *U = dyn_cast<Instruction>(O)) {
813 O = nullptr;
814 if (U->use_empty())
815 DeadInsts.emplace_back(U);
816 }
817
818 I->eraseFromParent();
819 Changed = true;
820 }
821
822 return Changed;
823}
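
// Editor's sketch (not part of the analyzed file): the worklist pattern used by
// DeleteTriviallyDeadInstructions above, reduced to a toy use-count model.
// Removing a dead value decrements its operands' use counts and re-enqueues any
// operand that becomes dead in turn. ToyValue and deleteTriviallyDead are
// hypothetical names.
#include <map>
#include <string>
#include <vector>

struct ToyValue {
  int NumUses = 0;
  std::vector<std::string> Operands;
};

static bool deleteTriviallyDead(std::vector<std::string> Worklist,
                                std::map<std::string, ToyValue> &Values) {
  bool Changed = false;
  while (!Worklist.empty()) {
    std::string Name = Worklist.back();
    Worklist.pop_back();
    auto It = Values.find(Name);
    if (It == Values.end() || It->second.NumUses != 0)
      continue;                               // already erased or still used
    for (const std::string &Op : It->second.Operands) {
      auto OpIt = Values.find(Op);
      if (OpIt != Values.end() && --OpIt->second.NumUses == 0)
        Worklist.push_back(Op);               // operand became dead in turn
    }
    Values.erase(It);                         // stands in for eraseFromParent()
    Changed = true;
  }
  return Changed;
}
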
824
825namespace {
826class LSRUse;
827}
828
829/// \brief Check if the addressing mode defined by \p F is completely
830/// folded in \p LU at isel time.
831/// This includes address-mode folding and special icmp tricks.
832/// This function returns true if \p LU can accommodate what \p F
833/// defines and up to 1 base + 1 scaled + offset.
834/// In other words, if \p F has several base registers, this function may
835/// still return true. Therefore, users still need to account for
836/// additional base registers and/or unfolded offsets to derive an
837/// accurate cost model.
838static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
839 const LSRUse &LU, const Formula &F);
840// Get the cost of the scaling factor used in F for LU.
841static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
842 const LSRUse &LU, const Formula &F);
843
844namespace {
845
846/// This class is used to measure and compare candidate formulae.
847class Cost {
848 /// TODO: Some of these could be merged. Also, a lexical ordering
849 /// isn't always optimal.
850 unsigned NumRegs;
851 unsigned AddRecCost;
852 unsigned NumIVMuls;
853 unsigned NumBaseAdds;
854 unsigned ImmCost;
855 unsigned SetupCost;
856 unsigned ScaleCost;
857
858public:
859 Cost()
860 : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
861 SetupCost(0), ScaleCost(0) {}
862
863 bool operator<(const Cost &Other) const;
864
865 void Lose();
866
867#ifndef NDEBUG
868 // Once any of the metrics loses, they must all remain losers.
869 bool isValid() {
870 return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
871 | ImmCost | SetupCost | ScaleCost) != ~0u)
872 || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
873 & ImmCost & SetupCost & ScaleCost) == ~0u);
874 }
875#endif
876
877 bool isLoser() {
878 assert(isValid() && "invalid cost");
879 return NumRegs == ~0u;
880 }
881
882 void RateFormula(const TargetTransformInfo &TTI,
883 const Formula &F,
884 SmallPtrSetImpl<const SCEV *> &Regs,
885 const DenseSet<const SCEV *> &VisitedRegs,
886 const Loop *L,
887 ScalarEvolution &SE, DominatorTree &DT,
888 const LSRUse &LU,
889 SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
890
891 void print(raw_ostream &OS) const;
892 void dump() const;
893
894private:
895 void RateRegister(const SCEV *Reg,
896 SmallPtrSetImpl<const SCEV *> &Regs,
897 const Loop *L,
898 ScalarEvolution &SE, DominatorTree &DT);
899 void RatePrimaryRegister(const SCEV *Reg,
900 SmallPtrSetImpl<const SCEV *> &Regs,
901 const Loop *L,
902 ScalarEvolution &SE, DominatorTree &DT,
903 SmallPtrSetImpl<const SCEV *> *LoserRegs);
904};
905
906/// An operand value in an instruction which is to be replaced with some
907/// equivalent, possibly strength-reduced, replacement.
908struct LSRFixup {
909 /// The instruction which will be updated.
910 Instruction *UserInst;
911
912 /// The operand of the instruction which will be replaced. The operand may be
913 /// used more than once; every instance will be replaced.
914 Value *OperandValToReplace;
915
916 /// If this user is to use the post-incremented value of an induction
917 /// variable, this variable is non-null and holds the loop associated with the
918 /// induction variable.
919 PostIncLoopSet PostIncLoops;
920
921 /// A constant offset to be added to the LSRUse expression. This allows
922 /// multiple fixups to share the same LSRUse with different offsets, for
923 /// example in an unrolled loop.
924 int64_t Offset;
925
926 bool isUseFullyOutsideLoop(const Loop *L) const;
927
928 LSRFixup();
929
930 void print(raw_ostream &OS) const;
931 void dump() const;
932};
933
934
935/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
936/// SmallVectors of const SCEV*.
937struct UniquifierDenseMapInfo {
938 static SmallVector<const SCEV *, 4> getEmptyKey() {
939 SmallVector<const SCEV *, 4> V;
940 V.push_back(reinterpret_cast<const SCEV *>(-1));
941 return V;
942 }
943
944 static SmallVector<const SCEV *, 4> getTombstoneKey() {
945 SmallVector<const SCEV *, 4> V;
946 V.push_back(reinterpret_cast<const SCEV *>(-2));
947 return V;
948 }
949
950 static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
951 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
952 }
953
954 static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
955 const SmallVector<const SCEV *, 4> &RHS) {
956 return LHS == RHS;
957 }
958};
959
960/// This class holds the state that LSR keeps for each use in IVUsers, as well
961/// as uses invented by LSR itself. It includes information about what kinds of
962/// things can be folded into the user, information about the user itself, and
963/// information about how the use may be satisfied. TODO: Represent multiple
964/// users of the same expression in common?
965class LSRUse {
966 DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
967
968public:
969 /// An enum for a kind of use, indicating what types of scaled and immediate
970 /// operands it might support.
971 enum KindType {
972 Basic, ///< A normal use, with no folding.
973 Special, ///< A special case of basic, allowing -1 scales.
974 Address, ///< An address use; folding according to TargetLowering
975 ICmpZero ///< An equality icmp with both operands folded into one.
976 // TODO: Add a generic icmp too?
977 };
978
979 typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
980
981 KindType Kind;
982 MemAccessTy AccessTy;
983
984 /// The list of operands which are to be replaced.
985 SmallVector<LSRFixup, 8> Fixups;
986
987 /// Keep track of the min and max offsets of the fixups.
988 int64_t MinOffset;
989 int64_t MaxOffset;
990
991 /// This records whether all of the fixups using this LSRUse are outside of
992 /// the loop, in which case some special-case heuristics may be used.
993 bool AllFixupsOutsideLoop;
994
995 /// RigidFormula is set to true to guarantee that this use will be associated
996 /// with a single formula--the one that initially matched. Some SCEV
997 /// expressions cannot be expanded. This allows LSR to consider the registers
998 /// used by those expressions without the need to expand them later after
999 /// changing the formula.
1000 bool RigidFormula;
1001
1002 /// This records the widest use type for any fixup using this
1003 /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
1004 /// fixup widths to be equivalent, because the narrower one may be relying on
1005 /// the implicit truncation to truncate away bogus bits.
1006 Type *WidestFixupType;
1007
1008 /// A list of ways to build a value that can satisfy this user. After the
1009 /// list is populated, one of these is selected heuristically and used to
1010 /// formulate a replacement for OperandValToReplace in UserInst.
1011 SmallVector<Formula, 12> Formulae;
1012
1013 /// The set of register candidates used by all formulae in this LSRUse.
1014 SmallPtrSet<const SCEV *, 4> Regs;
1015
1016 LSRUse(KindType K, MemAccessTy AT)
1017 : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
1018 AllFixupsOutsideLoop(true), RigidFormula(false),
1019 WidestFixupType(nullptr) {}
1020
1021 LSRFixup &getNewFixup() {
1022 Fixups.push_back(LSRFixup());
1023 return Fixups.back();
1024 }
1025
1026 void pushFixup(LSRFixup &f) {
1027 Fixups.push_back(f);
1028 if (f.Offset > MaxOffset)
1029 MaxOffset = f.Offset;
1030 if (f.Offset < MinOffset)
1031 MinOffset = f.Offset;
1032 }
1033
1034 bool HasFormulaWithSameRegs(const Formula &F) const;
1035 bool InsertFormula(const Formula &F);
1036 void DeleteFormula(Formula &F);
1037 void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1038
1039 void print(raw_ostream &OS) const;
1040 void dump() const;
1041};
1042
1043}
1044
1045/// Tally up interesting quantities from the given register.
1046void Cost::RateRegister(const SCEV *Reg,
1047 SmallPtrSetImpl<const SCEV *> &Regs,
1048 const Loop *L,
1049 ScalarEvolution &SE, DominatorTree &DT) {
1050 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
1051 // If this is an addrec for another loop, don't second-guess its addrec phi
1052 // nodes. LSR isn't currently smart enough to reason about more than one
1053 // loop at a time. LSR has already run on inner loops, will not run on outer
1054 // loops, and cannot be expected to change sibling loops.
1055 if (AR->getLoop() != L) {
1056 // If the AddRec exists, consider its register free and leave it alone.
1057 if (isExistingPhi(AR, SE))
1058 return;
1059
1060 // Otherwise, do not consider this formula at all.
1061 Lose();
1062 return;
1063 }
1064 AddRecCost += 1; /// TODO: This should be a function of the stride.
1065
1066 // Add the step value register, if it needs one.
1067 // TODO: The non-affine case isn't precisely modeled here.
1068 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1069 if (!Regs.count(AR->getOperand(1))) {
1070 RateRegister(AR->getOperand(1), Regs, L, SE, DT);
1071 if (isLoser())
1072 return;
1073 }
1074 }
1075 }
1076 ++NumRegs;
1077
1078 // Rough heuristic; favor registers which don't require extra setup
1079 // instructions in the preheader.
1080 if (!isa<SCEVUnknown>(Reg) &&
1081 !isa<SCEVConstant>(Reg) &&
1082 !(isa<SCEVAddRecExpr>(Reg) &&
1083 (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
1084 isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
1085 ++SetupCost;
1086
1087 NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1088 SE.hasComputableLoopEvolution(Reg, L);
1089}
1090
1091/// Record this register in the set. If we haven't seen it before, rate
1092/// it. Optional LoserRegs provides a way to declare any formula that refers to
1093/// one of those regs an instant loser.
1094void Cost::RatePrimaryRegister(const SCEV *Reg,
1095 SmallPtrSetImpl<const SCEV *> &Regs,
1096 const Loop *L,
1097 ScalarEvolution &SE, DominatorTree &DT,
1098 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1099 if (LoserRegs && LoserRegs->count(Reg)) {
1100 Lose();
1101 return;
1102 }
1103 if (Regs.insert(Reg).second) {
1104 RateRegister(Reg, Regs, L, SE, DT);
1105 if (LoserRegs && isLoser())
1106 LoserRegs->insert(Reg);
1107 }
1108}
1109
1110void Cost::RateFormula(const TargetTransformInfo &TTI,
1111 const Formula &F,
1112 SmallPtrSetImpl<const SCEV *> &Regs,
1113 const DenseSet<const SCEV *> &VisitedRegs,
1114 const Loop *L,
1115 ScalarEvolution &SE, DominatorTree &DT,
1116 const LSRUse &LU,
1117 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1118 assert(F.isCanonical() && "Cost is accurate only for canonical formula");
1119 // Tally up the registers.
1120 if (const SCEV *ScaledReg = F.ScaledReg) {
1121 if (VisitedRegs.count(ScaledReg)) {
1122 Lose();
1123 return;
1124 }
1125 RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
1126 if (isLoser())
1127 return;
1128 }
1129 for (const SCEV *BaseReg : F.BaseRegs) {
1130 if (VisitedRegs.count(BaseReg)) {
1131 Lose();
1132 return;
1133 }
1134 RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
1135 if (isLoser())
1136 return;
1137 }
1138
1139 // Determine how many (unfolded) adds we'll need inside the loop.
1140 size_t NumBaseParts = F.getNumRegs();
1141 if (NumBaseParts > 1)
1142 // Do not count the base and a possible second register if the target
1143 // allows folding 2 registers.
1144 NumBaseAdds +=
1145 NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
1146 NumBaseAdds += (F.UnfoldedOffset != 0);
1147
1148 // Accumulate non-free scaling amounts.
1149 ScaleCost += getScalingFactorCost(TTI, LU, F);
1150
1151 // Tally up the non-zero immediates.
1152 for (const LSRFixup &Fixup : LU.Fixups) {
1153 int64_t O = Fixup.Offset;
1154 int64_t Offset = (uint64_t)O + F.BaseOffset;
1155 if (F.BaseGV)
1156 ImmCost += 64; // Handle symbolic values conservatively.
1157 // TODO: This should probably be the pointer size.
1158 else if (Offset != 0)
1159 ImmCost += APInt(64, Offset, true).getMinSignedBits();
1160
1161 // Check with target if this offset with this instruction is
1162 // specifically not supported.
1163 if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
1164 !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
1165 NumBaseAdds++;
1166 }
1167 assert(isValid() && "invalid cost");
1168}
1169
1170/// Set this cost to a losing value.
1171void Cost::Lose() {
1172 NumRegs = ~0u;
1173 AddRecCost = ~0u;
1174 NumIVMuls = ~0u;
1175 NumBaseAdds = ~0u;
1176 ImmCost = ~0u;
1177 SetupCost = ~0u;
1178 ScaleCost = ~0u;
1179}
1180
1181/// Choose the lower cost.
1182bool Cost::operator<(const Cost &Other) const {
1183 return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
1184 ImmCost, SetupCost) <
1185 std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
1186 Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
1187 Other.SetupCost);
1188}
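
// Editor's sketch (not part of the analyzed file): how the std::tie comparison
// above orders two costs lexicographically -- the first differing field decides,
// so a formula that needs fewer registers always wins regardless of the
// remaining metrics. ToyCost and fewerRegsWins are hypothetical names.
#include <tuple>

struct ToyCost {
  unsigned NumRegs, AddRecCost, ImmCost;
  bool operator<(const ToyCost &Other) const {
    return std::tie(NumRegs, AddRecCost, ImmCost) <
           std::tie(Other.NumRegs, Other.AddRecCost, Other.ImmCost);
  }
};

static bool fewerRegsWins() {
  ToyCost A{2, 5, 0}, B{3, 0, 0};
  return A < B; // true: NumRegs is compared first, A's higher AddRecCost is
                // never examined -- the "lexical ordering" the TODO in the Cost
                // class notes is not always optimal.
}
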
1189
1190void Cost::print(raw_ostream &OS) const {
1191 OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
1192 if (AddRecCost != 0)
1193 OS << ", with addrec cost " << AddRecCost;
1194 if (NumIVMuls != 0)
1195 OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
1196 if (NumBaseAdds != 0)
1197 OS << ", plus " << NumBaseAdds << " base add"
1198 << (NumBaseAdds == 1 ? "" : "s");
1199 if (ScaleCost != 0)
1200 OS << ", plus " << ScaleCost << " scale cost";
1201 if (ImmCost != 0)
1202 OS << ", plus " << ImmCost << " imm cost";
1203 if (SetupCost != 0)
1204 OS << ", plus " << SetupCost << " setup cost";
1205}
1206
1207LLVM_DUMP_METHOD
1208void Cost::dump() const {
1209 print(errs()); errs() << '\n';
1210}
1211
1212LSRFixup::LSRFixup()
1213 : UserInst(nullptr), OperandValToReplace(nullptr),
1214 Offset(0) {}
1215
1216/// Test whether this fixup always uses its value outside of the given loop.
1217bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
1218 // PHI nodes use their value in their incoming blocks.
1219 if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1220 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1221 if (PN->getIncomingValue(i) == OperandValToReplace &&
1222 L->contains(PN->getIncomingBlock(i)))
1223 return false;
1224 return true;
1225 }
1226
1227 return !L->contains(UserInst);
1228}
1229
1230void LSRFixup::print(raw_ostream &OS) const {
1231 OS << "UserInst=";
1232 // Store is common and interesting enough to be worth special-casing.
1233 if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1234 OS << "store ";
1235 Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
1236 } else if (UserInst->getType()->isVoidTy())
1237 OS << UserInst->getOpcodeName();
1238 else
1239 UserInst->printAsOperand(OS, /*PrintType=*/false);
1240
1241 OS << ", OperandValToReplace=";
1242 OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
1243
1244 for (const Loop *PIL : PostIncLoops) {
1245 OS << ", PostIncLoop=";
1246 PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
1247 }
1248
1249 if (Offset != 0)
1250 OS << ", Offset=" << Offset;
1251}
1252
1253LLVM_DUMP_METHOD
1254void LSRFixup::dump() const {
1255 print(errs()); errs() << '\n';
1256}
1257
1258/// Test whether this use has a formula which has the same registers as the given
1259/// formula.
1260bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1261 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1262 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1263 // Unstable sort by host order ok, because this is only used for uniquifying.
1264 std::sort(Key.begin(), Key.end());
1265 return Uniquifier.count(Key);
1266}
1267
1268/// If the given formula has not yet been inserted, add it to the list, and
1269/// return true. Return false otherwise. The formula must be in canonical form.
1270bool LSRUse::InsertFormula(const Formula &F) {
1271 assert(F.isCanonical() && "Invalid canonical representation");
1272
1273 if (!Formulae.empty() && RigidFormula)
1274 return false;
1275
1276 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1277 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1278 // Unstable sort by host order ok, because this is only used for uniquifying.
1279 std::sort(Key.begin(), Key.end());
1280
1281 if (!Uniquifier.insert(Key).second)
1282 return false;
1283
1284 // Using a register to hold the value of 0 is not profitable.
1285 assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
1286 "Zero allocated in a scaled register!");
1287#ifndef NDEBUG
1288 for (const SCEV *BaseReg : F.BaseRegs)
1289 assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1290#endif
1291
1292 // Add the formula to the list.
1293 Formulae.push_back(F);
1294
1295 // Record registers now being used by this use.
1296 Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1297 if (F.ScaledReg)
1298 Regs.insert(F.ScaledReg);
1299
1300 return true;
1301}
1302
1303/// Remove the given formula from this use's list.
1304void LSRUse::DeleteFormula(Formula &F) {
1305 if (&F != &Formulae.back())
1306 std::swap(F, Formulae.back());
1307 Formulae.pop_back();
1308}
1309
1310/// Recompute the Regs field, and update RegUses.
1311void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1312 // Now that we've filtered out some formulae, recompute the Regs set.
1313 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1314 Regs.clear();
1315 for (const Formula &F : Formulae) {
1316 if (F.ScaledReg) Regs.insert(F.ScaledReg);
1317 Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1318 }
1319
1320 // Update the RegTracker.
1321 for (const SCEV *S : OldRegs)
1322 if (!Regs.count(S))
1323 RegUses.dropRegister(S, LUIdx);
1324}
1325
1326void LSRUse::print(raw_ostream &OS) const {
1327 OS << "LSR Use: Kind=";
1328 switch (Kind) {
1329 case Basic: OS << "Basic"; break;
1330 case Special: OS << "Special"; break;
1331 case ICmpZero: OS << "ICmpZero"; break;
1332 case Address:
1333 OS << "Address of ";
1334 if (AccessTy.MemTy->isPointerTy())
1335 OS << "pointer"; // the full pointer type could be really verbose
1336 else {
1337 OS << *AccessTy.MemTy;
1338 }
1339
1340 OS << " in addrspace(" << AccessTy.AddrSpace << ')';
1341 }
1342
1343 OS << ", Offsets={";
1344 bool NeedComma = false;
1345 for (const LSRFixup &Fixup : Fixups) {
1346 if (NeedComma) OS << ',';
1347 OS << Fixup.Offset;
1348 NeedComma = true;
1349 }
1350 OS << '}';
1351
1352 if (AllFixupsOutsideLoop)
1353 OS << ", all-fixups-outside-loop";
1354
1355 if (WidestFixupType)
1356 OS << ", widest fixup type: " << *WidestFixupType;
1357}
1358
1359LLVM_DUMP_METHOD
1360void LSRUse::dump() const {
1361 print(errs()); errs() << '\n';
1362}
1363
1364static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1365 LSRUse::KindType Kind, MemAccessTy AccessTy,
1366 GlobalValue *BaseGV, int64_t BaseOffset,
1367 bool HasBaseReg, int64_t Scale) {
1368 switch (Kind) {
1369 case LSRUse::Address:
1370 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
1371 HasBaseReg, Scale, AccessTy.AddrSpace);
1372
1373 case LSRUse::ICmpZero:
1374 // There's not even a target hook for querying whether it would be legal to
1375 // fold a GV into an ICmp.
1376 if (BaseGV)
1377 return false;
1378
1379 // ICmp only has two operands; don't allow more than two non-trivial parts.
1380 if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1381 return false;
1382
1383 // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1384 // putting the scaled register in the other operand of the icmp.
1385 if (Scale != 0 && Scale != -1)
1386 return false;
1387
1388 // If we have low-level target information, ask the target if it can fold an
1389 // integer immediate on an icmp.
1390 if (BaseOffset != 0) {
1391 // We have one of:
1392 // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1393 // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
1394 // Offs is the ICmp immediate.
1395 if (Scale == 0)
1396 // The cast does the right thing with INT64_MIN.
1397 BaseOffset = -(uint64_t)BaseOffset;
1398 return TTI.isLegalICmpImmediate(BaseOffset);
1399 }
1400
1401 // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
1402 return true;
1403
1404 case LSRUse::Basic:
1405 // Only handle single-register values.
1406 return !BaseGV && Scale == 0 && BaseOffset == 0;
1407
1408 case LSRUse::Special:
1409 // Special case Basic to handle -1 scales.
1410 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1411 }
1412
1413 llvm_unreachable("Invalid LSRUse Kind!");
1414}
1415
1416static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1417 int64_t MinOffset, int64_t MaxOffset,
1418 LSRUse::KindType Kind, MemAccessTy AccessTy,
1419 GlobalValue *BaseGV, int64_t BaseOffset,
1420 bool HasBaseReg, int64_t Scale) {
1421 // Check for overflow.
1422 if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1423 (MinOffset > 0))
1424 return false;
1425 MinOffset = (uint64_t)BaseOffset + MinOffset;
1426 if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1427 (MaxOffset > 0))
1428 return false;
1429 MaxOffset = (uint64_t)BaseOffset + MaxOffset;
1430
1431 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
1432 HasBaseReg, Scale) &&
1433 isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
1434 HasBaseReg, Scale);
1435}
1436
1437static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1438 int64_t MinOffset, int64_t MaxOffset,
1439 LSRUse::KindType Kind, MemAccessTy AccessTy,
1440 const Formula &F) {
1441 // For the purpose of isAMCompletelyFolded either having a canonical formula
1442 // or a scale not equal to zero is correct.
1443 // Problems may arise from non-canonical formulae having a scale == 0.
1444 // Strictly speaking, it would be best to just rely on canonical formulae.
1445 // However, when we generate the scaled formulae, we first check that the
1446 // scaling factor is profitable before computing the actual ScaledReg for
1447 // compile time's sake.
1448 assert((F.isCanonical() || F.Scale != 0));
1449 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1450 F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
1451}
1452
1453/// Test whether we know how to expand the current formula.
1454static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1455 int64_t MaxOffset, LSRUse::KindType Kind,
1456 MemAccessTy AccessTy, GlobalValue *BaseGV,
1457 int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
1458 // We know how to expand completely foldable formulae.
1459 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1460 BaseOffset, HasBaseReg, Scale) ||
1461 // Or formulae that use a base register produced by a sum of base
1462 // registers.
1463 (Scale == 1 &&
1464 isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1465 BaseGV, BaseOffset, true, 0));
1466}
1467
1468static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1469 int64_t MaxOffset, LSRUse::KindType Kind,
1470 MemAccessTy AccessTy, const Formula &F) {
1471 return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
1472 F.BaseOffset, F.HasBaseReg, F.Scale);
1473}
1474
1475static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1476 const LSRUse &LU, const Formula &F) {
1477 return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1478 LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
1479 F.Scale);
1480}
1481
1482static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
1483 const LSRUse &LU, const Formula &F) {
1484 if (!F.Scale)
1485 return 0;
1486
1487 // If the use is not completely folded in that instruction, we will have to
1488 // pay an extra cost only for scale != 1.
1489 if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1490 LU.AccessTy, F))
1491 return F.Scale != 1;
1492
1493 switch (LU.Kind) {
1494 case LSRUse::Address: {
1495 // Check the scaling factor cost with both the min and max offsets.
1496 int ScaleCostMinOffset = TTI.getScalingFactorCost(
1497 LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
1498 F.Scale, LU.AccessTy.AddrSpace);
1499 int ScaleCostMaxOffset = TTI.getScalingFactorCost(
1500 LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
1501 F.Scale, LU.AccessTy.AddrSpace);
1502
1503 assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
1504 "Legal addressing mode has an illegal cost!");
1505 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1506 }
1507 case LSRUse::ICmpZero:
1508 case LSRUse::Basic:
1509 case LSRUse::Special:
1510 // The use is completely folded, i.e., everything is folded into the
1511 // instruction.
1512 return 0;
1513 }
1514
1515 llvm_unreachable("Invalid LSRUse Kind!");
1516}
1517
1518static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1519 LSRUse::KindType Kind, MemAccessTy AccessTy,
1520 GlobalValue *BaseGV, int64_t BaseOffset,
1521 bool HasBaseReg) {
1522 // Fast-path: zero is always foldable.
1523 if (BaseOffset == 0 && !BaseGV) return true;
1524
1525 // Conservatively, create an address with an immediate and a
1526 // base and a scale.
1527 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1528
1529 // Canonicalize a scale of 1 to a base register if the formula doesn't
1530 // already have a base register.
1531 if (!HasBaseReg && Scale == 1) {
1532 Scale = 0;
1533 HasBaseReg = true;
1534 }
1535
1536 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
1537 HasBaseReg, Scale);
1538}
1539
1540static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1541 ScalarEvolution &SE, int64_t MinOffset,
1542 int64_t MaxOffset, LSRUse::KindType Kind,
1543 MemAccessTy AccessTy, const SCEV *S,
1544 bool HasBaseReg) {
1545 // Fast-path: zero is always foldable.
1546 if (S->isZero()) return true;
1547
1548 // Conservatively, create an address with an immediate and a
1549 // base and a scale.
1550 int64_t BaseOffset = ExtractImmediate(S, SE);
1551 GlobalValue *BaseGV = ExtractSymbol(S, SE);
1552
1553 // If there's anything else involved, it's not foldable.
1554 if (!S->isZero()) return false;
1555
1556 // Fast-path: zero is always foldable.
1557 if (BaseOffset == 0 && !BaseGV) return true;
1558
1559 // Conservatively, create an address with an immediate and a
1560 // base and a scale.
1561 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1562
1563 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1564 BaseOffset, HasBaseReg, Scale);
1565}
1566
1567namespace {
1568
1569/// An individual increment in a Chain of IV increments. Relate an IV user to
1570/// an expression that computes the IV it uses from the IV used by the previous
1571/// link in the Chain.
1572///
1573/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
1574/// original IVOperand. The head of the chain's IVOperand is only valid during
1575/// chain collection, before LSR replaces IV users. During chain generation,
1576/// IncExpr can be used to find the new IVOperand that computes the same
1577/// expression.
1578struct IVInc {
1579 Instruction *UserInst;
1580 Value* IVOperand;
1581 const SCEV *IncExpr;
1582
1583 IVInc(Instruction *U, Value *O, const SCEV *E):
1584 UserInst(U), IVOperand(O), IncExpr(E) {}
1585};
1586
1587// The list of IV increments in program order. We typically add the head of a
1588// chain without finding subsequent links.
1589struct IVChain {
1590 SmallVector<IVInc,1> Incs;
1591 const SCEV *ExprBase;
1592
1593 IVChain() : ExprBase(nullptr) {}
1594
1595 IVChain(const IVInc &Head, const SCEV *Base)
1596 : Incs(1, Head), ExprBase(Base) {}
1597
1598 typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
1599
1600 // Return the first increment in the chain.
1601 const_iterator begin() const {
1602 assert(!Incs.empty());
1603 return std::next(Incs.begin());
1604 }
1605 const_iterator end() const {
1606 return Incs.end();
1607 }
1608
1609 // Returns true if this chain contains any increments.
1610 bool hasIncs() const { return Incs.size() >= 2; }
1611
1612 // Add an IVInc to the end of this chain.
1613 void add(const IVInc &X) { Incs.push_back(X); }
1614
1615 // Returns the last UserInst in the chain.
1616 Instruction *tailUserInst() const { return Incs.back().UserInst; }
1617
1618 // Returns true if IncExpr can be profitably added to this chain.
1619 bool isProfitableIncrement(const SCEV *OperExpr,
1620 const SCEV *IncExpr,
1621 ScalarEvolution&);
1622};
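// [Illustrative sketch added by the editor; not part of LoopStrengthReduce.cpp.]
// A hypothetical source-level picture of the access pattern an IVChain
// records: each user reuses the previous user's pointer plus a loop-invariant
// increment, so one induction variable is carried per iteration instead of
// one address computation per access. All names below are made up.
#include <cassert>

static long sumChained(const int *A, int n) {
  long Sum = 0;
  const int *P = A;              // chain head: IncExpr is the absolute &A[0]
  for (int i = 0; i + 2 < n; i += 3) {
    Sum += P[0];                 // link: IVInc{UserInst=this load, IVOperand=P}
    Sum += P[1];                 // link: loop-invariant increment of 1 element
    Sum += P[2];                 // link: loop-invariant increment of 1 element
    P += 3;                      // single loop-carried IV update
  }
  return Sum;
}

int main() {
  int A[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  assert(sumChained(A, 9) == 45);
}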
1623
1624/// Helper for CollectChains to track multiple IV increment uses. Distinguish
1625/// between FarUsers that definitely cross IV increments and NearUsers that may
1626/// be used between IV increments.
1627struct ChainUsers {
1628 SmallPtrSet<Instruction*, 4> FarUsers;
1629 SmallPtrSet<Instruction*, 4> NearUsers;
1630};
1631
1632/// This class holds state for the main loop strength reduction logic.
1633class LSRInstance {
1634 IVUsers &IU;
1635 ScalarEvolution &SE;
1636 DominatorTree &DT;
1637 LoopInfo &LI;
1638 const TargetTransformInfo &TTI;
1639 Loop *const L;
1640 bool Changed;
1641
1642 /// This is the insert position that the current loop's induction variable
1643 /// increment should be placed. In simple loops, this is the latch block's
1644 /// terminator. But in more complicated cases, this is a position which will
1645 /// dominate all the in-loop post-increment users.
1646 Instruction *IVIncInsertPos;
1647
1648 /// Interesting factors between use strides.
1649 SmallSetVector<int64_t, 8> Factors;
1650
1651 /// Interesting use types, to facilitate truncation reuse.
1652 SmallSetVector<Type *, 4> Types;
1653
1654 /// The list of interesting uses.
1655 SmallVector<LSRUse, 16> Uses;
1656
1657 /// Track which uses use which register candidates.
1658 RegUseTracker RegUses;
1659
1660 // Limit the number of chains to avoid quadratic behavior. We don't expect to
1661 // have more than a few IV increment chains in a loop. Missing a Chain falls
1662 // back to normal LSR behavior for those uses.
1663 static const unsigned MaxChains = 8;
1664
1665 /// IV users can form a chain of IV increments.
1666 SmallVector<IVChain, MaxChains> IVChainVec;
1667
1668 /// IV users that belong to profitable IVChains.
1669 SmallPtrSet<Use*, MaxChains> IVIncSet;
1670
1671 void OptimizeShadowIV();
1672 bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
1673 ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
1674 void OptimizeLoopTermCond();
1675
1676 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
1677 SmallVectorImpl<ChainUsers> &ChainUsersVec);
1678 void FinalizeChain(IVChain &Chain);
1679 void CollectChains();
1680 void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
1681 SmallVectorImpl<WeakVH> &DeadInsts);
1682
1683 void CollectInterestingTypesAndFactors();
1684 void CollectFixupsAndInitialFormulae();
1685
1686 // Support for sharing of LSRUses between LSRFixups.
1687 typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
1688 UseMapTy UseMap;
1689
1690 bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
1691 LSRUse::KindType Kind, MemAccessTy AccessTy);
1692
1693 std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
1694 MemAccessTy AccessTy);
1695
1696 void DeleteUse(LSRUse &LU, size_t LUIdx);
1697
1698 LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
1699
1700 void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1701 void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1702 void CountRegisters(const Formula &F, size_t LUIdx);
1703 bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
1704
1705 void CollectLoopInvariantFixupsAndFormulae();
1706
1707 void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
1708 unsigned Depth = 0);
1709
1710 void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
1711 const Formula &Base, unsigned Depth,
1712 size_t Idx, bool IsScaledReg = false);
1713 void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
1714 void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1715 const Formula &Base, size_t Idx,
1716 bool IsScaledReg = false);
1717 void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1718 void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1719 const Formula &Base,
1720 const SmallVectorImpl<int64_t> &Worklist,
1721 size_t Idx, bool IsScaledReg = false);
1722 void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1723 void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1724 void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1725 void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
1726 void GenerateCrossUseConstantOffsets();
1727 void GenerateAllReuseFormulae();
1728
1729 void FilterOutUndesirableDedicatedRegisters();
1730
1731 size_t EstimateSearchSpaceComplexity() const;
1732 void NarrowSearchSpaceByDetectingSupersets();
1733 void NarrowSearchSpaceByCollapsingUnrolledCode();
1734 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
1735 void NarrowSearchSpaceByPickingWinnerRegs();
1736 void NarrowSearchSpaceUsingHeuristics();
1737
1738 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
1739 Cost &SolutionCost,
1740 SmallVectorImpl<const Formula *> &Workspace,
1741 const Cost &CurCost,
1742 const SmallPtrSet<const SCEV *, 16> &CurRegs,
1743 DenseSet<const SCEV *> &VisitedRegs) const;
1744 void Solve(SmallVectorImpl<const Formula *> &Solution) const;
1745
1746 BasicBlock::iterator
1747 HoistInsertPosition(BasicBlock::iterator IP,
1748 const SmallVectorImpl<Instruction *> &Inputs) const;
1749 BasicBlock::iterator
1750 AdjustInsertPositionForExpand(BasicBlock::iterator IP,
1751 const LSRFixup &LF,
1752 const LSRUse &LU,
1753 SCEVExpander &Rewriter) const;
1754
1755 Value *Expand(const LSRUse &LU, const LSRFixup &LF,
1756 const Formula &F,
1757 BasicBlock::iterator IP,
1758 SCEVExpander &Rewriter,
1759 SmallVectorImpl<WeakVH> &DeadInsts) const;
1760 void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
1761 const Formula &F,
1762 SCEVExpander &Rewriter,
1763 SmallVectorImpl<WeakVH> &DeadInsts) const;
1764 void Rewrite(const LSRUse &LU, const LSRFixup &LF,
1765 const Formula &F,
1766 SCEVExpander &Rewriter,
1767 SmallVectorImpl<WeakVH> &DeadInsts) const;
1768 void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
1769
1770public:
1771 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
1772 LoopInfo &LI, const TargetTransformInfo &TTI);
1773
1774 bool getChanged() const { return Changed; }
1775
1776 void print_factors_and_types(raw_ostream &OS) const;
1777 void print_fixups(raw_ostream &OS) const;
1778 void print_uses(raw_ostream &OS) const;
1779 void print(raw_ostream &OS) const;
1780 void dump() const;
1781};
1782
1783}
1784
1785/// If IV is used in a int-to-float cast inside the loop then try to eliminate
1786/// the cast operation.
1787void LSRInstance::OptimizeShadowIV() {
1788 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1789 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1790 return;
1791
1792 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
1793 UI != E; /* empty */) {
1794 IVUsers::const_iterator CandidateUI = UI;
1795 ++UI;
1796 Instruction *ShadowUse = CandidateUI->getUser();
1797 Type *DestTy = nullptr;
1798 bool IsSigned = false;
1799
1800 /* If shadow use is a int->float cast then insert a second IV
1801 to eliminate this cast.
1802
1803 for (unsigned i = 0; i < n; ++i)
1804 foo((double)i);
1805
1806 is transformed into
1807
1808 double d = 0.0;
1809 for (unsigned i = 0; i < n; ++i, ++d)
1810 foo(d);
1811 */
1812 if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
1813 IsSigned = false;
1814 DestTy = UCast->getDestTy();
1815 }
1816 else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
1817 IsSigned = true;
1818 DestTy = SCast->getDestTy();
1819 }
1820 if (!DestTy) continue;
1821
1822 // If target does not support DestTy natively then do not apply
1823 // this transformation.
1824 if (!TTI.isTypeLegal(DestTy)) continue;
1825
1826 PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
1827 if (!PH) continue;
1828 if (PH->getNumIncomingValues() != 2) continue;
1829
1830 Type *SrcTy = PH->getType();
1831 int Mantissa = DestTy->getFPMantissaWidth();
1832 if (Mantissa == -1) continue;
1833 if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
1834 continue;
1835
1836 unsigned Entry, Latch;
1837 if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
1838 Entry = 0;
1839 Latch = 1;
1840 } else {
1841 Entry = 1;
1842 Latch = 0;
1843 }
1844
1845 ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
1846 if (!Init) continue;
1847 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
1848 (double)Init->getSExtValue() :
1849 (double)Init->getZExtValue());
1850
1851 BinaryOperator *Incr =
1852 dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
1853 if (!Incr) continue;
1854 if (Incr->getOpcode() != Instruction::Add
1855 && Incr->getOpcode() != Instruction::Sub)
1856 continue;
1857
1858 /* Initialize new IV, double d = 0.0 in above example. */
1859 ConstantInt *C = nullptr;
1860 if (Incr->getOperand(0) == PH)
1861 C = dyn_cast<ConstantInt>(Incr->getOperand(1));
1862 else if (Incr->getOperand(1) == PH)
1863 C = dyn_cast<ConstantInt>(Incr->getOperand(0));
1864 else
1865 continue;
1866
1867 if (!C) continue;
1868
1869 // Ignore negative constants, as the code below doesn't handle them
1870 // correctly. TODO: Remove this restriction.
1871 if (!C->getValue().isStrictlyPositive()) continue;
1872
1873 /* Add new PHINode. */
1874 PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
1875
1876 /* create new increment. '++d' in above example. */
1877 Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
1878 BinaryOperator *NewIncr =
1879 BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
1880 Instruction::FAdd : Instruction::FSub,
1881 NewPH, CFP, "IV.S.next.", Incr);
1882
1883 NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
1884 NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
1885
1886 /* Remove cast operation */
1887 ShadowUse->replaceAllUsesWith(NewPH);
1888 ShadowUse->eraseFromParent();
1889 Changed = true;
1890 break;
1891 }
1892}
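// [Illustrative sketch added by the editor; not part of LoopStrengthReduce.cpp.]
// The shadow-IV rewrite sketched in the comment inside OptimizeShadowIV keeps
// a parallel floating-point induction variable instead of converting the
// integer IV on every iteration. The two forms agree as long as the counter
// stays within the destination type's mantissa, which is what the
// getFPMantissaWidth() check above enforces. A standalone check of the claim:
#include <cassert>

int main() {
  const unsigned N = 1000;   // assumed well below 2^53, the double mantissa
  double D = 0.0;            // the shadow IV ("double d = 0.0" in the comment)
  for (unsigned I = 0; I < N; ++I, ++D)
    assert(D == (double)I);  // same value the eliminated cast would produce
  return 0;
}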
1893
1894/// If Cond has an operand that is an expression of an IV, set the IV user and
1895/// stride information and return true, otherwise return false.
1896bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
1897 for (IVStrideUse &U : IU)
1898 if (U.getUser() == Cond) {
1899 // NOTE: we could handle setcc instructions with multiple uses here, but
1900 // InstCombine does it as well for simple uses, it's not clear that it
1901 // occurs enough in real life to handle.
1902 CondUse = &U;
1903 return true;
1904 }
1905 return false;
1906}
1907
1908/// Rewrite the loop's terminating condition if it uses a max computation.
1909///
1910/// This is a narrow solution to a specific, but acute, problem. For loops
1911/// like this:
1912///
1913/// i = 0;
1914/// do {
1915/// p[i] = 0.0;
1916/// } while (++i < n);
1917///
1918/// the trip count isn't just 'n', because 'n' might not be positive. And
1919/// unfortunately this can come up even for loops where the user didn't use
1920/// a C do-while loop. For example, seemingly well-behaved top-test loops
1921/// will commonly be lowered like this:
1922///
1923/// if (n > 0) {
1924/// i = 0;
1925/// do {
1926/// p[i] = 0.0;
1927/// } while (++i < n);
1928/// }
1929///
1930/// and then it's possible for subsequent optimization to obscure the if
1931/// test in such a way that indvars can't find it.
1932///
1933/// When indvars can't find the if test in loops like this, it creates a
1934/// max expression, which allows it to give the loop a canonical
1935/// induction variable:
1936///
1937/// i = 0;
1938/// max = n < 1 ? 1 : n;
1939/// do {
1940/// p[i] = 0.0;
1941/// } while (++i != max);
1942///
1943/// Canonical induction variables are necessary because the loop passes
1944/// are designed around them. The most obvious example of this is the
1945/// LoopInfo analysis, which doesn't remember trip count values. It
1946/// expects to be able to rediscover the trip count each time it is
1947/// needed, and it does this using a simple analysis that only succeeds if
1948/// the loop has a canonical induction variable.
1949///
1950/// However, when it comes time to generate code, the maximum operation
1951/// can be quite costly, especially if it's inside of an outer loop.
1952///
1953/// This function solves this problem by detecting this type of loop and
1954/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
1955/// the instructions for the maximum computation.
1956///
1957ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
1958 // Check that the loop matches the pattern we're looking for.
1959 if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
1960 Cond->getPredicate() != CmpInst::ICMP_NE)
1961 return Cond;
1962
1963 SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
1964 if (!Sel || !Sel->hasOneUse()) return Cond;
1965
1966 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1967 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1968 return Cond;
1969 const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
1970
1971 // Add one to the backedge-taken count to get the trip count.
1972 const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
1973 if (IterationCount != SE.getSCEV(Sel)) return Cond;
1974
1975 // Check for a max calculation that matches the pattern. There's no check
1976 // for ICMP_ULE here because the comparison would be with zero, which
1977 // isn't interesting.
1978 CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1979 const SCEVNAryExpr *Max = nullptr;
1980 if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
1981 Pred = ICmpInst::ICMP_SLE;
1982 Max = S;
1983 } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
1984 Pred = ICmpInst::ICMP_SLT;
1985 Max = S;
1986 } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
1987 Pred = ICmpInst::ICMP_ULT;
1988 Max = U;
1989 } else {
1990 // No match; bail.
1991 return Cond;
1992 }
1993
1994 // To handle a max with more than two operands, this optimization would
1995 // require additional checking and setup.
1996 if (Max->getNumOperands() != 2)
1997 return Cond;
1998
1999 const SCEV *MaxLHS = Max->getOperand(0);
2000 const SCEV *MaxRHS = Max->getOperand(1);
2001
2002 // ScalarEvolution canonicalizes constants to the left. For < and >, look
2003 // for a comparison with 1. For <= and >=, a comparison with zero.
2004 if (!MaxLHS ||
2005 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2006 return Cond;
2007
2008 // Check the relevant induction variable for conformance to
2009 // the pattern.
2010 const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
2011 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
2012 if (!AR || !AR->isAffine() ||
2013 AR->getStart() != One ||
2014 AR->getStepRecurrence(SE) != One)
2015 return Cond;
2016
2017 assert(AR->getLoop() == L &&
2018 "Loop condition operand is an addrec in a different loop!");
2019
2020 // Check the right operand of the select, and remember it, as it will
2021 // be used in the new comparison instruction.
2022 Value *NewRHS = nullptr;
2023 if (ICmpInst::isTrueWhenEqual(Pred)) {
2024 // Look for n+1, and grab n.
2025 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
2026 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2027 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2028 NewRHS = BO->getOperand(0);
2029 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
2030 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2031 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2032 NewRHS = BO->getOperand(0);
2033 if (!NewRHS)
2034 return Cond;
2035 } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
2036 NewRHS = Sel->getOperand(1);
2037 else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
2038 NewRHS = Sel->getOperand(2);
2039 else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2040 NewRHS = SU->getValue();
2041 else
2042 // Max doesn't match expected pattern.
2043 return Cond;
2044
2045 // Determine the new comparison opcode. It may be signed or unsigned,
2046 // and the original comparison may be either equality or inequality.
2047 if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2048 Pred = CmpInst::getInversePredicate(Pred);
2049
2050 // Ok, everything looks ok to change the condition into an SLT or SGE and
2051 // delete the max calculation.
2052 ICmpInst *NewCond =
2053 new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
2054
2055 // Delete the max calculation instructions.
2056 Cond->replaceAllUsesWith(NewCond);
2057 CondUse->setUser(NewCond);
2058 Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
2059 Cond->eraseFromParent();
2060 Sel->eraseFromParent();
2061 if (Cmp->use_empty())
2062 Cmp->eraseFromParent();
2063 return NewCond;
2064}
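// [Illustrative sketch added by the editor; not part of LoopStrengthReduce.cpp.]
// The rewrite performed by OptimizeMax relies on the two loop forms shown in
// its comment executing the same number of iterations for every n, so the
// ICMP_NE-against-max exit test can be turned back into a signed '<'.
// A standalone check of that equivalence over a small range of n:
#include <cassert>

static int tripsWithMax(int N) {
  int I = 0, Max = N < 1 ? 1 : N, Trips = 0;
  do { ++Trips; } while (++I != Max);   // indvars' canonicalized form
  return Trips;
}

static int tripsWithSlt(int N) {
  int I = 0, Trips = 0;
  do { ++Trips; } while (++I < N);      // form restored by OptimizeMax
  return Trips;
}

int main() {
  for (int N = -4; N <= 8; ++N)
    assert(tripsWithMax(N) == tripsWithSlt(N));
}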
2065
2066/// Change loop terminating condition to use the postinc iv when possible.
2067void
2068LSRInstance::OptimizeLoopTermCond() {
2069 SmallPtrSet<Instruction *, 4> PostIncs;
2070
2071 // We need a different set of heuristics for rotated and non-rotated loops.
2072 // If a loop is rotated then the latch is also the backedge, so inserting
2073 // post-inc expressions just before the latch is ideal. To reduce live ranges
2074 // it also makes sense to rewrite terminating conditions to use post-inc
2075 // expressions.
2076 //
2077 // If the loop is not rotated then the latch is not a backedge; the latch
2078 // check is done in the loop head. Adding post-inc expressions before the
2079 // latch will cause overlapping live-ranges of pre-inc and post-inc expressions
2080 // in the loop body. In this case we do *not* want to use post-inc expressions
2081 // in the latch check, and we want to insert post-inc expressions before
2082 // the backedge.
2083 BasicBlock *LatchBlock = L->getLoopLatch();
2084 SmallVector<BasicBlock*, 8> ExitingBlocks;
2085 L->getExitingBlocks(ExitingBlocks);
2086 if (llvm::all_of(ExitingBlocks, [&LatchBlock](const BasicBlock *BB) {
2087 return LatchBlock != BB;
2088 })) {
2089 // The backedge doesn't exit the loop; treat this as a head-tested loop.
2090 IVIncInsertPos = LatchBlock->getTerminator();
2091 return;
2092 }
2093
2094 // Otherwise treat this as a rotated loop.
2095 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2096
2097 // Get the terminating condition for the loop if possible. If we
2098 // can, we want to change it to use a post-incremented version of its
2099 // induction variable, to allow coalescing the live ranges for the IV into
2100 // one register value.
2101
2102 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2103 if (!TermBr)
2104 continue;
2105 // FIXME: Overly conservative, termination condition could be an 'or' etc..
2106 if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
2107 continue;
2108
2109 // Search IVUsesByStride to find Cond's IVUse if there is one.
2110 IVStrideUse *CondUse = nullptr;
2111 ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
2112 if (!FindIVUserForCond(Cond, CondUse))
2113 continue;
2114
2115 // If the trip count is computed in terms of a max (due to ScalarEvolution
2116 // being unable to find a sufficient guard, for example), change the loop
2117 // comparison to use SLT or ULT instead of NE.
2118 // One consequence of doing this now is that it disrupts the count-down
2119 // optimization. That's not always a bad thing though, because in such
2120 // cases it may still be worthwhile to avoid a max.
2121 Cond = OptimizeMax(Cond, CondUse);
2122
2123 // If this exiting block dominates the latch block, it may also use
2124 // the post-inc value if it won't be shared with other uses.
2125 // Check for dominance.
2126 if (!DT.dominates(ExitingBlock, LatchBlock))
2127 continue;
2128
2129 // Conservatively avoid trying to use the post-inc value in non-latch
2130 // exits if there may be pre-inc users in intervening blocks.
2131 if (LatchBlock != ExitingBlock)
2132 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
2133 // Test if the use is reachable from the exiting block. This dominator
2134 // query is a conservative approximation of reachability.
2135 if (&*UI != CondUse &&
2136 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2137 // Conservatively assume there may be reuse if the quotient of their
2138 // strides could be a legal scale.
2139 const SCEV *A = IU.getStride(*CondUse, L);
2140 const SCEV *B = IU.getStride(*UI, L);
2141 if (!A || !B) continue;
2142 if (SE.getTypeSizeInBits(A->getType()) !=
2143 SE.getTypeSizeInBits(B->getType())) {
2144 if (SE.getTypeSizeInBits(A->getType()) >
2145 SE.getTypeSizeInBits(B->getType()))
2146 B = SE.getSignExtendExpr(B, A->getType());
2147 else
2148 A = SE.getSignExtendExpr(A, B->getType());
2149 }
2150 if (const SCEVConstant *D =
2151 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
2152 const ConstantInt *C = D->getValue();
2153 // Stride of one or negative one can have reuse with non-addresses.
2154 if (C->isOne() || C->isAllOnesValue())
2155 goto decline_post_inc;
2156 // Avoid weird situations.
2157 if (C->getValue().getMinSignedBits() >= 64 ||
2158 C->getValue().isMinSignedValue())
2159 goto decline_post_inc;
2160 // Check for possible scaled-address reuse.
2161 MemAccessTy AccessTy = getAccessType(UI->getUser());
2162 int64_t Scale = C->getSExtValue();
2163 if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2164 /*BaseOffset=*/0,
2165 /*HasBaseReg=*/false, Scale,
2166 AccessTy.AddrSpace))
2167 goto decline_post_inc;
2168 Scale = -Scale;
2169 if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2170 /*BaseOffset=*/0,
2171 /*HasBaseReg=*/false, Scale,
2172 AccessTy.AddrSpace))
2173 goto decline_post_inc;
2174 }
2175 }
2176
2177 DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
2178 << *Cond << '\n');
2179
2180 // It's possible for the setcc instruction to be anywhere in the loop, and
2181 // possible for it to have multiple users. If it is not immediately before
2182 // the exiting block branch, move it.
2183 if (&*++BasicBlock::iterator(Cond) != TermBr) {
2184 if (Cond->hasOneUse()) {
2185 Cond->moveBefore(TermBr);
2186 } else {
2187 // Clone the terminating condition and insert into the loopend.
2188 ICmpInst *OldCond = Cond;
2189 Cond = cast<ICmpInst>(Cond->clone());
2190 Cond->setName(L->getHeader()->getName() + ".termcond");
2191 ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);
2192
2193 // Clone the IVUse, as the old use still exists!
2194 CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
2195 TermBr->replaceUsesOfWith(OldCond, Cond);
2196 }
2197 }
2198
2199 // If we get to here, we know that we can transform the setcc instruction to
2200 // use the post-incremented version of the IV, allowing us to coalesce the
2201 // live ranges for the IV correctly.
2202 CondUse->transformToPostInc(L);
2203 Changed = true;
2204
2205 PostIncs.insert(Cond);
2206 decline_post_inc:;
2207 }
2208
2209 // Determine an insertion point for the loop induction variable increment. It
2210 // must dominate all the post-inc comparisons we just set up, and it must
2211 // dominate the loop latch edge.
2212 IVIncInsertPos = L->getLoopLatch()->getTerminator();
2213 for (Instruction *Inst : PostIncs) {
2214 BasicBlock *BB =
2215 DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
2216 Inst->getParent());
2217 if (BB == Inst->getParent())
2218 IVIncInsertPos = Inst;
2219 else if (BB != IVIncInsertPos->getParent())
2220 IVIncInsertPos = BB->getTerminator();
2221 }
2222}
2223
2224/// Determine if the given use can accommodate a fixup at the given offset and
2225/// other details. If so, update the use and return true.
2226bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
2227 bool HasBaseReg, LSRUse::KindType Kind,
2228 MemAccessTy AccessTy) {
2229 int64_t NewMinOffset = LU.MinOffset;
2230 int64_t NewMaxOffset = LU.MaxOffset;
2231 MemAccessTy NewAccessTy = AccessTy;
2232
2233 // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2234 // something conservative, however this can pessimize in the case that one of
2235 // the uses will have all its uses outside the loop, for example.
2236 if (LU.Kind != Kind)
2237 return false;
2238
2239 // Check for a mismatched access type, and fall back conservatively as needed.
2240 // TODO: Be less conservative when the type is similar and can use the same
2241 // addressing modes.
2242 if (Kind == LSRUse::Address) {
2243 if (AccessTy != LU.AccessTy)
2244 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext());
2245 }
2246
2247 // Conservatively assume HasBaseReg is true for now.
2248 if (NewOffset < LU.MinOffset) {
2249 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2250 LU.MaxOffset - NewOffset, HasBaseReg))
2251 return false;
2252 NewMinOffset = NewOffset;
2253 } else if (NewOffset > LU.MaxOffset) {
2254 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2255 NewOffset - LU.MinOffset, HasBaseReg))
2256 return false;
2257 NewMaxOffset = NewOffset;
2258 }
2259
2260 // Update the use.
2261 LU.MinOffset = NewMinOffset;
2262 LU.MaxOffset = NewMaxOffset;
2263 LU.AccessTy = NewAccessTy;
2264 return true;
2265}
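// [Illustrative sketch added by the editor; not part of LoopStrengthReduce.cpp.]
// reconcileNewOffset widens an LSRUse's [MinOffset, MaxOffset] window to admit
// a new fixup only if the widened span would still be foldable. A simplified
// standalone model of that decision; FoldableSpan is a made-up stand-in for
// the target query that isAlwaysFoldable performs through TTI:
#include <algorithm>
#include <cassert>
#include <cstdint>

struct OffsetRange { int64_t Min, Max; };

static bool reconcile(OffsetRange &R, int64_t NewOffset, int64_t FoldableSpan) {
  int64_t NewMin = std::min(R.Min, NewOffset);
  int64_t NewMax = std::max(R.Max, NewOffset);
  if (NewMax - NewMin > FoldableSpan)
    return false;   // reject: the widened window exceeds what can be folded
  R.Min = NewMin;   // accept: update the use in place
  R.Max = NewMax;
  return true;
}

int main() {
  OffsetRange R{0, 8};
  assert(reconcile(R, 16, /*FoldableSpan=*/255) && R.Max == 16);
  assert(!reconcile(R, 1024, /*FoldableSpan=*/255) && R.Max == 16);
}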
2266
2267/// Return an LSRUse index and an offset value for a fixup which needs the given
2268/// expression, with the given kind and optional access type. Either reuse an
2269/// existing use or create a new one, as needed.
2270std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
2271 LSRUse::KindType Kind,
2272 MemAccessTy AccessTy) {
2273 const SCEV *Copy = Expr;
2274 int64_t Offset = ExtractImmediate(Expr, SE);
2275
2276 // Basic uses can't accept any offset, for example.
2277 if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2278 Offset, /*HasBaseReg=*/ true)) {
2279 Expr = Copy;
2280 Offset = 0;
2281 }
2282
2283 std::pair<UseMapTy::iterator, bool> P =
2284 UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
2285 if (!P.second) {
2286 // A use already existed with this base.
2287 size_t LUIdx = P.first->second;
2288 LSRUse &LU = Uses[LUIdx];
2289 if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
2290 // Reuse this use.
2291 return std::make_pair(LUIdx, Offset);
2292 }
2293
2294 // Create a new use.
2295 size_t LUIdx = Uses.size();
2296 P.first->second = LUIdx;
2297 Uses.push_back(LSRUse(Kind, AccessTy));
2298 LSRUse &LU = Uses[LUIdx];
2299
2300 LU.MinOffset = Offset;
2301 LU.MaxOffset = Offset;
2302 return std::make_pair(LUIdx, Offset);
2303}
2304
2305/// Delete the given use from the Uses list.
2306void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2307 if (&LU != &Uses.back())
2308 std::swap(LU, Uses.back());
2309 Uses.pop_back();
2310
2311 // Update RegUses.
2312 RegUses.swapAndDropUse(LUIdx, Uses.size());
2313}
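// [Illustrative sketch added by the editor; not part of LoopStrengthReduce.cpp.]
// DeleteUse uses the classic swap-and-pop idiom: the doomed element is swapped
// with the last one and the vector shrinks by one, which is O(1) but does not
// preserve order (which is why RegUses.swapAndDropUse is told about the swap).
// A standalone version of the idiom on a plain vector:
#include <cassert>
#include <utility>
#include <vector>

int main() {
  std::vector<int> Uses = {10, 20, 30, 40};
  size_t Idx = 1;                        // remove Uses[1]
  if (Idx != Uses.size() - 1)
    std::swap(Uses[Idx], Uses.back());   // move the victim to the back
  Uses.pop_back();                       // then drop it in O(1)
  assert((Uses == std::vector<int>{10, 40, 30}));
}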
2314
2315/// Look for a use distinct from OrigLU which has a formula with the same
2316/// registers as the given formula.
2317LSRUse *
2318LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2319 const LSRUse &OrigLU) {
2320 // Search all uses for the formula. This could be more clever.
2321 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
2322 LSRUse &LU = Uses[LUIdx];
2323 // Check whether this use is close enough to OrigLU, to see whether it's
2324 // worthwhile looking through its formulae.
2325 // Ignore ICmpZero uses because they may contain formulae generated by
2326 // GenerateICmpZeroScales, in which case adding fixup offsets may
2327 // be invalid.
2328 if (&LU != &OrigLU &&
2329 LU.Kind != LSRUse::ICmpZero &&
2330 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2331 LU.WidestFixupType == OrigLU.WidestFixupType &&
2332 LU.HasFormulaWithSameRegs(OrigF)) {
2333 // Scan through this use's formulae.
2334 for (const Formula &F : LU.Formulae) {
2335 // Check to see if this formula has the same registers and symbols
2336 // as OrigF.
2337 if (F.BaseRegs == OrigF.BaseRegs &&
2338 F.ScaledReg == OrigF.ScaledReg &&
2339 F.BaseGV == OrigF.BaseGV &&
2340 F.Scale == OrigF.Scale &&
2341 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2342 if (F.BaseOffset == 0)
2343 return &LU;
2344 // This is the formula where all the registers and symbols matched;
2345 // there aren't going to be any others. Since we declined it, we
2346 // can skip the rest of the formulae and proceed to the next LSRUse.
2347 break;
2348 }
2349 }
2350 }
2351 }
2352
2353 // Nothing looked good.
2354 return nullptr;
2355}
2356
2357void LSRInstance::CollectInterestingTypesAndFactors() {
2358 SmallSetVector<const SCEV *, 4> Strides;
2359
2360 // Collect interesting types and strides.
2361 SmallVector<const SCEV *, 4> Worklist;
2362 for (const IVStrideUse &U : IU) {
2363 const SCEV *Expr = IU.getExpr(U);
2364
2365 // Collect interesting types.
2366 Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
2367
2368 // Add strides for mentioned loops.
2369 Worklist.push_back(Expr);
2370 do {
2371 const SCEV *S = Worklist.pop_back_val();
2372 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2373 if (AR->getLoop() == L)
2374 Strides.insert(AR->getStepRecurrence(SE));
2375 Worklist.push_back(AR->getStart());
2376 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2377 Worklist.append(Add->op_begin(), Add->op_end());
2378 }
2379 } while (!Worklist.empty());
2380 }
2381
2382 // Compute interesting factors from the set of interesting strides.
2383 for (SmallSetVector<const SCEV *, 4>::const_iterator
2384 I = Strides.begin(), E = Strides.end(); I != E; ++I)
2385 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2386 std::next(I); NewStrideIter != E; ++NewStrideIter) {
2387 const SCEV *OldStride = *I;
2388 const SCEV *NewStride = *NewStrideIter;
2389
2390 if (SE.getTypeSizeInBits(OldStride->getType()) !=
2391 SE.getTypeSizeInBits(NewStride->getType())) {
2392 if (SE.getTypeSizeInBits(OldStride->getType()) >
2393 SE.getTypeSizeInBits(NewStride->getType()))
2394 NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
2395 else
2396 OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
2397 }
2398 if (const SCEVConstant *Factor =
2399 dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
2400 SE, true))) {
2401 if (Factor->getAPInt().getMinSignedBits() <= 64)
2402 Factors.insert(Factor->getAPInt().getSExtValue());
2403 } else if (const SCEVConstant *Factor =
2404 dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
2405 NewStride,
2406 SE, true))) {
2407 if (Factor->getAPInt().getMinSignedBits() <= 64)
2408 Factors.insert(Factor->getAPInt().getSExtValue());
2409 }
2410 }
2411
2412 // If all uses use the same type, don't bother looking for truncation-based
2413 // reuse.
2414 if (Types.size() == 1)
2415 Types.clear();
2416
2417 DEBUG(print_factors_and_types(dbgs()));
2418}
2419
2420/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
2421/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
2422/// IVStrideUses, we could partially skip this.
2423static User::op_iterator
2424findIVOperand(User::op_iterator OI, User::op_iterator OE,
2425 Loop *L, ScalarEvolution &SE) {
2426 for(; OI != OE; ++OI) {
2427 if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2428 if (!SE.isSCEVable(Oper->getType()))
2429 continue;
2430
2431 if (const SCEVAddRecExpr *AR =
2432 dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
2433 if (AR->getLoop() == L)
2434 break;
2435 }
2436 }
2437 }
2438 return OI;
2439}
2440
2441/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
2442/// a convenient helper.
2443static Value *getWideOperand(Value *Oper) {
2444 if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2445 return Trunc->getOperand(0);
2446 return Oper;
2447}
2448
2449/// Return true if we allow an IV chain to include both types.
2450static bool isCompatibleIVType(Value *LVal, Value *RVal) {
2451 Type *LType = LVal->getType();
2452 Type *RType = RVal->getType();
2453 return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
2454}
2455
2456/// Return an approximation of this SCEV expression's "base", or NULL for any
2457/// constant. Returning the expression itself is conservative. Returning a
2458/// deeper subexpression is more precise and valid as long as it isn't less
2459/// complex than another subexpression. For expressions involving multiple
2460/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
2461/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
2462/// IVInc==b-a.
2463///
2464/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2465/// SCEVUnknown, we simply return the rightmost SCEV operand.
2466static const SCEV *getExprBase(const SCEV *S) {
2467 switch (S->getSCEVType()) {
2468 default: // including scUnknown.
2469 return S;
2470 case scConstant:
2471 return nullptr;
2472 case scTruncate:
2473 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2474 case scZeroExtend:
2475 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2476 case scSignExtend:
2477 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2478 case scAddExpr: {
2479 // Skip over scaled operands (scMulExpr) to follow add operands as long as
2480 // there's nothing more complex.
2481 // FIXME: not sure if we want to recognize negation.
2482 const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
2483 for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
2484 E(Add->op_begin()); I != E; ++I) {
2485 const SCEV *SubExpr = *I;
2486 if (SubExpr->getSCEVType() == scAddExpr)
2487 return getExprBase(SubExpr);
2488
2489 if (SubExpr->getSCEVType() != scMulExpr)
2490 return SubExpr;
2491 }
2492 return S; // all operands are scaled, be conservative.
2493 }
2494 case scAddRecExpr:
2495 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2496 }
2497}
2498
2499/// Return true if the chain increment is profitable to expand into a loop
2500/// invariant value, which may require its own register. A profitable chain
2501/// increment will be an offset relative to the same base. We allow such offsets
2502/// to potentially be used as a chain increment as long as it's not obviously
2503/// expensive to expand using real instructions.
2504bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
2505 const SCEV *IncExpr,
2506 ScalarEvolution &SE) {
2507 // Aggressively form chains when -stress-ivchain.
2508 if (StressIVChain)
2509 return true;
2510
2511 // Do not replace a constant offset from IV head with a nonconstant IV
2512 // increment.
2513 if (!isa<SCEVConstant>(IncExpr)) {
2514 const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
2515 if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
2516 return 0;
2517 }
2518
2519 SmallPtrSet<const SCEV*, 8> Processed;
2520 return !isHighCostExpansion(IncExpr, Processed, SE);
2521}
2522
2523/// Return true if the number of registers needed for the chain is estimated to
2524/// be less than the number required for the individual IV users. First prohibit
2525/// any IV users that keep the IV live across increments (the Users set should
2526/// be empty). Next count the number and type of increments in the chain.
2527///
2528/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
2529/// effectively use postinc addressing modes. Only consider it profitable if the
2530/// increments can be computed in fewer registers when chained.
2531///
2532/// TODO: Consider IVInc free if it's already used in another chains.
2533static bool
2534isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
2535 ScalarEvolution &SE, const TargetTransformInfo &TTI) {
2536 if (StressIVChain)
2537 return true;
2538
2539 if (!Chain.hasIncs())
2540 return false;
2541
2542 if (!Users.empty()) {
2543 DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
2544 for (Instruction *Inst : Users) {
2545 dbgs() << " " << *Inst << "\n";
2546 });
2547 return false;
2548 }
2549 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2550
2551 // The chain itself may require a register, so initialize cost to 1.
2552 int cost = 1;
2553
2554 // A complete chain likely eliminates the need for keeping the original IV in
2555 // a register. LSR does not currently know how to form a complete chain unless
2556 // the header phi already exists.
2557 if (isa<PHINode>(Chain.tailUserInst())
2558 && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2559 --cost;
2560 }
2561 const SCEV *LastIncExpr = nullptr;
2562 unsigned NumConstIncrements = 0;
2563 unsigned NumVarIncrements = 0;
2564 unsigned NumReusedIncrements = 0;
2565 for (const IVInc &Inc : Chain) {
2566 if (Inc.IncExpr->isZero())
2567 continue;
2568
2569 // Incrementing by zero or some constant is neutral. We assume constants can
2570 // be folded into an addressing mode or an add's immediate operand.
2571 if (isa<SCEVConstant>(Inc.IncExpr)) {
2572 ++NumConstIncrements;
2573 continue;
2574 }
2575
2576 if (Inc.IncExpr == LastIncExpr)
2577 ++NumReusedIncrements;
2578 else
2579 ++NumVarIncrements;
2580
2581 LastIncExpr = Inc.IncExpr;
2582 }
2583 // An IV chain with a single increment is handled by LSR's postinc
2584 // uses. However, a chain with multiple increments requires keeping the IV's
2585 // value live longer than it needs to be if chained.
2586 if (NumConstIncrements > 1)
2587 --cost;
2588
2589 // Materializing increment expressions in the preheader that didn't exist in
2590 // the original code may cost a register. For example, sign-extended array
2591 // indices can produce ridiculous increments like this:
2592 // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
2593 cost += NumVarIncrements;
2594
2595 // Reusing variable increments likely saves a register to hold the multiple of
2596 // the stride.
2597 cost -= NumReusedIncrements;
2598
2599 DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
2600 << "\n");
2601
2602 return cost < 0;
2603}
2604
2605/// Add this IV user to an existing chain or make it the head of a new chain.
2606void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2607 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
2608 // When IVs are used as types of varying widths, they are generally converted
2609 // to a wider type with some uses remaining narrow under a (free) trunc.
2610 Value *const NextIV = getWideOperand(IVOper);
2611 const SCEV *const OperExpr = SE.getSCEV(NextIV);
2612 const SCEV *const OperExprBase = getExprBase(OperExpr);
2613
2614 // Visit all existing chains. Check if its IVOper can be computed as a
2615 // profitable loop invariant increment from the last link in the Chain.
2616 unsigned ChainIdx = 0, NChains = IVChainVec.size();
2617 const SCEV *LastIncExpr = nullptr;
2618 for (; ChainIdx < NChains; ++ChainIdx) {
2619 IVChain &Chain = IVChainVec[ChainIdx];
2620
2621 // Prune the solution space aggressively by checking that both IV operands
2622 // are expressions that operate on the same unscaled SCEVUnknown. This
2623 // "base" will be canceled by the subsequent getMinusSCEV call. Checking
2624 // first avoids creating extra SCEV expressions.
2625 if (!StressIVChain && Chain.ExprBase != OperExprBase)
2626 continue;
2627
2628 Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
2629 if (!isCompatibleIVType(PrevIV, NextIV))
2630 continue;
2631
2632 // A phi node terminates a chain.
2633 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2634 continue;
2635
2636 // The increment must be loop-invariant so it can be kept in a register.
2637 const SCEV *PrevExpr = SE.getSCEV(PrevIV);
2638 const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
2639 if (!SE.isLoopInvariant(IncExpr, L))
2640 continue;
2641
2642 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2643 LastIncExpr = IncExpr;
2644 break;
2645 }
2646 }
2647 // If we haven't found a chain, create a new one, unless we hit the max. Don't
2648 // bother for phi nodes, because they must be last in the chain.
2649 if (ChainIdx == NChains) {
2650 if (isa<PHINode>(UserInst))
2651 return;
2652 if (NChains >= MaxChains && !StressIVChain) {
2653 DEBUG(dbgs() << "IV Chain Limit\n");
2654 return;
2655 }
2656 LastIncExpr = OperExpr;
2657 // IVUsers may have skipped over sign/zero extensions. We don't currently
2658 // attempt to form chains involving extensions unless they can be hoisted
2659 // into this loop's AddRec.
2660 if (!isa<SCEVAddRecExpr>(LastIncExpr))
2661 return;
2662 ++NChains;
2663 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
2664 OperExprBase));
2665 ChainUsersVec.resize(NChains);
2666 DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
2667 << ") IV=" << *LastIncExpr << "\n");
2668 } else {
2669 DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
2670 << ") IV+" << *LastIncExpr << "\n");
2671 // Add this IV user to the end of the chain.
2672 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
2673 }
2674 IVChain &Chain = IVChainVec[ChainIdx];
2675
2676 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
2677 // This chain's NearUsers become FarUsers.
2678 if (!LastIncExpr->isZero()) {
2679 ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
2680 NearUsers.end());
2681 NearUsers.clear();
2682 }
2683
2684 // All other uses of IVOperand become near uses of the chain.
2685 // We currently ignore intermediate values within SCEV expressions, assuming
2686 // they will eventually be used be the current chain, or can be computed
2687 // from one of the chain increments. To be more precise we could
2688 // transitively follow its user and only add leaf IV users to the set.
2689 for (User *U : IVOper->users()) {
2690 Instruction *OtherUse = dyn_cast<Instruction>(U);
2691 if (!OtherUse)
2692 continue;
2693 // Uses in the chain will no longer be uses if the chain is formed.
2694 // Include the head of the chain in this iteration (not Chain.begin()).
2695 IVChain::const_iterator IncIter = Chain.Incs.begin();
2696 IVChain::const_iterator IncEnd = Chain.Incs.end();
2697 for( ; IncIter != IncEnd; ++IncIter) {
2698 if (IncIter->UserInst == OtherUse)
2699 break;
2700 }
2701 if (IncIter != IncEnd)
2702 continue;
2703
2704 if (SE.isSCEVable(OtherUse->getType())
2705 && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
2706 && IU.isIVUserOrOperand(OtherUse)) {
2707 continue;
2708 }
2709 NearUsers.insert(OtherUse);
2710 }
2711
2712 // Since this user is part of the chain, it's no longer considered a use
2713 // of the chain.
2714 ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
2715}
2716
2717/// Populate the vector of Chains.
2718///
2719/// This decreases ILP at the architecture level. Targets with ample registers,
2720/// multiple memory ports, and no register renaming probably don't want
2721/// this. However, such targets should probably disable LSR altogether.
2722///
2723/// The job of LSR is to make a reasonable choice of induction variables across
2724/// the loop. Subsequent passes can easily "unchain" computation exposing more
2725/// ILP *within the loop* if the target wants it.
2726///
2727/// Finding the best IV chain is potentially a scheduling problem. Since LSR
2728/// will not reorder memory operations, it will recognize this as a chain, but
2729/// will generate redundant IV increments. Ideally this would be corrected later
2730/// by a smart scheduler:
2731/// = A[i]
2732/// = A[i+x]
2733/// A[i] =
2734/// A[i+x] =
2735///
2736/// TODO: Walk the entire domtree within this loop, not just the path to the
2737/// loop latch. This will discover chains on side paths, but requires
2738/// maintaining multiple copies of the Chains state.
2739void LSRInstance::CollectChains() {
2740 DEBUG(dbgs() << "Collecting IV Chains.\n");
2741 SmallVector<ChainUsers, 8> ChainUsersVec;
2742
2743 SmallVector<BasicBlock *,8> LatchPath;
2744 BasicBlock *LoopHeader = L->getHeader();
2745 for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
2746 Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
2747 LatchPath.push_back(Rung->getBlock());
2748 }
2749 LatchPath.push_back(LoopHeader);
2750
2751 // Walk the instruction stream from the loop header to the loop latch.
2752 for (BasicBlock *BB : reverse(LatchPath)) {
2753 for (Instruction &I : *BB) {
2754 // Skip instructions that weren't seen by IVUsers analysis.
2755 if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
2756 continue;
2757
2758 // Ignore users that are part of a SCEV expression. This way we only
2759 // consider leaf IV Users. This effectively rediscovers a portion of
2760 // IVUsers analysis but in program order this time.
2761 if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
2762 continue;
2763
2764 // Remove this instruction from any NearUsers set it may be in.
2765 for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
2766 ChainIdx < NChains; ++ChainIdx) {
2767 ChainUsersVec[ChainIdx].NearUsers.erase(&I);
2768 }
2769 // Search for operands that can be chained.
2770 SmallPtrSet<Instruction*, 4> UniqueOperands;
2771 User::op_iterator IVOpEnd = I.op_end();
2772 User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
2773 while (IVOpIter != IVOpEnd) {
2774 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
2775 if (UniqueOperands.insert(IVOpInst).second)
2776 ChainInstruction(&I, IVOpInst, ChainUsersVec);
2777 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
2778 }
2779 } // Continue walking down the instructions.
2780 } // Continue walking down the domtree.
2781 // Visit phi backedges to determine if the chain can generate the IV postinc.
2782 for (BasicBlock::iterator I = L->getHeader()->begin();
2783 PHINode *PN = dyn_cast<PHINode>(I); ++I) {
2784 if (!SE.isSCEVable(PN->getType()))
2785 continue;
2786
2787 Instruction *IncV =
2788 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
2789 if (IncV)
2790 ChainInstruction(PN, IncV, ChainUsersVec);
2791 }
2792 // Remove any unprofitable chains.
2793 unsigned ChainIdx = 0;
2794 for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
2795 UsersIdx < NChains; ++UsersIdx) {
2796 if (!isProfitableChain(IVChainVec[UsersIdx],
2797 ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
2798 continue;
2799 // Preserve the chain at UsersIdx.
2800 if (ChainIdx != UsersIdx)
2801 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
2802 FinalizeChain(IVChainVec[ChainIdx]);
2803 ++ChainIdx;
2804 }
2805 IVChainVec.resize(ChainIdx);
2806}
2807
2808void LSRInstance::FinalizeChain(IVChain &Chain) {
2809 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2810  DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
2811
2812 for (const IVInc &Inc : Chain) {
2813    DEBUG(dbgs() << " Inc: " << Inc.UserInst << "\n");
2814 auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
2815    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
2816 IVIncSet.insert(UseI);
2817 }
2818}
2819
2820/// Return true if the IVInc can be folded into an addressing mode.
2821static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
2822 Value *Operand, const TargetTransformInfo &TTI) {
2823 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
2824 if (!IncConst || !isAddressUse(UserInst, Operand))
2825 return false;
2826
2827 if (IncConst->getAPInt().getMinSignedBits() > 64)
2828 return false;
2829
2830 MemAccessTy AccessTy = getAccessType(UserInst);
2831 int64_t IncOffset = IncConst->getValue()->getSExtValue();
2832 if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
2833                        IncOffset, /*HasBaseReg=*/false))
2834 return false;
2835
2836 return true;
2837}
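
// Illustrative sketch of canFoldIVIncExpr (assumed values, not from the
// original file): for a chain increment of the form
//   IncExpr  = (8 : i64)               // constant stride between two users
//   UserInst = a load whose address operand is the chained IV value
// the increment is foldable when the target accepts a "[reg + 8]" style
// addressing mode for this access type, i.e. isAlwaysFoldable() approves the
// offset 8 with no BaseGV and no extra base register.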
2838
2839/// Generate an add or subtract for each IVInc in a chain to materialize the IV
2840/// user's operand from the previous IV user's operand.
2841void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
2842 SmallVectorImpl<WeakVH> &DeadInsts) {
2843 // Find the new IVOperand for the head of the chain. It may have been replaced
2844 // by LSR.
2845 const IVInc &Head = Chain.Incs[0];
2846 User::op_iterator IVOpEnd = Head.UserInst->op_end();
2847 // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
2848 User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
2849 IVOpEnd, L, SE);
2850 Value *IVSrc = nullptr;
2851 while (IVOpIter != IVOpEnd) {
2852 IVSrc = getWideOperand(*IVOpIter);
2853
2854 // If this operand computes the expression that the chain needs, we may use
2855 // it. (Check this after setting IVSrc which is used below.)
2856 //
2857 // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
2858 // narrow for the chain, so we can no longer use it. We do allow using a
2859 // wider phi, assuming the LSR checked for free truncation. In that case we
2860 // should already have a truncate on this operand such that
2861 // getSCEV(IVSrc) == IncExpr.
2862 if (SE.getSCEV(*IVOpIter) == Head.IncExpr
2863 || SE.getSCEV(IVSrc) == Head.IncExpr) {
2864 break;
2865 }
2866 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
2867 }
2868 if (IVOpIter == IVOpEnd) {
2869 // Gracefully give up on this chain.
2870    DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
2871 return;
2872 }
2873
2874  DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
2875 Type *IVTy = IVSrc->getType();
2876 Type *IntTy = SE.getEffectiveSCEVType(IVTy);
2877 const SCEV *LeftOverExpr = nullptr;
2878 for (const IVInc &Inc : Chain) {
2879 Instruction *InsertPt = Inc.UserInst;
2880 if (isa<PHINode>(InsertPt))
2881 InsertPt = L->getLoopLatch()->getTerminator();
2882
2883 // IVOper will replace the current IV User's operand. IVSrc is the IV
2884 // value currently held in a register.
2885 Value *IVOper = IVSrc;
2886 if (!Inc.IncExpr->isZero()) {
2887 // IncExpr was the result of subtraction of two narrow values, so must
2888 // be signed.
2889 const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
2890 LeftOverExpr = LeftOverExpr ?
2891 SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
2892 }
2893 if (LeftOverExpr && !LeftOverExpr->isZero()) {
2894 // Expand the IV increment.
2895 Rewriter.clearPostInc();
2896 Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
2897 const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
2898 SE.getUnknown(IncV));
2899 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
2900
2901 // If an IV increment can't be folded, use it as the next IV value.
2902 if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
2903        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
2904 IVSrc = IVOper;
2905 LeftOverExpr = nullptr;
2906 }
2907 }
2908 Type *OperTy = Inc.IVOperand->getType();
2909 if (IVTy != OperTy) {
2910      assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
2911             "cannot extend a chained IV");
2912 IRBuilder<> Builder(InsertPt);
2913 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
2914 }
2915 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
2916 DeadInsts.emplace_back(Inc.IVOperand);
2917 }
2918 // If LSR created a new, wider phi, we may also replace its postinc. We only
2919 // do this if we also found a wide value for the head of the chain.
2920 if (isa<PHINode>(Chain.tailUserInst())) {
2921 for (BasicBlock::iterator I = L->getHeader()->begin();
2922 PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
2923 if (!isCompatibleIVType(Phi, IVSrc))
2924 continue;
2925 Instruction *PostIncV = dyn_cast<Instruction>(
2926 Phi->getIncomingValueForBlock(L->getLoopLatch()));
2927 if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
2928 continue;
2929 Value *IVOper = IVSrc;
2930 Type *PostIncTy = PostIncV->getType();
2931 if (IVTy != PostIncTy) {
2932        assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
2933 IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
2934 Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
2935 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
2936 }
2937 Phi->replaceUsesOfWith(PostIncV, IVOper);
2938 DeadInsts.emplace_back(PostIncV);
2939 }
2940 }
2941}
2942
2943void LSRInstance::CollectFixupsAndInitialFormulae() {
2944 for (const IVStrideUse &U : IU) {
2945 Instruction *UserInst = U.getUser();
2946 // Skip IV users that are part of profitable IV Chains.
2947 User::op_iterator UseI =
2948 find(UserInst->operands(), U.getOperandValToReplace());
2949    assert(UseI != UserInst->op_end() && "cannot find IV operand");
2950 if (IVIncSet.count(UseI))
2951 continue;
2952
2953 LSRUse::KindType Kind = LSRUse::Basic;
2954 MemAccessTy AccessTy;
2955 if (isAddressUse(UserInst, U.getOperandValToReplace())) {
2956 Kind = LSRUse::Address;
2957 AccessTy = getAccessType(UserInst);
2958 }
2959
2960 const SCEV *S = IU.getExpr(U);
2961 PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
2962
2963 // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
2964 // (N - i == 0), and this allows (N - i) to be the expression that we work
2965 // with rather than just N or i, so we can consider the register
2966 // requirements for both N and i at the same time. Limiting this code to
2967 // equality icmps is not a problem because all interesting loops use
2968 // equality icmps, thanks to IndVarSimplify.
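    // Illustrative sketch (hypothetical values, not from the original file):
    // given an IV "iv" with SCEV {Start,+,Step}<%L> and a loop-invariant
    // bound "Len", the test "icmp eq iv, Len" is recorded below as an
    // ICmpZero use of the expression (Len - {Start,+,Step}<%L>), so the
    // register requirements for Len and the IV are costed together.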
2969 if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
2970 if (CI->isEquality()) {
2971 // Swap the operands if needed to put the OperandValToReplace on the
2972 // left, for consistency.
2973 Value *NV = CI->getOperand(1);
2974 if (NV == U.getOperandValToReplace()) {
2975 CI->setOperand(1, CI->getOperand(0));
2976 CI->setOperand(0, NV);
2977 NV = CI->getOperand(1);
2978 Changed = true;
2979 }
2980
2981 // x == y --> x - y == 0
2982 const SCEV *N = SE.getSCEV(NV);
2983 if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
2984 // S is normalized, so normalize N before folding it into S
2985 // to keep the result normalized.
2986 N = TransformForPostIncUse(Normalize, N, CI, nullptr,
2987 TmpPostIncLoops, SE, DT);
2988 Kind = LSRUse::ICmpZero;
2989 S = SE.getMinusSCEV(N, S);
2990 }
2991
2992 // -1 and the negations of all interesting strides (except the negation
2993 // of -1) are now also interesting.
2994 for (size_t i = 0, e = Factors.size(); i != e; ++i)
2995 if (Factors[i] != -1)
2996 Factors.insert(-(uint64_t)Factors[i]);
2997 Factors.insert(-1);
2998 }
2999
3000 // Get or create an LSRUse.
3001 std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
3002 size_t LUIdx = P.first;
3003 int64_t Offset = P.second;
3004 LSRUse &LU = Uses[LUIdx];
3005
3006 // Record the fixup.
3007 LSRFixup &LF = LU.getNewFixup();
3008 LF.UserInst = UserInst;
3009 LF.OperandValToReplace = U.getOperandValToReplace();
3010 LF.PostIncLoops = TmpPostIncLoops;
3011 LF.Offset = Offset;
3012 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3013
3014 if (!LU.WidestFixupType ||
3015 SE.getTypeSizeInBits(LU.WidestFixupType) <
3016 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3017 LU.WidestFixupType = LF.OperandValToReplace->getType();
3018
3019 // If this is the first use of this LSRUse, give it a formula.
3020 if (LU.Formulae.empty()) {
3021 InsertInitialFormula(S, LU, LUIdx);
3022 CountRegisters(LU.Formulae.back(), LUIdx);
3023 }
3024 }
3025
3026  DEBUG(print_fixups(dbgs()));
3027}
3028
3029/// Insert a formula for the given expression into the given use, separating out
3030/// loop-variant portions from loop-invariant and loop-computable portions.
3031void
3032LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
3033 // Mark uses whose expressions cannot be expanded.
3034 if (!isSafeToExpand(S, SE))
3035 LU.RigidFormula = true;
3036
3037 Formula F;
3038 F.initialMatch(S, L, SE);
3039 bool Inserted = InsertFormula(LU, LUIdx, F);
3040  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3041}
3042
3043/// Insert a simple single-register formula for the given expression into the
3044/// given use.
3045void
3046LSRInstance::InsertSupplementalFormula(const SCEV *S,
3047 LSRUse &LU, size_t LUIdx) {
3048 Formula F;
3049 F.BaseRegs.push_back(S);
3050 F.HasBaseReg = true;
3051 bool Inserted = InsertFormula(LU, LUIdx, F);
3052  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3053}
3054
3055/// Note which registers are used by the given formula, updating RegUses.
3056void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3057 if (F.ScaledReg)
3058 RegUses.countRegister(F.ScaledReg, LUIdx);
3059 for (const SCEV *BaseReg : F.BaseRegs)
3060 RegUses.countRegister(BaseReg, LUIdx);
3061}
3062
3063/// If the given formula has not yet been inserted, add it to the list, and
3064/// return true. Return false otherwise.
3065bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3066 // Do not insert formula that we will not be able to expand.
3067  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3068         "Formula is illegal");
3069 if (!LU.InsertFormula(F))
3070 return false;
3071
3072 CountRegisters(F, LUIdx);
3073 return true;
3074}
3075
3076/// Check for other uses of loop-invariant values which we're tracking. These
3077/// other uses will pin these values in registers, making them less profitable
3078/// for elimination.
3079/// TODO: This currently misses non-constant addrec step registers.
3080/// TODO: Should this give more weight to users inside the loop?
3081void
3082LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3083 SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
3084 SmallPtrSet<const SCEV *, 32> Visited;
3085
3086 while (!Worklist.empty()) {
3087 const SCEV *S = Worklist.pop_back_val();
3088
3089 // Don't process the same SCEV twice
3090 if (!Visited.insert(S).second)
3091 continue;
3092
3093 if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
3094 Worklist.append(N->op_begin(), N->op_end());
3095 else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
3096 Worklist.push_back(C->getOperand());
3097 else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
3098 Worklist.push_back(D->getLHS());
3099 Worklist.push_back(D->getRHS());
3100 } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3101 const Value *V = US->getValue();
3102 if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
3103 // Look for instructions defined outside the loop.
3104 if (L->contains(Inst)) continue;
3105 } else if (isa<UndefValue>(V))
3106 // Undef doesn't have a live range, so it doesn't matter.
3107 continue;
3108 for (const Use &U : V->uses()) {
3109 const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
3110 // Ignore non-instructions.
3111 if (!UserInst)
3112 continue;
3113 // Ignore instructions in other functions (as can happen with
3114 // Constants).
3115 if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3116 continue;
3117 // Ignore instructions not dominated by the loop.
3118 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3119 UserInst->getParent() :
3120 cast<PHINode>(UserInst)->getIncomingBlock(
3121 PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
3122 if (!DT.dominates(L->getHeader(), UseBB))
3123 continue;
3124 // Don't bother if the instruction is in a BB which ends in an EHPad.
3125 if (UseBB->getTerminator()->isEHPad())
3126 continue;
3127 // Ignore uses which are part of other SCEV expressions, to avoid
3128 // analyzing them multiple times.
3129 if (SE.isSCEVable(UserInst->getType())) {
3130 const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
3131 // If the user is a no-op, look through to its uses.
3132 if (!isa<SCEVUnknown>(UserS))
3133 continue;
3134 if (UserS == US) {
3135 Worklist.push_back(
3136 SE.getUnknown(const_cast<Instruction *>(UserInst)));
3137 continue;
3138 }
3139 }
3140 // Ignore icmp instructions which are already being analyzed.
3141 if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3142 unsigned OtherIdx = !U.getOperandNo();
3143 Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
3144 if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
3145 continue;
3146 }
3147
3148 std::pair<size_t, int64_t> P = getUse(
3149 S, LSRUse::Basic, MemAccessTy());
3150 size_t LUIdx = P.first;
3151 int64_t Offset = P.second;
3152 LSRUse &LU = Uses[LUIdx];
3153 LSRFixup &LF = LU.getNewFixup();
3154 LF.UserInst = const_cast<Instruction *>(UserInst);
3155 LF.OperandValToReplace = U;
3156 LF.Offset = Offset;
3157 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3158 if (!LU.WidestFixupType ||
3159 SE.getTypeSizeInBits(LU.WidestFixupType) <
3160 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3161 LU.WidestFixupType = LF.OperandValToReplace->getType();
3162 InsertSupplementalFormula(US, LU, LUIdx);
3163 CountRegisters(LU.Formulae.back(), Uses.size() - 1);
3164 break;
3165 }
3166 }
3167 }
3168}
3169
3170/// Split S into subexpressions which can be pulled out into separate
3171/// registers. If C is non-null, multiply each subexpression by C.
3172///
3173/// Return remainder expression after factoring the subexpressions captured by
3174/// Ops. If Ops is complete, return NULL.
3175static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
3176 SmallVectorImpl<const SCEV *> &Ops,
3177 const Loop *L,
3178 ScalarEvolution &SE,
3179 unsigned Depth = 0) {
3180 // Arbitrarily cap recursion to protect compile time.
3181 if (Depth >= 3)
3182 return S;
3183
3184 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
3185 // Break out add operands.
3186 for (const SCEV *S : Add->operands()) {
3187 const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
3188 if (Remainder)
3189 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3190 }
3191 return nullptr;
3192 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3193 // Split a non-zero base out of an addrec.
3194 if (AR->getStart()->isZero())
3195 return S;
3196
3197 const SCEV *Remainder = CollectSubexprs(AR->getStart(),
3198 C, Ops, L, SE, Depth+1);
3199 // Split the non-zero AddRec unless it is part of a nested recurrence that
3200 // does not pertain to this loop.
3201 if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3202 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3203 Remainder = nullptr;
3204 }
3205 if (Remainder != AR->getStart()) {
3206 if (!Remainder)
3207 Remainder = SE.getConstant(AR->getType(), 0);
3208 return SE.getAddRecExpr(Remainder,
3209 AR->getStepRecurrence(SE),
3210 AR->getLoop(),
3211 //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
3212 SCEV::FlagAnyWrap);
3213 }
3214 } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
3215 // Break (C * (a + b + c)) into C*a + C*b + C*c.
3216 if (Mul->getNumOperands() != 2)
3217 return S;
3218 if (const SCEVConstant *Op0 =
3219 dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
3220 C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
3221 const SCEV *Remainder =
3222 CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
3223 if (Remainder)
3224 Ops.push_back(SE.getMulExpr(C, Remainder));
3225 return nullptr;
3226 }
3227 }
3228 return S;
3229}
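
// Illustrative sketch of CollectSubexprs (assumed expressions, not from the
// original file): on S = {(A + B),+,Step}<%L> it pushes A and B onto Ops and
// returns the remainder {0,+,Step}<%L>, so each piece can become its own base
// register.  With C = 2 the pushed operands would be (2 * A) and (2 * B)
// instead.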
3230
3231/// \brief Helper function for LSRInstance::GenerateReassociations.
3232void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3233 const Formula &Base,
3234 unsigned Depth, size_t Idx,
3235 bool IsScaledReg) {
3236 const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3237 SmallVector<const SCEV *, 8> AddOps;
3238 const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
3239 if (Remainder)
3240 AddOps.push_back(Remainder);
3241
3242 if (AddOps.size() == 1)
3243 return;
3244
3245 for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
3246 JE = AddOps.end();
3247 J != JE; ++J) {
3248
3249 // Loop-variant "unknown" values are uninteresting; we won't be able to
3250 // do anything meaningful with them.
3251 if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
3252 continue;
3253
3254 // Don't pull a constant into a register if the constant could be folded
3255 // into an immediate field.
3256 if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3257 LU.AccessTy, *J, Base.getNumRegs() > 1))
3258 continue;
3259
3260 // Collect all operands except *J.
3261 SmallVector<const SCEV *, 8> InnerAddOps(
3262 ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
3263 InnerAddOps.append(std::next(J),
3264 ((const SmallVector<const SCEV *, 8> &)AddOps).end());
3265
3266 // Don't leave just a constant behind in a register if the constant could
3267 // be folded into an immediate field.
3268 if (InnerAddOps.size() == 1 &&
3269 isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3270 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3271 continue;
3272
3273 const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3274 if (InnerSum->isZero())
3275 continue;
3276 Formula F = Base;
3277
3278 // Add the remaining pieces of the add back into the new formula.
3279 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3280 if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3281 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3282 InnerSumSC->getValue()->getZExtValue())) {
3283 F.UnfoldedOffset =
3284 (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
3285 if (IsScaledReg)
3286 F.ScaledReg = nullptr;
3287 else
3288 F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
3289 } else if (IsScaledReg)
3290 F.ScaledReg = InnerSum;
3291 else
3292 F.BaseRegs[Idx] = InnerSum;
3293
3294 // Add J as its own register, or an unfolded immediate.
3295 const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
3296 if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3297 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3298 SC->getValue()->getZExtValue()))
3299 F.UnfoldedOffset =
3300 (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
3301 else
3302 F.BaseRegs.push_back(*J);
3303    // We may have changed the number of registers in the base regs; adjust the
3304    // formula accordingly.
3305 F.canonicalize();
3306
3307 if (InsertFormula(LU, LUIdx, F))
3308 // If that formula hadn't been seen before, recurse to find more like
3309 // it.
3310 GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
3311 }
3312}
3313
3314/// Split out subexpressions from adds and the bases of addrecs.
3315void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3316 Formula Base, unsigned Depth) {
3317  assert(Base.isCanonical() && "Input must be in the canonical form");
3318 // Arbitrarily cap recursion to protect compile time.
3319 if (Depth >= 3)
3320 return;
3321
3322 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3323 GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
3324
3325 if (Base.Scale == 1)
3326 GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
3327 /* Idx */ -1, /* IsScaledReg */ true);
3328}
3329
3330/// Generate a formula consisting of all of the loop-dominating registers added
3331/// into a single register.
3332void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
3333 Formula Base) {
3334 // This method is only interesting on a plurality of registers.
3335 if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
3336 return;
3337
3338 // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
3339 // processing the formula.
3340 Base.unscale();
3341 Formula F = Base;
3342 F.BaseRegs.clear();
3343 SmallVector<const SCEV *, 4> Ops;
3344 for (const SCEV *BaseReg : Base.BaseRegs) {
3345 if (SE.properlyDominates(BaseReg, L->getHeader()) &&
3346 !SE.hasComputableLoopEvolution(BaseReg, L))
3347 Ops.push_back(BaseReg);
3348 else
3349 F.BaseRegs.push_back(BaseReg);
3350 }
3351 if (Ops.size() > 1) {
3352 const SCEV *Sum = SE.getAddExpr(Ops);
3353 // TODO: If Sum is zero, it probably means ScalarEvolution missed an
3354 // opportunity to fold something. For now, just ignore such cases
3355 // rather than proceed with zero in a register.
3356 if (!Sum->isZero()) {
3357 F.BaseRegs.push_back(Sum);
3358 F.canonicalize();
3359 (void)InsertFormula(LU, LUIdx, F);
3360 }
3361 }
3362}
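
// Illustrative sketch of GenerateCombinations (assumed registers, not from
// the original file): for a base formula reg({X,+,1}<%L>) + reg(A) + reg(B),
// where A and B are loop-invariant values that dominate the loop header, the
// two loop-invariant registers are summed into one, yielding
// reg({X,+,1}<%L>) + reg(A + B) and saving a register.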
3363
3364/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
3365void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
3366 const Formula &Base, size_t Idx,
3367 bool IsScaledReg) {
3368 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3369 GlobalValue *GV = ExtractSymbol(G, SE);
3370 if (G->isZero() || !GV)
3371 return;
3372 Formula F = Base;
3373 F.BaseGV = GV;
3374 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3375 return;
3376 if (IsScaledReg)
3377 F.ScaledReg = G;
3378 else
3379 F.BaseRegs[Idx] = G;
3380 (void)InsertFormula(LU, LUIdx, F);
3381}
3382
3383/// Generate reuse formulae using symbolic offsets.
3384void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
3385 Formula Base) {
3386 // We can't add a symbolic offset if the address already contains one.
3387 if (Base.BaseGV) return;
3388
3389 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3390 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
3391 if (Base.Scale == 1)
3392 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
3393 /* IsScaledReg */ true);
3394}
3395
3396/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
3397void LSRInstance::GenerateConstantOffsetsImpl(
3398 LSRUse &LU, unsigned LUIdx, const Formula &Base,
3399 const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
3400 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3401 for (int64_t Offset : Worklist) {
3402 Formula F = Base;
3403 F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
3404 if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
3405 LU.AccessTy, F)) {
3406 // Add the offset to the base register.
3407 const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
3408 // If it cancelled out, drop the base register, otherwise update it.
3409 if (NewG->isZero()) {
3410 if (IsScaledReg) {
3411 F.Scale = 0;
3412 F.ScaledReg = nullptr;
3413 } else
3414 F.deleteBaseReg(F.BaseRegs[Idx]);
3415 F.canonicalize();
3416 } else if (IsScaledReg)
3417 F.ScaledReg = NewG;
3418 else
3419 F.BaseRegs[Idx] = NewG;
3420
3421 (void)InsertFormula(LU, LUIdx, F);
3422 }
3423 }
3424
3425 int64_t Imm = ExtractImmediate(G, SE);
3426 if (G->isZero() || Imm == 0)
3427 return;
3428 Formula F = Base;
3429 F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
3430 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3431 return;
3432 if (IsScaledReg)
3433 F.ScaledReg = G;
3434 else
3435 F.BaseRegs[Idx] = G;
3436 (void)InsertFormula(LU, LUIdx, F);
3437}
3438
3439/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
3440void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
3441 Formula Base) {
3442 // TODO: For now, just add the min and max offset, because it usually isn't
3443  // worthwhile looking at everything in between.
3444 SmallVector<int64_t, 2> Worklist;
3445 Worklist.push_back(LU.MinOffset);
3446 if (LU.MaxOffset != LU.MinOffset)
3447 Worklist.push_back(LU.MaxOffset);
3448
3449 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3450 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
3451 if (Base.Scale == 1)
3452 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
3453 /* IsScaledReg */ true);
3454}
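
// Illustrative sketch (assumed offsets, not from the original file): with
// LU.MinOffset = 0, LU.MaxOffset = 16 and a starting BaseOffset of 0, a base
// register G = {A,+,4}<%L> yields variants such as BaseOffset -16 paired with
// the register (16 + {A,+,4}<%L>), provided the shifted offset range is still
// legal for the use's kind and access type.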
3455
3456/// For ICmpZero, check to see if we can scale up the comparison. For example, x
3457/// == y -> x*c == y*c.
3458void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
3459 Formula Base) {
3460 if (LU.Kind != LSRUse::ICmpZero) return;
3461
3462 // Determine the integer type for the base formula.
3463 Type *IntTy = Base.getType();
3464 if (!IntTy) return;
3465 if (SE.getTypeSizeInBits(IntTy) > 64) return;
3466
3467 // Don't do this if there is more than one offset.
3468 if (LU.MinOffset != LU.MaxOffset) return;
3469
3470  assert(!Base.BaseGV && "ICmpZero use is not legal!");
3471
3472 // Check each interesting stride.
3473 for (int64_t Factor : Factors) {
3474 // Check that the multiplication doesn't overflow.
3475    if (Base.BaseOffset == INT64_MIN && Factor == -1)
3476 continue;
3477 int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
3478 if (NewBaseOffset / Factor != Base.BaseOffset)
3479 continue;
3480 // If the offset will be truncated at this use, check that it is in bounds.
3481 if (!IntTy->isPointerTy() &&
3482 !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
3483 continue;
3484
3485 // Check that multiplying with the use offset doesn't overflow.
3486 int64_t Offset = LU.MinOffset;
3487    if (Offset == INT64_MIN && Factor == -1)
3488 continue;
3489 Offset = (uint64_t)Offset * Factor;
3490 if (Offset / Factor != LU.MinOffset)
3491 continue;
3492 // If the offset will be truncated at this use, check that it is in bounds.
3493 if (!IntTy->isPointerTy() &&
3494 !ConstantInt::isValueValidForType(IntTy, Offset))
3495 continue;
3496
3497 Formula F = Base;
3498 F.BaseOffset = NewBaseOffset;
3499
3500 // Check that this scale is legal.
3501 if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
3502 continue;
3503
3504 // Compensate for the use having MinOffset built into it.
3505 F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
3506
3507 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3508
3509 // Check that multiplying with each base register doesn't overflow.
3510 for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
3511 F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
3512 if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
3513 goto next;
3514 }
3515
3516 // Check that multiplying with the scaled register doesn't overflow.
3517 if (F.ScaledReg) {
3518 F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
3519 if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
3520 continue;
3521 }
3522
3523 // Check that multiplying with the unfolded offset doesn't overflow.
3524 if (F.UnfoldedOffset != 0) {
3525      if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
3526 continue;
3527 F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
3528 if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
3529 continue;
3530 // If the offset will be truncated, check that it is in bounds.
3531 if (!IntTy->isPointerTy() &&
3532 !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
3533 continue;
3534 }
3535
3536 // If we make it here and it's legal, add it.
3537 (void)InsertFormula(LU, LUIdx, F);
3538 next:;
3539 }
3540}
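
// Illustrative sketch of GenerateICmpZeroScales (assumed factor, not from the
// original file): for an ICmpZero use of the expression (N - {0,+,1}<%L>) and
// an interesting stride factor of 4, the comparison is rescaled to
// (4*N - {0,+,4}<%L>) == 0, which lets the test share the stride-4 addrec
// already used by other formulae; the guards above reject the factor if any
// offset multiplication would overflow.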
3541
3542/// Generate stride factor reuse formulae by making use of scaled-offset address
3543/// modes, for example.
3544void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
3545 // Determine the integer type for the base formula.
3546 Type *IntTy = Base.getType();
3547 if (!IntTy) return;
3548
3549 // If this Formula already has a scaled register, we can't add another one.
3550 // Try to unscale the formula to generate a better scale.
3551 if (Base.Scale != 0 && !Base.unscale())
3552 return;
3553
3554  assert(Base.Scale == 0 && "unscale did not did its job!");
3555
3556 // Check each interesting stride.
3557 for (int64_t Factor : Factors) {
3558 Base.Scale = Factor;
3559 Base.HasBaseReg = Base.BaseRegs.size() > 1;
3560 // Check whether this scale is going to be legal.
3561 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3562 Base)) {
3563      // As a special case, handle out-of-loop Basic users specially.
3564 // TODO: Reconsider this special case.
3565 if (LU.Kind == LSRUse::Basic &&
3566 isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
3567 LU.AccessTy, Base) &&
3568 LU.AllFixupsOutsideLoop)
3569 LU.Kind = LSRUse::Special;
3570 else
3571 continue;
3572 }
3573 // For an ICmpZero, negating a solitary base register won't lead to
3574 // new solutions.
3575 if (LU.Kind == LSRUse::ICmpZero &&
3576 !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
3577 continue;
3578 // For each addrec base reg, apply the scale, if possible.
3579 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3580 if (const SCEVAddRecExpr *AR =
3581 dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
3582 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3583 if (FactorS->isZero())
3584 continue;
3585 // Divide out the factor, ignoring high bits, since we'll be
3586 // scaling the value back up in the end.
3587 if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
3588 // TODO: This could be optimized to avoid all the copying.
3589 Formula F = Base;
3590 F.ScaledReg = Quotient;
3591 F.deleteBaseReg(F.BaseRegs[i]);
3592 // The canonical representation of 1*reg is reg, which is already in
3593 // Base. In that case, do not try to insert the formula, it will be
3594 // rejected anyway.
3595 if (F.Scale == 1 && F.BaseRegs.empty())
3596 continue;
3597 (void)InsertFormula(LU, LUIdx, F);
3598 }
3599 }
3600 }
3601}
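
// Illustrative sketch of GenerateScales (assumed stride, not from the
// original file): with an interesting factor of 4 and a base register
// {0,+,4}<%L>, the addrec divides exactly by 4, so the formula can carry
// ScaledReg = {0,+,1}<%L> with Scale = 4 and let a scaled-index addressing
// mode supply the multiply.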
3602
3603/// Generate reuse formulae from different IV types.
3604void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
3605 // Don't bother truncating symbolic values.
3606 if (Base.BaseGV) return;
3607
3608 // Determine the integer type for the base formula.
3609 Type *DstTy = Base.getType();
3610 if (!DstTy) return;
3611 DstTy = SE.getEffectiveSCEVType(DstTy);
3612
3613 for (Type *SrcTy : Types) {
3614 if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
3615 Formula F = Base;
3616
3617 if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
3618 for (const SCEV *&BaseReg : F.BaseRegs)
3619 BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
3620
3621 // TODO: This assumes we've done basic processing on all uses and
3622 // have an idea what the register usage is.
3623 if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
3624 continue;
3625
3626 (void)InsertFormula(LU, LUIdx, F);
3627 }
3628 }
3629}
3630
3631namespace {
3632
3633/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
3634/// modifications so that the search phase doesn't have to worry about the data
3635/// structures moving underneath it.
3636struct WorkItem {
3637 size_t LUIdx;
3638 int64_t Imm;
3639 const SCEV *OrigReg;
3640
3641 WorkItem(size_t LI, int64_t I, const SCEV *R)
3642 : LUIdx(LI), Imm(I), OrigReg(R) {}
3643
3644 void print(raw_ostream &OS) const;
3645 void dump() const;
3646};
3647
3648}
3649
3650void WorkItem::print(raw_ostream &OS) const {
3651 OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
3652 << " , add offset " << Imm;
3653}
3654
3655 LLVM_DUMP_METHOD
3656void WorkItem::dump() const {
3657 print(errs()); errs() << '\n';
3658}
3659
3660/// Look for registers which are a constant distance apart and try to form reuse
3661/// opportunities between them.
3662void LSRInstance::GenerateCrossUseConstantOffsets() {
3663 // Group the registers by their value without any added constant offset.
3664 typedef std::map<int64_t, const SCEV *> ImmMapTy;
3665 DenseMap<const SCEV *, ImmMapTy> Map;
3666 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
3667 SmallVector<const SCEV *, 8> Sequence;
3668 for (const SCEV *Use : RegUses) {
3669 const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
3670 int64_t Imm = ExtractImmediate(Reg, SE);
3671 auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
3672 if (Pair.second)
3673 Sequence.push_back(Reg);
3674 Pair.first->second.insert(std::make_pair(Imm, Use));
3675 UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
3676 }
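  // Illustrative sketch (assumed registers, not from the original file): if
  // RegUses contains A, (A + 4) and (A + 8), ExtractImmediate strips the
  // constants so all three land in Map[A] with immediate keys {0, 4, 8}; the
  // work items built below then propose reusing one of these registers for
  // the others by folding the 4- or 8-byte difference into a formula offset.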
3677
3678 // Now examine each set of registers with the same base value. Build up
3679 // a list of work to do and do the work in a separate step so that we're
3680 // not adding formulae and register counts while we're searching.
3681 SmallVector<WorkItem, 32> WorkItems;
3682 SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
3683 for (const SCEV *Reg : Sequence) {
3684 const ImmMapTy &Imms = Map.find(Reg)->second;
3685
3686 // It's not worthwhile looking for reuse if there's only one offset.
3687 if (Imms.size() == 1)
3688 continue;
3689
3690    DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
3691          for (const auto &Entry : Imms)
3692            dbgs() << ' ' << Entry.first;
3693          dbgs() << '\n');
3694
3695 // Examine each offset.
3696 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
3697 J != JE; ++J) {
3698 const SCEV *OrigReg = J->second;
3699
3700 int64_t JImm = J->first;
3701 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
3702
3703 if (!isa<SCEVConstant>(OrigReg) &&
3704 UsedByIndicesMap[Reg].count() == 1) {
3705        DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
3706 continue;
3707 }
3708
3709      // Conservatively examine offsets between this orig reg and a few selected
3710      // other orig regs.
3711 ImmMapTy::const_iterator OtherImms[] = {
3712 Imms.begin(), std::prev(Imms.end()),
3713 Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
3714 2)
3715 };
3716 for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
3717 ImmMapTy::const_iterator M = OtherImms[i];
3718 if (M == J || M == JE) continue;
3719
3720 // Compute the difference between the two.
3721 int64_t Imm = (uint64_t)JImm - M->first;
3722 for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
3723 LUIdx = UsedByIndices.find_next(LUIdx))
3724 // Make a memo of this use, offset, and register tuple.
3725 if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
3726 WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
3727 }
3728 }
3729 }
3730
3731 Map.clear();
3732 Sequence.clear();
3733 UsedByIndicesMap.clear();
3734 UniqueItems.clear();
3735
3736 // Now iterate through the worklist and add new formulae.
3737 for (const WorkItem &WI : WorkItems) {
3738 size_t LUIdx = WI.LUIdx;
3739 LSRUse &LU = Uses[LUIdx];
3740 int64_t Imm = WI.Imm;
3741 const SCEV *OrigReg = WI.OrigReg;
3742
3743 Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
3744 const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
3745 unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
3746
3747 // TODO: Use a more targeted data structure.
3748 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
3749 Formula F = LU.Formulae[L];
3750 // FIXME: The code for the scaled and unscaled registers looks
3751 // very similar but slightly different. Investigate if they
3752 // could be merged. That way, we would not have to unscale the
3753 // Formula.
3754 F.unscale();
3755 // Use the immediate in the scaled register.
3756 if (F.ScaledReg == OrigReg) {
3757 int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
3758 // Don't create 50 + reg(-50).
3759 if (F.referencesReg(SE.getSCEV(
3760 ConstantInt::get(IntTy, -(uint64_t)Offset))))
3761 continue;
3762 Formula NewF = F;
3763 NewF.BaseOffset = Offset;
3764 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3765 NewF))
3766 continue;
3767 NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
3768
3769 // If the new scale is a constant in a register, and adding the constant
3770 // value to the immediate would produce a value closer to zero than the
3771 // immediate itself, then the formula isn't worthwhile.
3772 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
3773 if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
3774 (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
3775 .ule(std::abs(NewF.BaseOffset)))
3776 continue;
3777
3778 // OK, looks good.
3779 NewF.canonicalize();
3780 (void)InsertFormula(LU, LUIdx, NewF);
3781 } else {
3782 // Use the immediate in a base register.
3783 for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
3784 const SCEV *BaseReg = F.BaseRegs[N];
3785 if (BaseReg != OrigReg)
3786 continue;
3787 Formula NewF = F;
3788 NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
3789 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
3790 LU.Kind, LU.AccessTy, NewF)) {
3791 if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
3792 continue;
3793 NewF = F;
3794 NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
3795 }
3796 NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
3797
3798 // If the new formula has a constant in a register, and adding the
3799 // constant value to the immediate would produce a value closer to
3800 // zero than the immediate itself, then the formula isn't worthwhile.
3801 for (const SCEV *NewReg : NewF.BaseRegs)
3802 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
3803 if ((C->getAPInt() + NewF.BaseOffset)
3804 .abs()
3805 .slt(std::abs(NewF.BaseOffset)) &&
3806 (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
3807 countTrailingZeros<uint64_t>(NewF.BaseOffset))
3808 goto skip_formula;
3809
3810 // Ok, looks good.
3811 NewF.canonicalize();
3812 (void)InsertFormula(LU, LUIdx, NewF);
3813 break;
3814 skip_formula:;
3815 }
3816 }
3817 }
3818 }
3819}
3820
3821/// Generate formulae for each use.
3822void
3823LSRInstance::GenerateAllReuseFormulae() {
3824 // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
3825 // queries are more precise.
3826 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3827 LSRUse &LU = Uses[LUIdx];
3828 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3829 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
3830 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3831 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
3832 }
3833 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3834 LSRUse &LU = Uses[LUIdx];
3835 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3836 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
3837 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3838 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
3839 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3840 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
3841 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3842 GenerateScales(LU, LUIdx, LU.Formulae[i]);
3843 }
3844 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3845 LSRUse &LU = Uses[LUIdx];
3846 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3847 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
3848 }
3849
3850 GenerateCrossUseConstantOffsets();
3851
3852  DEBUG(dbgs() << "\n"
3853                  "After generating reuse formulae:\n";
3854        print_uses(dbgs()));
3855}
3856
3857/// If there are multiple formulae with the same set of registers used
3858/// by other uses, pick the best one and delete the others.
3859void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
3860 DenseSet<const SCEV *> VisitedRegs;
3861 SmallPtrSet<const SCEV *, 16> Regs;
3862 SmallPtrSet<const SCEV *, 16> LoserRegs;
3863#ifndef NDEBUG
3864 bool ChangedFormulae = false;
3865#endif
3866
3867 // Collect the best formula for each unique set of shared registers. This
3868 // is reset for each use.
3869 typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
3870 BestFormulaeTy;
3871 BestFormulaeTy BestFormulae;
3872
3873 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3874 LSRUse &LU = Uses[LUIdx];
3875    DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
3876
3877 bool Any = false;
3878 for (size_t FIdx = 0, NumForms = LU.Formulae.size();
3879 FIdx != NumForms; ++FIdx) {
3880 Formula &F = LU.Formulae[FIdx];
3881
3882 // Some formulas are instant losers. For example, they may depend on
3883 // nonexistent AddRecs from other loops. These need to be filtered
3884 // immediately, otherwise heuristics could choose them over others leading
3885 // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
3886 // avoids the need to recompute this information across formulae using the
3887 // same bad AddRec. Passing LoserRegs is also essential unless we remove
3888 // the corresponding bad register from the Regs set.
3889 Cost CostF;
3890 Regs.clear();
3891 CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
3892 if (CostF.isLoser()) {
3893 // During initial formula generation, undesirable formulae are generated
3894 // by uses within other loops that have some non-trivial address mode or
3895 // use the postinc form of the IV. LSR needs to provide these formulae
3896 // as the basis of rediscovering the desired formula that uses an AddRec
3897 // corresponding to the existing phi. Once all formulae have been
3898 // generated, these initial losers may be pruned.
3899        DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
3900              dbgs() << "\n");
3901 }
3902 else {
3903 SmallVector<const SCEV *, 4> Key;
3904 for (const SCEV *Reg : F.BaseRegs) {
3905 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
3906 Key.push_back(Reg);
3907 }
3908 if (F.ScaledReg &&
3909 RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
3910 Key.push_back(F.ScaledReg);
3911 // Unstable sort by host order ok, because this is only used for
3912 // uniquifying.
3913 std::sort(Key.begin(), Key.end());
3914
3915 std::pair<BestFormulaeTy::const_iterator, bool> P =
3916 BestFormulae.insert(std::make_pair(Key, FIdx));
3917 if (P.second)
3918 continue;
3919
3920 Formula &Best = LU.Formulae[P.first->second];
3921
3922 Cost CostBest;
3923 Regs.clear();
3924 CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
3925 if (CostF < CostBest)
3926 std::swap(F, Best);
3927        DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
3928              dbgs() << "\n"
3929                        " in favor of formula "; Best.print(dbgs());
3930              dbgs() << '\n');
3931 }
3932#ifndef NDEBUG
3933 ChangedFormulae = true;
3934#endif
3935 LU.DeleteFormula(F);
3936 --FIdx;
3937 --NumForms;
3938 Any = true;
3939 }
3940
3941 // Now that we've filtered out some formulae, recompute the Regs set.
3942 if (Any)
3943 LU.RecomputeRegs(LUIdx, RegUses);
3944
3945 // Reset this to prepare for the next use.
3946 BestFormulae.clear();
3947 }
3948
3949  DEBUG(if (ChangedFormulae) {
3950          dbgs() << "\n"
3951                    "After filtering out undesirable candidates:\n";
3952          print_uses(dbgs());
3953        });
3954}
3955
3956// This is a rough guess that seems to work fairly well.
3957static const size_t ComplexityLimit = UINT16_MAX;
3958
3959/// Estimate the worst-case number of solutions the solver might have to
3960/// consider. It almost never considers this many solutions because it prunes the
3961/// search space, but the pruning isn't always sufficient.
3962size_t LSRInstance::EstimateSearchSpaceComplexity() const {
3963 size_t Power = 1;
3964 for (const LSRUse &LU : Uses) {
3965 size_t FSize = LU.Formulae.size();
3966 if (FSize >= ComplexityLimit) {
3967 Power = ComplexityLimit;
3968 break;
3969 }
3970 Power *= FSize;
3971 if (Power >= ComplexityLimit)
3972 break;
3973 }
3974 return Power;
3975}
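
// Worked example (assumed counts, not from the original file): three uses
// with 10, 20 and 12 formulae give an estimate of 10 * 20 * 12 = 2400
// candidate assignments; once the running product reaches ComplexityLimit
// (65535) the loop stops early and reports the cap instead.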
3976
3977/// When one formula uses a superset of the registers of another formula, it
3978/// won't help reduce register pressure (though it may not necessarily hurt
3979/// register pressure); remove it to simplify the system.
3980void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
3981 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
3982    DEBUG(dbgs() << "The search space is too complex.\n");
3983
3984    DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
3985                    "which use a superset of registers used by other "
3986                    "formulae.\n");
3987
3988 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3989 LSRUse &LU = Uses[LUIdx];
3990 bool Any = false;
3991 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
3992 Formula &F = LU.Formulae[i];
3993 // Look for a formula with a constant or GV in a register. If the use
3994 // also has a formula with that same value in an immediate field,
3995 // delete the one that uses a register.
3996 for (SmallVectorImpl<const SCEV *>::const_iterator
3997 I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
3998 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
3999 Formula NewF = F;
4000 NewF.BaseOffset += C->getValue()->getSExtValue();
4001 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4002 (I - F.BaseRegs.begin()));
4003 if (LU.HasFormulaWithSameRegs(NewF)) {
4004              DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
4005 LU.DeleteFormula(F);
4006 --i;
4007 --e;
4008 Any = true;
4009 break;
4010 }
4011 } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
4012 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
4013 if (!F.BaseGV) {
4014 Formula NewF = F;
4015 NewF.BaseGV = GV;
4016 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4017 (I - F.BaseRegs.begin()));
4018 if (LU.HasFormulaWithSameRegs(NewF)) {
4019                DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4020                      dbgs() << '\n');
4021 LU.DeleteFormula(F);
4022 --i;
4023 --e;
4024 Any = true;
4025 break;
4026 }
4027 }
4028 }
4029 }
4030 }
4031 if (Any)
4032 LU.RecomputeRegs(LUIdx, RegUses);
4033 }
4034
4035    DEBUG(dbgs() << "After pre-selection:\n";
4036          print_uses(dbgs()));
4037 }
4038}
4039
4040/// When there are many registers for expressions like A, A+1, A+2, etc.,
4041/// allocate a single register for them.
4042void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4043 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4044 return;
4045
4046   DEBUG(dbgs() << "The search space is too complex.\n"
4047                   "Narrowing the search space by assuming that uses separated "
4048                   "by a constant offset will use the same registers.\n");
4049
4050 // This is especially useful for unrolled loops.
4051
4052 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4053 LSRUse &LU = Uses[LUIdx];
4054 for (const Formula &F : LU.Formulae) {
4055 if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
4056 continue;
4057
4058 LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
4059 if (!LUThatHas)
4060 continue;
4061
4062 if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
4063 LU.Kind, LU.AccessTy))
4064 continue;
4065
4066       DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
4067
4068 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4069
4070 // Transfer the fixups of LU to LUThatHas.
4071 for (LSRFixup &Fixup : LU.Fixups) {
4072 Fixup.Offset += F.BaseOffset;
4073 LUThatHas->pushFixup(Fixup);
4074         DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
4075 }
4076
4077 // Delete formulae from the new use which are no longer legal.
4078 bool Any = false;
4079 for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4080 Formula &F = LUThatHas->Formulae[i];
4081 if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4082 LUThatHas->Kind, LUThatHas->AccessTy, F)) {
4083           DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4084                 dbgs() << '\n');
4085 LUThatHas->DeleteFormula(F);
4086 --i;
4087 --e;
4088 Any = true;
4089 }
4090 }
4091
4092 if (Any)
4093 LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
4094
4095 // Delete the old use.
4096 DeleteUse(LU, LUIdx);
4097 --LUIdx;
4098 --NumUses;
4099 break;
4100 }
4101 }
4102
4103   DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4104}
4105
4106/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4107/// we've done more filtering, as it may be able to find more formulae to
4108/// eliminate.
4109void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4110 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4111     DEBUG(dbgs() << "The search space is too complex.\n");
4112
4113     DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4114                     "undesirable dedicated registers.\n");
4115
4116 FilterOutUndesirableDedicatedRegisters();
4117
4118     DEBUG(dbgs() << "After pre-selection:\n";
4119           print_uses(dbgs()));
4120 }
4121}
4122
4123/// Pick a register which seems likely to be profitable, and then in any use
4124/// which has any reference to that register, delete all formulae which do not
4125/// reference that register.
4126void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
4127 // With all other options exhausted, loop until the system is simple
4128 // enough to handle.
4129 SmallPtrSet<const SCEV *, 4> Taken;
4130 while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
1. Assuming the condition is true
2. Loop condition is true. Entering loop body
6. Assuming the condition is true
7. Loop condition is true. Entering loop body
11. Assuming the condition is true
12. Loop condition is true. Entering loop body
16. Assuming the condition is true
17. Loop condition is true. Entering loop body
4131     // Ok, we have too many formulae on our hands to conveniently handle.
4132 // Use a rough heuristic to thin out the list.
4133     DEBUG(dbgs() << "The search space is too complex.\n");
4134
4135 // Pick the register which is used by the most LSRUses, which is likely
4136 // to be a good reuse register candidate.
4137 const SCEV *Best = nullptr;
18. 'Best' initialized to a null pointer value
4138 unsigned BestNum = 0;
4139 for (const SCEV *Reg : RegUses) {
3. Assuming '__begin' is equal to '__end'
8. Assuming '__begin' is equal to '__end'
13. Assuming '__begin' is equal to '__end'
19. Assuming '__begin' is equal to '__end'
4140 if (Taken.count(Reg))
4141 continue;
4142 if (!Best)
4143 Best = Reg;
4144 else {
4145 unsigned Count = RegUses.getUsedByIndices(Reg).count();
4146 if (Count > BestNum) {
4147 Best = Reg;
4148 BestNum = Count;
4149 }
4150 }
4151 }
4152
4153     DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
4154                  << " will yield profitable reuse.\n");
20. Within the expansion of the macro 'DEBUG':
    a. Assuming 'DebugFlag' is not equal to 0
    b. Assuming the condition is true
    c. Forming reference to null pointer
4155 Taken.insert(Best);
4156
4157 // In any use with formulae which references this register, delete formulae
4158 // which don't reference it.
4159 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4. Assuming 'LUIdx' is equal to 'NumUses'
5. Loop condition is false. Execution continues on line 4181
9. Assuming 'LUIdx' is equal to 'NumUses'
10. Loop condition is false. Execution continues on line 4181
14. Assuming 'LUIdx' is equal to 'NumUses'
15. Loop condition is false. Execution continues on line 4181
4160 LSRUse &LU = Uses[LUIdx];
4161 if (!LU.Regs.count(Best)) continue;
4162
4163 bool Any = false;
4164 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4165 Formula &F = LU.Formulae[i];
4166 if (!F.referencesReg(Best)) {
4167           DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
4168 LU.DeleteFormula(F);
4169 --e;
4170 --i;
4171 Any = true;
4172           assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
4173 continue;
4174 }
4175 }
4176
4177 if (Any)
4178 LU.RecomputeRegs(LUIdx, RegUses);
4179 }
4180
4181     DEBUG(dbgs() << "After pre-selection:\n";
4182           print_uses(dbgs()));
4183 }
4184}
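The analyzer path above (steps 18 through 20c) shows how the reported "Forming reference to null pointer" arises: if the range-for over RegUses at line 4139 executes zero iterations (or every register is already in Taken), Best keeps its initial null value, and the DEBUG statement at line 4153 evaluates *Best. The snippet below is a minimal sketch of one possible guard, not the upstream fix; breaking out of the narrowing loop when no candidate register remains is an assumption about the intended behavior.

    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        continue;
      if (!Best)
        Best = Reg;
      else {
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
          Best = Reg;
          BestNum = Count;
        }
      }
    }
    // Sketch only: stop before *Best is formed when nothing was selected.
    if (!Best)
      break;
    DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                 << " will yield profitable reuse.\n");
    Taken.insert(Best);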
4185
4186/// If there are an extraordinary number of formulae to choose from, use some
4187/// rough heuristics to prune down the number of formulae. This keeps the main
4188/// solver from taking an extraordinary amount of time in some worst-case
4189/// scenarios.
4190void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
4191 NarrowSearchSpaceByDetectingSupersets();
4192 NarrowSearchSpaceByCollapsingUnrolledCode();
4193 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
4194 NarrowSearchSpaceByPickingWinnerRegs();
4195}
4196
4197/// This is the recursive solver.
4198void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
4199 Cost &SolutionCost,
4200 SmallVectorImpl<const Formula *> &Workspace,
4201 const Cost &CurCost,
4202 const SmallPtrSet<const SCEV *, 16> &CurRegs,
4203 DenseSet<const SCEV *> &VisitedRegs) const {
4204 // Some ideas:
4205 // - prune more:
4206 // - use more aggressive filtering
4207 // - sort the formula so that the most profitable solutions are found first
4208 // - sort the uses too
4209 // - search faster:
4210 //   - don't compute a cost and then compare; compare while computing a cost
4211 //     and bail early.
4212 // - track register sets with SmallBitVector
4213
4214 const LSRUse &LU = Uses[Workspace.size()];
4215
4216 // If this use references any register that's already a part of the
4217 // in-progress solution, consider it a requirement that a formula must
4218 // reference that register in order to be considered. This prunes out
4219 // unprofitable searching.
4220 SmallSetVector<const SCEV *, 4> ReqRegs;
4221 for (const SCEV *S : CurRegs)
4222 if (LU.Regs.count(S))
4223 ReqRegs.insert(S);
4224
4225 SmallPtrSet<const SCEV *, 16> NewRegs;
4226 Cost NewCost;
4227 for (const Formula &F : LU.Formulae) {
4228 // Ignore formulae which may not be ideal in terms of register reuse of
4229 // ReqRegs. The formula should use all required registers before
4230 // introducing new ones.
4231 int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
4232 for (const SCEV *Reg : ReqRegs) {
4233 if ((F.ScaledReg && F.ScaledReg == Reg) ||
4234 is_contained(F.BaseRegs, Reg)) {
4235 --NumReqRegsToFind;
4236 if (NumReqRegsToFind == 0)
4237 break;
4238 }
4239 }
4240 if (NumReqRegsToFind != 0) {
4241 // If none of the formulae satisfied the required registers, then we could
4242 // clear ReqRegs and try again. Currently, we simply give up in this case.
4243 continue;
4244 }
4245
4246 // Evaluate the cost of the current formula. If it's already worse than
4247 // the current best, prune the search at that point.
4248 NewCost = CurCost;
4249 NewRegs = CurRegs;
4250 NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
4251 if (NewCost < SolutionCost) {
4252 Workspace.push_back(&F);
4253 if (Workspace.size() != Uses.size()) {
4254 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
4255 NewRegs, VisitedRegs);
4256 if (F.getNumRegs() == 1 && Workspace.size() == 1)
4257 VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
4258 } else {
4259         DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
4260               dbgs() << ".\n Regs:";
4261               for (const SCEV *S : NewRegs)
4262                 dbgs() << ' ' << *S;
4263               dbgs() << '\n');
4264
4265 SolutionCost = NewCost;
4266 Solution = Workspace;
4267 }
4268 Workspace.pop_back();
4269 }
4270 }
4271}
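SolveRecurse is a branch-and-bound search: each recursion level commits to one formula for the next use, accumulates the cost of the partial assignment, and abandons any branch whose cost is already no better than the best complete solution found so far. The self-contained sketch below illustrates only that pruning pattern with plain non-negative integer costs; the names (Options, solveRecurse) are illustrative and not from this file, and it deliberately omits LSR's Cost/Formula machinery and the required-register filtering.

    #include <climits>
    #include <cstdio>
    #include <vector>

    // One integer "cost" per option, one option chosen per slot.
    static void solveRecurse(const std::vector<std::vector<int>> &Options,
                             std::vector<int> &Workspace, int CurCost,
                             std::vector<int> &Solution, int &SolutionCost) {
      if (Workspace.size() == Options.size()) {
        SolutionCost = CurCost;  // Reached only when CurCost beats the old best.
        Solution = Workspace;
        return;
      }
      for (int Cost : Options[Workspace.size()]) {
        int NewCost = CurCost + Cost;
        if (NewCost >= SolutionCost)
          continue;  // Prune: with non-negative costs this branch cannot win.
        Workspace.push_back(Cost);
        solveRecurse(Options, Workspace, NewCost, Solution, SolutionCost);
        Workspace.pop_back();
      }
    }

    int main() {
      std::vector<std::vector<int>> Options = {{3, 1, 4}, {1, 5}, {9, 2, 6}};
      std::vector<int> Workspace, Solution;
      int SolutionCost = INT_MAX;
      solveRecurse(Options, Workspace, 0, Solution, SolutionCost);
      std::printf("best total cost = %d\n", SolutionCost);  // 1 + 1 + 2 = 4
      return 0;
    }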
4272
4273/// Choose one formula from each use. Return the results in the given Solution
4274/// vector.
4275void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
4276 SmallVector<const Formula *, 8> Workspace;
4277 Cost SolutionCost;
4278 SolutionCost.Lose();
4279 Cost CurCost;
4280 SmallPtrSet<const SCEV *, 16> CurRegs;
4281 DenseSet<const SCEV *> VisitedRegs;
4282 Workspace.reserve(Uses.size());
4283
4284 // SolveRecurse does all the work.
4285 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
4286 CurRegs, VisitedRegs);
4287 if (Solution.empty()) {
4288     DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
4289 return;
4290 }
4291
4292 // Ok, we've now made all our decisions.
4293   DEBUG(dbgs() << "\n"
4294                   "The chosen solution requires "; SolutionCost.print(dbgs());
4295         dbgs() << ":\n";
4296         for (size_t i = 0, e = Uses.size(); i != e; ++i) {
4297           dbgs() << " ";
4298           Uses[i].print(dbgs());
4299           dbgs() << "\n"
4300                     " ";
4301           Solution[i]->print(dbgs());
4302           dbgs() << '\n';
4303         });
4304
4305   assert(Solution.size() == Uses.size() && "Malformed solution!");
4306}
4307
4308/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far
4309/// as we can go while still being dominated by the input positions. This helps
4310/// canonicalize the insert position, which encourages sharing.
4311BasicBlock::iterator
4312LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
4313 const SmallVectorImpl<Instruction *> &Inputs)
4314 const {
4315 Instruction *Tentative = &*IP;
4316 for (;;) {
4317 bool AllDominate = true;
4318 Instruction *BetterPos = nullptr;
4319     // Don't bother attempting to insert before a catchswitch; its basic block
4320     // cannot have other non-PHI instructions.
4321 if (isa<CatchSwitchInst>(Tentative))
4322 return IP;
4323
4324 for (Instruction *Inst : Inputs) {
4325 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
4326 AllDominate = false;
4327 break;
4328 }
4329 // Attempt to find an insert position in the middle of the block,
4330 // instead of at the end, so that it can be used for other expansions.
4331 if (Tentative->getParent() == Inst->getParent() &&
4332 (!BetterPos || !DT.dominates(Inst, BetterPos)))
4333 BetterPos = &*std::next(BasicBlock::iterator(Inst));
4334 }
4335 if (!AllDominate)
4336 break;
4337 if (BetterPos)
4338 IP = BetterPos->getIterator();
4339 else
4340 IP = Tentative->getIterator();
4341
4342 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
4343 unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
4344
4345 BasicBlock *IDom;
4346 for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
4347 if (!Rung) return IP;
4348 Rung = Rung->getIDom();
4349 if (!Rung) return IP;
4350 IDom = Rung->getBlock();
4351
4352 // Don't climb into a loop though.
4353 const Loop *IDomLoop = LI.getLoopFor(IDom);
4354 unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
4355 if (IDomDepth <= IPLoopDepth &&
4356 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
4357 break;
4358 }
4359
4360 Tentative = IDom->getTerminator();
4361 }
4362
4363 return IP;
4364}
4365
4366/// Determine an input position which will be dominated by the operands and
4367/// which will dominate the result.
4368BasicBlock::iterator
4369LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
4370 const LSRFixup &LF,
4371 const LSRUse &LU,
4372 SCEVExpander &Rewriter) const {
4373 // Collect some instructions which must be dominated by the
4374 // expanding replacement. These must be dominated by any operands that
4375 // will be required in the expansion.
4376 SmallVector<Instruction *, 4> Inputs;
4377 if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
4378 Inputs.push_back(I);
4379 if (LU.Kind == LSRUse::ICmpZero)
4380 if (Instruction *I =
4381 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
4382 Inputs.push_back(I);
4383 if (LF.PostIncLoops.count(L)) {
4384 if (LF.isUseFullyOutsideLoop(L))
4385 Inputs.push_back(L->getLoopLatch()->getTerminator());
4386 else
4387 Inputs.push_back(IVIncInsertPos);
4388 }
4389 // The expansion must also be dominated by the increment positions of any
4390   // loops for which it is using post-inc mode.
4391 for (const Loop *PIL : LF.PostIncLoops) {
4392 if (PIL == L) continue;
4393
4394 // Be dominated by the loop exit.
4395 SmallVector<BasicBlock *, 4> ExitingBlocks;
4396 PIL->getExitingBlocks(ExitingBlocks);
4397 if (!ExitingBlocks.empty()) {
4398 BasicBlock *BB = ExitingBlocks[0];
4399 for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
4400 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
4401 Inputs.push_back(BB->getTerminator());
4402 }
4403 }
4404
4405   assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
4406          && !isa<DbgInfoIntrinsic>(LowestIP) &&
4407          "Insertion point must be a normal instruction");
4408
4409 // Then, climb up the immediate dominator tree as far as we can go while
4410 // still being dominated by the input positions.
4411 BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
4412
4413 // Don't insert instructions before PHI nodes.
4414 while (isa<PHINode>(IP)) ++IP;
4415
4416 // Ignore landingpad instructions.
4417 while (IP->isEHPad()) ++IP;
4418
4419 // Ignore debug intrinsics.
4420 while (isa<DbgInfoIntrinsic>(IP)) ++IP;
4421
4422 // Set IP below instructions recently inserted by SCEVExpander. This keeps the
4423 // IP consistent across expansions and allows the previously inserted
4424 // instructions to be reused by subsequent expansion.
4425 while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
4426 ++IP;
4427
4428 return IP;
4429}
4430
4431/// Emit instructions for the leading candidate expression for this LSRUse (this
4432/// is called "expanding").
4433Value *LSRInstance::Expand(const LSRUse &LU,
4434 const LSRFixup &LF,
4435 const Formula &F,
4436 BasicBlock::iterator IP,
4437 SCEVExpander &Rewriter,
4438 SmallVectorImpl<WeakVH> &DeadInsts) const {
4439 if (LU.RigidFormula)
4440 return LF.OperandValToReplace;
4441
4442 // Determine an input position which will be dominated by the operands and
4443 // which will dominate the result.
4444 IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
4445 Rewriter.setInsertPoint(&*IP);
4446
4447 // Inform the Rewriter if we have a post-increment use, so that it can
4448 // perform an advantageous expansion.
4449 Rewriter.setPostInc(LF.PostIncLoops);
4450
4451 // This is the type that the user actually needs.
4452 Type *OpTy = LF.OperandValToReplace->getType();
4453 // This will be the type that we'll initially expand to.
4454 Type *Ty = F.getType();
4455 if (!Ty)
4456 // No type known; just expand directly to the ultimate type.
4457 Ty = OpTy;
4458 else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
4459 // Expand directly to the ultimate type if it's the right size.
4460 Ty = OpTy;
4461 // This is the type to do integer arithmetic in.
4462 Type *IntTy = SE.getEffectiveSCEVType(Ty);
4463
4464 // Build up a list of operands to add together to form the full base.
4465 SmallVector<const SCEV *, 8> Ops;
4466
4467 // Expand the BaseRegs portion.
4468 for (const SCEV *Reg : F.BaseRegs) {
4469     assert(!Reg->isZero() && "Zero allocated in a base register!");
4470
4471 // If we're expanding for a post-inc user, make the post-inc adjustment.
4472 PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4473 Reg = TransformForPostIncUse(Denormalize, Reg,
4474 LF.UserInst, LF.OperandValToReplace,
4475 Loops, SE, DT);
4476
4477 Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
4478 }
4479
4480 // Expand the ScaledReg portion.
4481 Value *ICmpScaledV = nullptr;
4482 if (F.Scale != 0) {
4483 const SCEV *ScaledS = F.ScaledReg;
4484
4485 // If we're expanding for a post-inc user, make the post-inc adjustment.
4486 PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4487 ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
4488 LF.UserInst, LF.OperandValToReplace,
4489 Loops, SE, DT);
4490
4491 if (LU.Kind == LSRUse::ICmpZero) {
4492 // Expand ScaleReg as if it was part of the base regs.
4493 if (F.Scale == 1)
4494 Ops.push_back(
4495 SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
4496 else {
4497 // An interesting way of "folding" with an icmp is to use a negated
4498 // scale, which we'll implement by inserting it into the other operand
4499 // of the icmp.
4500         assert(F.Scale == -1 &&
4501                "The only scale supported by ICmpZero uses is -1!");
4502 ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
4503 }
4504 } else {
4505 // Otherwise just expand the scaled register and an explicit scale,
4506 // which is expected to be matched as part of the address.
4507
4508       // Flush the operand list to suppress SCEVExpander hoisting address modes,
4509       // unless the addressing mode will not be folded.
4510 if (!Ops.empty() && LU.Kind == LSRUse::Address &&
4511 isAMCompletelyFolded(TTI, LU, F)) {
4512 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
4513 Ops.clear();
4514 Ops.push_back(SE.getUnknown(FullV));
4515 }
4516 ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
4517 if (F.Scale != 1)
4518 ScaledS =
4519 SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
4520 Ops.push_back(ScaledS);
4521 }
4522 }
4523
4524 // Expand the GV portion.
4525 if (F.BaseGV) {
4526 // Flush the operand list to suppress SCEVExpander hoisting.
4527 if (!Ops.empty()) {
4528 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
4529 Ops.clear();
4530 Ops.push_back(SE.getUnknown(FullV));
4531 }
4532 Ops.push_back(SE.getUnknown(F.BaseGV));
4533 }
4534
4535 // Flush the operand list to suppress SCEVExpander hoisting of both folded and
4536 // unfolded offsets. LSR assumes they both live next to their uses.
4537 if (!Ops.empty()) {
4538 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
4539 Ops.clear();
4540 Ops.push_back(SE.getUnknown(FullV));
4541 }
4542
4543 // Expand the immediate portion.
4544 int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
4545 if (Offset != 0) {
4546 if (LU.Kind == LSRUse::ICmpZero) {
4547 // The other interesting way of "folding" with an ICmpZero is to use a
4548 // negated immediate.
4549 if (!ICmpScaledV)
4550 ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
4551 else {
4552 Ops.push_back(SE.getUnknown(ICmpScaledV));
4553 ICmpScaledV = ConstantInt::get(IntTy, Offset);
4554 }
4555 } else {
4556 // Just add the immediate values. These again are expected to be matched
4557 // as part of the address.
4558 Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
4559 }
4560 }
4561
4562 // Expand the unfolded offset portion.
4563 int64_t UnfoldedOffset = F.UnfoldedOffset;
4564 if (UnfoldedOffset != 0) {
4565 // Just add the immediate values.
4566 Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
4567 UnfoldedOffset)));
4568 }
4569
4570 // Emit instructions summing all the operands.
4571 const SCEV *FullS = Ops.empty() ?
4572 SE.getConstant(IntTy, 0) :
4573 SE.getAddExpr(Ops);
4574 Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
4575
4576 // We're done expanding now, so reset the rewriter.
4577 Rewriter.clearPostInc();
4578
4579 // An ICmpZero Formula represents an ICmp which we're handling as a
4580 // comparison against zero. Now that we've expanded an expression for that
4581 // form, update the ICmp's other operand.
4582 if (LU.Kind == LSRUse::ICmpZero) {
4583 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
4584 DeadInsts.emplace_back(CI->getOperand(1));
4585     assert(!F.BaseGV && "ICmp does not support folding a global value and "
4586            "a scale at the same time!");
4587 if (F.Scale == -1) {
4588 if (ICmpScaledV->getType() != OpTy) {
4589 Instruction *Cast =
4590 CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
4591 OpTy, false),
4592 ICmpScaledV, OpTy, "tmp", CI);
4593 ICmpScaledV = Cast;
4594 }
4595 CI->setOperand(1, ICmpScaledV);
4596 } else {
4597 // A scale of 1 means that the scale has been expanded as part of the
4598 // base regs.
4599       assert((F.Scale == 0 || F.Scale == 1) &&
4600              "ICmp does not support folding a global value and "
4601              "a scale at the same time!");
4602 Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
4603 -(uint64_t)Offset);
4604 if (C->getType() != OpTy)
4605 C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
4606 OpTy, false),
4607 C, OpTy);
4608
4609 CI->setOperand(1, C);
4610 }
4611 }
4612
4613 return FullV;
4614}
4615
4616/// Helper for Rewrite. PHI nodes are special because the use of their operands
4617/// effectively happens in their predecessor blocks, so the expression may need
4618/// to be expanded in multiple places.
4619void LSRInstance::RewriteForPHI(PHINode *PN,
4620 const LSRUse &LU,
4621 const LSRFixup &LF,
4622 const Formula &F,
4623 SCEVExpander &Rewriter,
4624 SmallVectorImpl<WeakVH> &DeadInsts) const {
4625 DenseMap<BasicBlock *, Value *> Inserted;
4626 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
4627 if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
4628 BasicBlock *BB = PN->getIncomingBlock(i);
4629
4630 // If this is a critical edge, split the edge so that we do not insert
4631 // the code on all predecessor/successor paths. We do this unless this
4632 // is the canonical backedge for this loop, which complicates post-inc
4633 // users.
4634 if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
4635 !isa<IndirectBrInst>(BB->getTerminator())) {
4636 BasicBlock *Parent = PN->getParent();
4637 Loop *PNLoop = LI.getLoopFor(Parent);
4638 if (!PNLoop || Parent != PNLoop->getHeader()) {
4639 // Split the critical edge.
4640 BasicBlock *NewBB = nullptr;
4641 if (!Parent->isLandingPad()) {
4642 NewBB = SplitCriticalEdge(BB, Parent,
4643 CriticalEdgeSplittingOptions(&DT, &LI)
4644 .setMergeIdenticalEdges()
4645 .setDontDeleteUselessPHIs());
4646 } else {
4647 SmallVector<BasicBlock*, 2> NewBBs;
4648 SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
4649 NewBB = NewBBs[0];
4650 }
4651 // If NewBB==NULL, then SplitCriticalEdge refused to split because all
4652 // phi predecessors are identical. The simple thing to do is skip
4653 // splitting in this case rather than complicate the API.
4654 if (NewBB) {
4655 // If PN is outside of the loop and BB is in the loop, we want to
4656 // move the block to be immediately before the PHI block, not
4657 // immediately after BB.
4658 if (L->contains(BB) && !L->contains(PN))
4659 NewBB->moveBefore(PN->getParent());
4660
4661 // Splitting the edge can reduce the number of PHI entries we have.
4662 e = PN->getNumIncomingValues();
4663 BB = NewBB;
4664 i = PN->getBasicBlockIndex(BB);
4665 }
4666 }
4667 }
4668
4669 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
4670 Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
4671 if (!Pair.second)
4672 PN->setIncomingValue(i, Pair.first->second);
4673 else {
4674 Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(),
4675 Rewriter, DeadInsts);
4676
4677 // If this is reuse-by-noop-cast, insert the noop cast.
4678 Type *OpTy = LF.OperandValToReplace->getType();
4679 if (FullV->getType() != OpTy)
4680 FullV =
4681 CastInst::Create(CastInst::getCastOpcode(FullV, false,
4682 OpTy, false),
4683 FullV, LF.OperandValToReplace->getType(),
4684 "tmp", BB->getTerminator());
4685
4686 PN->setIncomingValue(i, FullV);
4687 Pair.first->second = FullV;
4688 }
4689 }
4690}
4691
4692/// Emit instructions for the leading candidate expression for this LSRUse (this
4693/// is called "expanding"), and update the UserInst to reference the newly
4694/// expanded value.
4695void LSRInstance::Rewrite(const LSRUse &LU,
4696 const LSRFixup &LF,
4697 const Formula &F,
4698 SCEVExpander &Rewriter,
4699 SmallVectorImpl<WeakVH> &DeadInsts) const {
4700 // First, find an insertion point that dominates UserInst. For PHI nodes,
4701 // find the nearest block which dominates all the relevant uses.
4702 if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
4703 RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts);
4704 } else {
4705 Value *FullV =
4706 Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
4707
4708 // If this is reuse-by-noop-cast, insert the noop cast.
4709 Type *OpTy = LF.OperandValToReplace->getType();
4710 if (FullV->getType() != OpTy) {
4711 Instruction *Cast =
4712 CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
4713 FullV, OpTy, "tmp", LF.UserInst);
4714 FullV = Cast;
4715 }
4716
4717 // Update the user. ICmpZero is handled specially here (for now) because
4718 // Expand may have updated one of the operands of the icmp already, and
4719 // its new value may happen to be equal to LF.OperandValToReplace, in
4720 // which case doing replaceUsesOfWith leads to replacing both operands
4721 // with the same value. TODO: Reorganize this.
4722 if (LU.Kind == LSRUse::ICmpZero)
4723 LF.UserInst->setOperand(0, FullV);
4724 else
4725 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
4726 }
4727
4728 DeadInsts.emplace_back(LF.OperandValToReplace);
4729}
4730
4731/// Rewrite all the fixup locations with new values, following the chosen
4732/// solution.
4733void LSRInstance::ImplementSolution(
4734 const SmallVectorImpl<const Formula *> &Solution) {
4735 // Keep track of instructions we may have made dead, so that
4736 // we can remove them after we are done working.
4737 SmallVector<WeakVH, 16> DeadInsts;
4738
4739 SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
4740 "lsr");
4741#ifndef NDEBUG
4742   Rewriter.setDebugType(DEBUG_TYPE);
4743#endif
4744 Rewriter.disableCanonicalMode();
4745 Rewriter.enableLSRMode();
4746 Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
4747
4748 // Mark phi nodes that terminate chains so the expander tries to reuse them.
4749 for (const IVChain &Chain : IVChainVec) {
4750 if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
4751 Rewriter.setChainedPhi(PN);
4752 }
4753
4754 // Expand the new value definitions and update the users.
4755 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
4756 for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
4757 Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts);
4758 Changed = true;
4759 }
4760
4761 for (const IVChain &Chain : IVChainVec) {
4762 GenerateIVChain(Chain, Rewriter, DeadInsts);
4763 Changed = true;
4764 }
4765 // Clean up after ourselves. This must be done before deleting any
4766 // instructions.
4767 Rewriter.clear();
4768
4769 Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
4770}
4771
4772LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
4773 DominatorTree &DT, LoopInfo &LI,
4774 const TargetTransformInfo &TTI)
4775 : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
4776 IVIncInsertPos(nullptr) {
4777 // If LoopSimplify form is not available, stay out of trouble.
4778 if (!L->isLoopSimplifyForm())
4779 return;
4780
4781 // If there's no interesting work to be done, bail early.
4782 if (IU.empty()) return;
4783
4784 // If there's too much analysis to be done, bail early. We won't be able to
4785 // model the problem anyway.
4786 unsigned NumUsers = 0;
4787 for (const IVStrideUse &U : IU) {
4788 if (++NumUsers > MaxIVUsers) {
4789 (void)U;
4790       DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
4791 return;
4792 }
4793 // Bail out if we have a PHI on an EHPad that gets a value from a
4794 // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
4795 // no good place to stick any instructions.
4796 if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
4797 auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
4798 if (isa<FuncletPadInst>(FirstNonPHI) ||
4799 isa<CatchSwitchInst>(FirstNonPHI))
4800 for (BasicBlock *PredBB : PN->blocks())
4801 if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
4802 return;
4803 }
4804 }
4805
4806#ifndef NDEBUG
4807 // All dominating loops must have preheaders, or SCEVExpander may not be able
4808 // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
4809 //
4810 // IVUsers analysis should only create users that are dominated by simple loop
4811 // headers. Since this loop should dominate all of its users, its user list
4812 // should be empty if this loop itself is not within a simple loop nest.
4813 for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
4814 Rung; Rung = Rung->getIDom()) {
4815 BasicBlock *BB = Rung->getBlock();
4816 const Loop *DomLoop = LI.getLoopFor(BB);
4817 if (DomLoop && DomLoop->getHeader() == BB) {
4818       assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
4819 }
4820 }
4821#endif // NDEBUG
4822
4823   DEBUG(dbgs() << "\nLSR on loop ";
4824         L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
4825         dbgs() << ":\n");
4826
4827 // First, perform some low-level loop optimizations.
4828 OptimizeShadowIV();
4829 OptimizeLoopTermCond();
4830
4831 // If loop preparation eliminates all interesting IV users, bail.
4832 if (IU.empty()) return;
4833
4834 // Skip nested loops until we can model them better with formulae.
4835 if (!L->empty()) {
4836     DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
4837 return;
4838 }
4839
4840 // Start collecting data and preparing for the solver.
4841 CollectChains();
4842 CollectInterestingTypesAndFactors();
4843 CollectFixupsAndInitialFormulae();
4844 CollectLoopInvariantFixupsAndFormulae();
4845
4846   assert(!Uses.empty() && "IVUsers reported at least one use");
4847   DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
4848         print_uses(dbgs()));
4849
4850 // Now use the reuse data to generate a bunch of interesting ways
4851 // to formulate the values needed for the uses.
4852 GenerateAllReuseFormulae();
4853
4854 FilterOutUndesirableDedicatedRegisters();
4855 NarrowSearchSpaceUsingHeuristics();
4856
4857 SmallVector<const Formula *, 8> Solution;
4858 Solve(Solution);
4859
4860 // Release memory that is no longer needed.
4861 Factors.clear();
4862 Types.clear();
4863 RegUses.clear();
4864
4865 if (Solution.empty())
4866 return;
4867
4868#ifndef NDEBUG
4869 // Formulae should be legal.
4870 for (const LSRUse &LU : Uses) {
4871 for (const Formula &F : LU.Formulae)
4872       assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4873                         F) && "Illegal formula generated!");
4875#endif
4876
4877 // Now that we've decided what we want, make it so.
4878 ImplementSolution(Solution);
4879}
4880
4881void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
4882 if (Factors.empty() && Types.empty()) return;
4883
4884 OS << "LSR has identified the following interesting factors and types: ";
4885 bool First = true;
4886
4887 for (int64_t Factor : Factors) {
4888 if (!First) OS << ", ";
4889 First = false;
4890 OS << '*' << Factor;
4891 }
4892
4893 for (Type *Ty : Types) {
4894 if (!First) OS << ", ";
4895 First = false;
4896 OS << '(' << *Ty << ')';
4897 }
4898 OS << '\n';
4899}
4900
4901void LSRInstance::print_fixups(raw_ostream &OS) const {
4902 OS << "LSR is examining the following fixup sites:\n";
4903 for (const LSRUse &LU : Uses)
4904 for (const LSRFixup &LF : LU.Fixups) {
4905 dbgs() << " ";
4906 LF.print(OS);
4907 OS << '\n';
4908 }
4909}
4910
4911void LSRInstance::print_uses(raw_ostream &OS) const {
4912 OS << "LSR is examining the following uses:\n";
4913 for (const LSRUse &LU : Uses) {
4914 dbgs() << " ";
4915 LU.print(OS);
4916 OS << '\n';
4917 for (const Formula &F : LU.Formulae) {
4918 OS << " ";
4919 F.print(OS);
4920 OS << '\n';
4921 }
4922 }
4923}
4924
4925void LSRInstance::print(raw_ostream &OS) const {
4926 print_factors_and_types(OS);
4927 print_fixups(OS);
4928 print_uses(OS);
4929}
4930
4931LLVM_DUMP_METHOD
4932void LSRInstance::dump() const {
4933 print(errs()); errs() << '\n';
4934}
4935
4936namespace {
4937
4938class LoopStrengthReduce : public LoopPass {
4939public:
4940 static char ID; // Pass ID, replacement for typeid
4941 LoopStrengthReduce();
4942
4943private:
4944 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
4945 void getAnalysisUsage(AnalysisUsage &AU) const override;
4946};
4947}
4948
4949char LoopStrengthReduce::ID = 0;
4950INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
4951                      "Loop Strength Reduction", false, false)
4952INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
4953INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
4954INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
4955INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
4956INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
4957INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
4958INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
4959                    "Loop Strength Reduction", false, false)
4960
4961Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }
4962
4963LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
4964 initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
4965}
4966
4967void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
4968 // We split critical edges, so we change the CFG. However, we do update
4969 // many analyses if they are around.
4970 AU.addPreservedID(LoopSimplifyID);
4971
4972 AU.addRequired<LoopInfoWrapperPass>();
4973 AU.addPreserved<LoopInfoWrapperPass>();
4974 AU.addRequiredID(LoopSimplifyID);
4975 AU.addRequired<DominatorTreeWrapperPass>();
4976 AU.addPreserved<DominatorTreeWrapperPass>();
4977 AU.addRequired<ScalarEvolutionWrapperPass>();
4978 AU.addPreserved<ScalarEvolutionWrapperPass>();
4979 // Requiring LoopSimplify a second time here prevents IVUsers from running
4980 // twice, since LoopSimplify was invalidated by running ScalarEvolution.
4981 AU.addRequiredID(LoopSimplifyID);
4982 AU.addRequired<IVUsersWrapperPass>();
4983 AU.addPreserved<IVUsersWrapperPass>();
4984 AU.addRequired<TargetTransformInfoWrapperPass>();
4985}
4986
4987static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
4988 DominatorTree &DT, LoopInfo &LI,
4989 const TargetTransformInfo &TTI) {
4990 bool Changed = false;
4991
4992 // Run the main LSR transformation.
4993 Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
4994
4995 // Remove any extra phis created by processing inner loops.
4996 Changed |= DeleteDeadPHIs(L->getHeader());
4997 if (EnablePhiElim && L->isLoopSimplifyForm()) {
4998 SmallVector<WeakVH, 16> DeadInsts;
4999 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
5000 SCEVExpander Rewriter(SE, DL, "lsr");
5001#ifndef NDEBUG
5002     Rewriter.setDebugType(DEBUG_TYPE);
5003#endif
5004 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
5005 if (numFolded) {
5006 Changed = true;
5007 DeleteTriviallyDeadInstructions(DeadInsts);
5008 DeleteDeadPHIs(L->getHeader());
5009 }
5010 }
5011 return Changed;
5012}
5013
5014bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
5015 if (skipLoop(L))
5016 return false;
5017
5018 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
5019 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
5020 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5021 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
5022 const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
5023 *L->getHeader()->getParent());
5024 return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
5025}
5026
5027PreservedAnalyses LoopStrengthReducePass::run(Loop &L,
5028 LoopAnalysisManager &AM) {
5029 const auto &FAM =
5030 AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
5031 Function *F = L.getHeader()->getParent();
5032
5033 auto &IU = AM.getResult<IVUsersAnalysis>(L);
5034 auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
5035 auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
5036 auto *LI = FAM.getCachedResult<LoopAnalysis>(*F);
5037 auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F);
5038   assert((SE && DT && LI && TTI) &&
5039          "Analyses for Loop Strength Reduce not available");
5040
5041 if (!ReduceLoopStrength(&L, IU, *SE, *DT, *LI, *TTI))
5042 return PreservedAnalyses::all();
5043
5044 return getLoopPassPreservedAnalyses();
5045}