Bug Summary

File: lib/Transforms/Scalar/LoopStrengthReduce.cpp
Location: line 4231, column 5
Description: Forming reference to null pointer

Annotated Source Code

1//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This transformation analyzes and transforms the induction variables (and
11// computations derived from them) into forms suitable for efficient execution
12// on the target.
13//
14// This pass performs a strength reduction on array references inside loops that
15// have as one or more of their components the loop induction variable, it
16// rewrites expressions to take advantage of scaled-index addressing modes
17// available on the target, and it performs a variety of other optimizations
18// related to loop induction variables.
19//
20// Terminology note: this code has a lot of handling for "post-increment" or
21// "post-inc" users. This is not talking about post-increment addressing modes;
22// it is instead talking about code like this:
23//
24// %i = phi [ 0, %entry ], [ %i.next, %latch ]
25// ...
26// %i.next = add %i, 1
27// %c = icmp eq %i.next, %n
28//
29// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
30// it's useful to think about these as the same register, with some uses using
31// the value of the register before the add and some using it after. In this
32// example, the icmp is a post-increment user, since it uses %i.next, which is
33// the value of the induction variable after the increment. The other common
34// case of post-increment users is users outside the loop.
35//
36// TODO: More sophistication in the way Formulae are generated and filtered.
37//
38// TODO: Handle multiple loops at a time.
39//
40// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
41// of a GlobalValue?
42//
43// TODO: When truncation is free, truncate ICmp users' operands to make it a
44// smaller encoding (on x86 at least).
45//
46// TODO: When a negated register is used by an add (such as in a list of
47// multiple base registers, or as the increment expression in an addrec),
48// we may not actually need both reg and (-1 * reg) in registers; the
49// negation can be implemented by using a sub instead of an add. The
50// lack of support for taking this into consideration when making
51// register pressure decisions is partly worked around by the "Special"
52// use kind.
53//
54//===----------------------------------------------------------------------===//
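(Editorial aside, not part of the annotated file: as a concrete illustration of the rewrite
described in the header comment above, a loop that indexes an array by the induction variable
can be strength-reduced so that a pointer becomes the induction variable. The C++ below is a
hypothetical sketch, not code from this pass.)

    // Before: each iteration forms the address a + i (a scaled-index computation).
    long sum_indexed(const int *a, int n) {
      long sum = 0;
      for (int i = 0; i != n; ++i)
        sum += a[i];
      return sum;
    }

    // After strength reduction: the pointer itself is advanced each iteration.
    long sum_strength_reduced(const int *a, int n) {
      long sum = 0;
      for (const int *p = a, *e = a + n; p != e; ++p)
        sum += *p;
      return sum;
    }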
55
56#include "llvm/Transforms/Scalar.h"
57#include "llvm/ADT/DenseSet.h"
58#include "llvm/ADT/Hashing.h"
59#include "llvm/ADT/STLExtras.h"
60#include "llvm/ADT/SetVector.h"
61#include "llvm/ADT/SmallBitVector.h"
62#include "llvm/Analysis/IVUsers.h"
63#include "llvm/Analysis/LoopPass.h"
64#include "llvm/Analysis/ScalarEvolutionExpander.h"
65#include "llvm/Analysis/TargetTransformInfo.h"
66#include "llvm/IR/Constants.h"
67#include "llvm/IR/DerivedTypes.h"
68#include "llvm/IR/Dominators.h"
69#include "llvm/IR/Instructions.h"
70#include "llvm/IR/IntrinsicInst.h"
71#include "llvm/IR/Module.h"
72#include "llvm/IR/ValueHandle.h"
73#include "llvm/Support/CommandLine.h"
74#include "llvm/Support/Debug.h"
75#include "llvm/Support/raw_ostream.h"
76#include "llvm/Transforms/Utils/BasicBlockUtils.h"
77#include "llvm/Transforms/Utils/Local.h"
78#include <algorithm>
79using namespace llvm;
80
82#define DEBUG_TYPE "loop-reduce"
82
83/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
84/// bail out. This threshold is far beyond the number of users that LSR can
85/// conceivably solve, so it should not affect generated code, but catches the
86/// worst cases before LSR burns too much compile time and stack space.
87static const unsigned MaxIVUsers = 200;
88
89// Temporary flag to cleanup congruent phis after LSR phi expansion.
90// It's currently disabled until we can determine whether it's truly useful or
91// not. The flag should be removed after the v3.0 release.
92// This is now needed for ivchains.
93static cl::opt<bool> EnablePhiElim(
94 "enable-lsr-phielim", cl::Hidden, cl::init(true),
95 cl::desc("Enable LSR phi elimination"));
96
97#ifndef NDEBUG
98// Stress test IV chain generation.
99static cl::opt<bool> StressIVChain(
100 "stress-ivchain", cl::Hidden, cl::init(false),
101 cl::desc("Stress test LSR IV chains"));
102#else
103static bool StressIVChain = false;
104#endif
105
106namespace {
107
108/// RegSortData - This class holds data which is used to order reuse candidates.
109class RegSortData {
110public:
111 /// UsedByIndices - This represents the set of LSRUse indices which reference
112 /// a particular register.
113 SmallBitVector UsedByIndices;
114
115 void print(raw_ostream &OS) const;
116 void dump() const;
117};
118
119}
120
121void RegSortData::print(raw_ostream &OS) const {
122 OS << "[NumUses=" << UsedByIndices.count() << ']';
123}
124
125#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
126void RegSortData::dump() const {
127 print(errs()); errs() << '\n';
128}
129#endif
130
131namespace {
132
133/// RegUseTracker - Map register candidates to information about how they are
134/// used.
135class RegUseTracker {
136 typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
137
138 RegUsesTy RegUsesMap;
139 SmallVector<const SCEV *, 16> RegSequence;
140
141public:
142 void CountRegister(const SCEV *Reg, size_t LUIdx);
143 void DropRegister(const SCEV *Reg, size_t LUIdx);
144 void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
145
146 bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
147
148 const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
149
150 void clear();
151
152 typedef SmallVectorImpl<const SCEV *>::iterator iterator;
153 typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
154 iterator begin() { return RegSequence.begin(); }
155 iterator end() { return RegSequence.end(); }
156 const_iterator begin() const { return RegSequence.begin(); }
157 const_iterator end() const { return RegSequence.end(); }
158};
159
160}
161
162void
163RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
164 std::pair<RegUsesTy::iterator, bool> Pair =
165 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
166 RegSortData &RSD = Pair.first->second;
167 if (Pair.second)
168 RegSequence.push_back(Reg);
169 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
170 RSD.UsedByIndices.set(LUIdx);
171}
172
173void
174RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
175 RegUsesTy::iterator It = RegUsesMap.find(Reg);
176 assert(It != RegUsesMap.end());
177 RegSortData &RSD = It->second;
178 assert(RSD.UsedByIndices.size() > LUIdx);
179 RSD.UsedByIndices.reset(LUIdx);
180}
181
182void
183RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
184 assert(LUIdx <= LastLUIdx);
185
186 // Update RegUses. The data structure is not optimized for this purpose;
187 // we must iterate through it and update each of the bit vectors.
188 for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
189 I != E; ++I) {
190 SmallBitVector &UsedByIndices = I->second.UsedByIndices;
191 if (LUIdx < UsedByIndices.size())
192 UsedByIndices[LUIdx] =
193 LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
194 UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
195 }
196}
197
198bool
199RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
200 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
201 if (I == RegUsesMap.end())
202 return false;
203 const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
204 int i = UsedByIndices.find_first();
205 if (i == -1) return false;
206 if ((size_t)i != LUIdx) return true;
207 return UsedByIndices.find_next(i) != -1;
208}
209
210const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
211 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
212 assert(I != RegUsesMap.end() && "Unknown register!");
213 return I->second.UsedByIndices;
214}
215
216void RegUseTracker::clear() {
217 RegUsesMap.clear();
218 RegSequence.clear();
219}
220
221namespace {
222
223/// Formula - This class holds information that describes a formula for
224/// computing a value that satisfies a use. It may include broken-out immediates and scaled
225/// registers.
226struct Formula {
227 /// Global base address used for complex addressing.
228 GlobalValue *BaseGV;
229
230 /// Base offset for complex addressing.
231 int64_t BaseOffset;
232
233 /// Whether any complex addressing has a base register.
234 bool HasBaseReg;
235
236 /// The scale of any complex addressing.
237 int64_t Scale;
238
239 /// BaseRegs - The list of "base" registers for this use. When this is
240/// non-empty, the canonical representation of a formula is
241 /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
242 /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
243 /// #1 enforces that the scaled register is always used when at least two
244 /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
245 /// #2 enforces that 1 * reg is reg.
246/// This invariant can be temporarily broken while building a formula.
247 /// However, every formula inserted into the LSRInstance must be in canonical
248 /// form.
249 SmallVector<const SCEV *, 4> BaseRegs;
250
251 /// ScaledReg - The 'scaled' register for this use. This should be non-null
252 /// when Scale is not zero.
253 const SCEV *ScaledReg;
254
255 /// UnfoldedOffset - An additional constant offset which is added near the
256 /// use. This requires a temporary register, but the offset itself can
257 /// live in an add immediate field rather than a register.
258 int64_t UnfoldedOffset;
259
260 Formula()
261 : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
262 ScaledReg(nullptr), UnfoldedOffset(0) {}
263
264 void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
265
266 bool isCanonical() const;
267
268 void Canonicalize();
269
270 bool Unscale();
271
272 size_t getNumRegs() const;
273 Type *getType() const;
274
275 void DeleteBaseReg(const SCEV *&S);
276
277 bool referencesReg(const SCEV *S) const;
278 bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
279 const RegUseTracker &RegUses) const;
280
281 void print(raw_ostream &OS) const;
282 void dump() const;
283};
284
285}
286
287/// DoInitialMatch - Recursion helper for InitialMatch.
288static void DoInitialMatch(const SCEV *S, Loop *L,
289 SmallVectorImpl<const SCEV *> &Good,
290 SmallVectorImpl<const SCEV *> &Bad,
291 ScalarEvolution &SE) {
292 // Collect expressions which properly dominate the loop header.
293 if (SE.properlyDominates(S, L->getHeader())) {
294 Good.push_back(S);
295 return;
296 }
297
298 // Look at add operands.
299 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
300 for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
301 I != E; ++I)
302 DoInitialMatch(*I, L, Good, Bad, SE);
303 return;
304 }
305
306 // Look at addrec operands.
307 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
308 if (!AR->getStart()->isZero()) {
309 DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
310 DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
311 AR->getStepRecurrence(SE),
312 // FIXME: AR->getNoWrapFlags()
313 AR->getLoop(), SCEV::FlagAnyWrap),
314 L, Good, Bad, SE);
315 return;
316 }
317
318 // Handle a multiplication by -1 (negation) if it didn't fold.
319 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
320 if (Mul->getOperand(0)->isAllOnesValue()) {
321 SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
322 const SCEV *NewMul = SE.getMulExpr(Ops);
323
324 SmallVector<const SCEV *, 4> MyGood;
325 SmallVector<const SCEV *, 4> MyBad;
326 DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
327 const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
328 SE.getEffectiveSCEVType(NewMul->getType())));
329 for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
330 E = MyGood.end(); I != E; ++I)
331 Good.push_back(SE.getMulExpr(NegOne, *I));
332 for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
333 E = MyBad.end(); I != E; ++I)
334 Bad.push_back(SE.getMulExpr(NegOne, *I));
335 return;
336 }
337
338 // Ok, we can't do anything interesting. Just stuff the whole thing into a
339 // register and hope for the best.
340 Bad.push_back(S);
341}
342
343/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
344/// attempting to keep all loop-invariant and loop-computable values in a
345/// single base register.
346void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
347 SmallVector<const SCEV *, 4> Good;
348 SmallVector<const SCEV *, 4> Bad;
349 DoInitialMatch(S, L, Good, Bad, SE);
350 if (!Good.empty()) {
351 const SCEV *Sum = SE.getAddExpr(Good);
352 if (!Sum->isZero())
353 BaseRegs.push_back(Sum);
354 HasBaseReg = true;
355 }
356 if (!Bad.empty()) {
357 const SCEV *Sum = SE.getAddExpr(Bad);
358 if (!Sum->isZero())
359 BaseRegs.push_back(Sum);
360 HasBaseReg = true;
361 }
362 Canonicalize();
363}
364
365/// \brief Check whether or not this formula satisfies the canonical
366/// representation.
367/// \see Formula::BaseRegs.
368bool Formula::isCanonical() const {
369 if (ScaledReg)
370 return Scale != 1 || !BaseRegs.empty();
371 return BaseRegs.size() <= 1;
372}
373
374/// \brief Helper method to morph a formula into its canonical representation.
375/// \see Formula::BaseRegs.
376/// Every formula having more than one base register must use the ScaledReg
377/// field. Otherwise, we would have to do special cases everywhere in LSR
378/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
379/// On the other hand, 1*reg should be canonicalized into reg.
380void Formula::Canonicalize() {
381 if (isCanonical())
382 return;
383 // So far we did not need this case. This is easy to implement but it is
384 // useless to maintain dead code. Besides, it could hurt compile time.
385 assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
386 // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg.
387 ScaledReg = BaseRegs.back();
388 BaseRegs.pop_back();
389 Scale = 1;
390 size_t BaseRegsSize = BaseRegs.size();
391 size_t Try = 0;
392 // If ScaledReg is an invariant, try to find a variant expression.
393 while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
394 std::swap(ScaledReg, BaseRegs[Try++]);
395}
396
397/// \brief Get rid of the scale in the formula.
398/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
399/// \return true if it was possible to get rid of the scale, false otherwise.
400/// \note After this operation the formula may not be in the canonical form.
401bool Formula::Unscale() {
402 if (Scale != 1)
403 return false;
404 Scale = 0;
405 BaseRegs.push_back(ScaledReg);
406 ScaledReg = nullptr;
407 return true;
408}
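(Editorial aside: a concrete reading of the canonical-form invariant documented on
Formula::BaseRegs, using hypothetical register names. Given the code above, these layouts
illustrate what isCanonical(), Canonicalize() and Unscale() treat as canonical.)

    // reg1 + reg2, canonical:      BaseRegs = {reg1}, ScaledReg = reg2, Scale = 1
    // reg1 + reg2, NOT canonical:  BaseRegs = {reg1, reg2}, ScaledReg = nullptr
    //   (Canonicalize() moves one base register into ScaledReg with Scale = 1)
    // 1*reg, NOT canonical:        BaseRegs = {}, ScaledReg = reg, Scale = 1
    //   (Unscale() turns it into BaseRegs = {reg}, ScaledReg = nullptr)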
409
410/// getNumRegs - Return the total number of register operands used by this
411/// formula. This does not include register uses implied by non-constant
412/// addrec strides.
413size_t Formula::getNumRegs() const {
414 return !!ScaledReg + BaseRegs.size();
415}
416
417/// getType - Return the type of this formula, if it has one, or null
418/// otherwise. This type is meaningless except for the bit size.
419Type *Formula::getType() const {
420 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
421 ScaledReg ? ScaledReg->getType() :
422 BaseGV ? BaseGV->getType() :
423 nullptr;
424}
425
426/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
427void Formula::DeleteBaseReg(const SCEV *&S) {
428 if (&S != &BaseRegs.back())
429 std::swap(S, BaseRegs.back());
430 BaseRegs.pop_back();
431}
432
433/// referencesReg - Test if this formula references the given register.
434bool Formula::referencesReg(const SCEV *S) const {
435 return S == ScaledReg ||
436 std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
437}
438
439/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
440/// which are used by uses other than the use with the given index.
441bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
442 const RegUseTracker &RegUses) const {
443 if (ScaledReg)
444 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
445 return true;
446 for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
447 E = BaseRegs.end(); I != E; ++I)
448 if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
449 return true;
450 return false;
451}
452
453void Formula::print(raw_ostream &OS) const {
454 bool First = true;
455 if (BaseGV) {
456 if (!First) OS << " + "; else First = false;
457 BaseGV->printAsOperand(OS, /*PrintType=*/false);
458 }
459 if (BaseOffset != 0) {
460 if (!First) OS << " + "; else First = false;
461 OS << BaseOffset;
462 }
463 for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
464 E = BaseRegs.end(); I != E; ++I) {
465 if (!First) OS << " + "; else First = false;
466 OS << "reg(" << **I << ')';
467 }
468 if (HasBaseReg && BaseRegs.empty()) {
469 if (!First) OS << " + "; else First = false;
470 OS << "**error: HasBaseReg**";
471 } else if (!HasBaseReg && !BaseRegs.empty()) {
472 if (!First) OS << " + "; else First = false;
473 OS << "**error: !HasBaseReg**";
474 }
475 if (Scale != 0) {
476 if (!First) OS << " + "; else First = false;
477 OS << Scale << "*reg(";
478 if (ScaledReg)
479 OS << *ScaledReg;
480 else
481 OS << "<unknown>";
482 OS << ')';
483 }
484 if (UnfoldedOffset != 0) {
485 if (!First) OS << " + ";
486 OS << "imm(" << UnfoldedOffset << ')';
487 }
488}
489
490#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
491void Formula::dump() const {
492 print(errs()); errs() << '\n';
493}
494#endif
495
496/// isAddRecSExtable - Return true if the given addrec can be sign-extended
497/// without changing its value.
498static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
499 Type *WideTy =
500 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
501 return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
502}
503
504/// isAddSExtable - Return true if the given add can be sign-extended
505/// without changing its value.
506static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
507 Type *WideTy =
508 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
509 return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
510}
511
512/// isMulSExtable - Return true if the given mul can be sign-extended
513/// without changing its value.
514static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
515 Type *WideTy =
516 IntegerType::get(SE.getContext(),
517 SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
518 return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
519}
520
521/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
522/// and if the remainder is known to be zero, or null otherwise. If
523/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
524/// to Y, ignoring that the multiplication may overflow, which is useful when
525/// the result will be used in a context where the most significant bits are
526/// ignored.
527static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
528 ScalarEvolution &SE,
529 bool IgnoreSignificantBits = false) {
530 // Handle the trivial case, which works for any SCEV type.
531 if (LHS == RHS)
532 return SE.getConstant(LHS->getType(), 1);
533
534 // Handle a few RHS special cases.
535 const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
536 if (RC) {
537 const APInt &RA = RC->getValue()->getValue();
538 // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
539 // some folding.
540 if (RA.isAllOnesValue())
541 return SE.getMulExpr(LHS, RC);
542 // Handle x /s 1 as x.
543 if (RA == 1)
544 return LHS;
545 }
546
547 // Check for a division of a constant by a constant.
548 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
549 if (!RC)
550 return nullptr;
551 const APInt &LA = C->getValue()->getValue();
552 const APInt &RA = RC->getValue()->getValue();
553 if (LA.srem(RA) != 0)
554 return nullptr;
555 return SE.getConstant(LA.sdiv(RA));
556 }
557
558 // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
559 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
560 if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
561 const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
562 IgnoreSignificantBits);
563 if (!Step) return nullptr;
564 const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
565 IgnoreSignificantBits);
566 if (!Start) return nullptr;
567 // FlagNW is independent of the start value, step direction, and is
568 // preserved with smaller magnitude steps.
569 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
570 return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
571 }
572 return nullptr;
573 }
574
575 // Distribute the sdiv over add operands, if the add doesn't overflow.
576 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
577 if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
578 SmallVector<const SCEV *, 8> Ops;
579 for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
580 I != E; ++I) {
581 const SCEV *Op = getExactSDiv(*I, RHS, SE,
582 IgnoreSignificantBits);
583 if (!Op) return nullptr;
584 Ops.push_back(Op);
585 }
586 return SE.getAddExpr(Ops);
587 }
588 return nullptr;
589 }
590
591 // Check for a multiply operand that we can pull RHS out of.
592 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
593 if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
594 SmallVector<const SCEV *, 4> Ops;
595 bool Found = false;
596 for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
597 I != E; ++I) {
598 const SCEV *S = *I;
599 if (!Found)
600 if (const SCEV *Q = getExactSDiv(S, RHS, SE,
601 IgnoreSignificantBits)) {
602 S = Q;
603 Found = true;
604 }
605 Ops.push_back(S);
606 }
607 return Found ? SE.getMulExpr(Ops) : nullptr;
608 }
609 return nullptr;
610 }
611
612 // Otherwise we don't know.
613 return nullptr;
614}
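(Editorial aside: illustrative uses of getExactSDiv; SE and Ty stand for a ScalarEvolution
instance and an integer type, and are hypothetical names here.)

    // Constant / constant succeeds only when the remainder is zero:
    //   getExactSDiv(SE.getConstant(Ty, 12), SE.getConstant(Ty, 4), SE)  // -> constant 3
    //   getExactSDiv(SE.getConstant(Ty, 13), SE.getConstant(Ty, 4), SE)  // -> nullptr
    // When SCEV can show an addrec does not overflow under sign extension (or
    // IgnoreSignificantBits is set), the division distributes over the start and
    // step, e.g. {0,+,8} /s 4 becomes {0,+,2}.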
615
616/// ExtractImmediate - If S involves the addition of a constant integer value,
617/// return that integer value, and mutate S to point to a new SCEV with that
618/// value excluded.
619static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
620 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
621 if (C->getValue()->getValue().getMinSignedBits() <= 64) {
622 S = SE.getConstant(C->getType(), 0);
623 return C->getValue()->getSExtValue();
624 }
625 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
626 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
627 int64_t Result = ExtractImmediate(NewOps.front(), SE);
628 if (Result != 0)
629 S = SE.getAddExpr(NewOps);
630 return Result;
631 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
632 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
633 int64_t Result = ExtractImmediate(NewOps.front(), SE);
634 if (Result != 0)
635 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
636 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
637 SCEV::FlagAnyWrap);
638 return Result;
639 }
640 return 0;
641}
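(Editorial aside: a sketch of the ExtractImmediate contract, assuming %x stands for some
non-constant SCEV.)

    // const SCEV *S = /* (4 + %x) */;
    // int64_t Imm = ExtractImmediate(S, SE);  // Imm == 4, S now points at %x
    // Imm = ExtractImmediate(S, SE);          // Imm == 0, S is unchanged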
642
643/// ExtractSymbol - If S involves the addition of a GlobalValue address,
644/// return that symbol, and mutate S to point to a new SCEV with that
645/// value excluded.
646static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
647 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
648 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
649 S = SE.getConstant(GV->getType(), 0);
650 return GV;
651 }
652 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
653 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
654 GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
655 if (Result)
656 S = SE.getAddExpr(NewOps);
657 return Result;
658 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
659 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
660 GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
661 if (Result)
662 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
663 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
664 SCEV::FlagAnyWrap);
665 return Result;
666 }
667 return nullptr;
668}
669
670/// isAddressUse - Returns true if the specified instruction is using the
671/// specified value as an address.
672static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
673 bool isAddress = isa<LoadInst>(Inst);
674 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
675 if (SI->getOperand(1) == OperandVal)
676 isAddress = true;
677 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
678 // Addressing modes can also be folded into prefetches and a variety
679 // of intrinsics.
680 switch (II->getIntrinsicID()) {
681 default: break;
682 case Intrinsic::prefetch:
683 case Intrinsic::x86_sse_storeu_ps:
684 case Intrinsic::x86_sse2_storeu_pd:
685 case Intrinsic::x86_sse2_storeu_dq:
686 case Intrinsic::x86_sse2_storel_dq:
687 if (II->getArgOperand(0) == OperandVal)
688 isAddress = true;
689 break;
690 }
691 }
692 return isAddress;
693}
694
695/// getAccessType - Return the type of the memory being accessed.
696static Type *getAccessType(const Instruction *Inst) {
697 Type *AccessTy = Inst->getType();
698 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
699 AccessTy = SI->getOperand(0)->getType();
700 else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
701 // Addressing modes can also be folded into prefetches and a variety
702 // of intrinsics.
703 switch (II->getIntrinsicID()) {
704 default: break;
705 case Intrinsic::x86_sse_storeu_ps:
706 case Intrinsic::x86_sse2_storeu_pd:
707 case Intrinsic::x86_sse2_storeu_dq:
708 case Intrinsic::x86_sse2_storel_dq:
709 AccessTy = II->getArgOperand(0)->getType();
710 break;
711 }
712 }
713
714 // All pointers have the same requirements, so canonicalize them to an
715 // arbitrary pointer type to minimize variation.
716 if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
717 AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
718 PTy->getAddressSpace());
719
720 return AccessTy;
721}
722
723/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
724static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
725 for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
726 PHINode *PN = dyn_cast<PHINode>(I); ++I) {
727 if (SE.isSCEVable(PN->getType()) &&
728 (SE.getEffectiveSCEVType(PN->getType()) ==
729 SE.getEffectiveSCEVType(AR->getType())) &&
730 SE.getSCEV(PN) == AR)
731 return true;
732 }
733 return false;
734}
735
736/// Check if expanding this expression is likely to incur significant cost. This
737/// is tricky because SCEV doesn't track which expressions are actually computed
738/// by the current IR.
739///
740/// We currently allow expansion of IV increments that involve adds,
741/// multiplication by constants, and AddRecs from existing phis.
742///
743/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
744/// obvious multiple of the UDivExpr.
745static bool isHighCostExpansion(const SCEV *S,
746 SmallPtrSetImpl<const SCEV*> &Processed,
747 ScalarEvolution &SE) {
748 // Zero/One operand expressions
749 switch (S->getSCEVType()) {
750 case scUnknown:
751 case scConstant:
752 return false;
753 case scTruncate:
754 return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
755 Processed, SE);
756 case scZeroExtend:
757 return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
758 Processed, SE);
759 case scSignExtend:
760 return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
761 Processed, SE);
762 }
763
764 if (!Processed.insert(S).second)
765 return false;
766
767 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
768 for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
769 I != E; ++I) {
770 if (isHighCostExpansion(*I, Processed, SE))
771 return true;
772 }
773 return false;
774 }
775
776 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
777 if (Mul->getNumOperands() == 2) {
778 // Multiplication by a constant is ok
779 if (isa<SCEVConstant>(Mul->getOperand(0)))
780 return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
781
782 // If we have the value of one operand, check if an existing
783 // multiplication already generates this expression.
784 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
785 Value *UVal = U->getValue();
786 for (User *UR : UVal->users()) {
787 // If U is a constant, it may be used by a ConstantExpr.
788 Instruction *UI = dyn_cast<Instruction>(UR);
789 if (UI && UI->getOpcode() == Instruction::Mul &&
790 SE.isSCEVable(UI->getType())) {
791 return SE.getSCEV(UI) == Mul;
792 }
793 }
794 }
795 }
796 }
797
798 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
799 if (isExistingPhi(AR, SE))
800 return false;
801 }
802
803 // For now, consider any other type of expression (div/mul/min/max) high cost.
804 return true;
805}
806
807/// DeleteTriviallyDeadInstructions - If any of the instructions in the
808/// specified set are trivially dead, delete them and see if this makes any of
809/// their operands subsequently dead.
810static bool
811DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
812 bool Changed = false;
813
814 while (!DeadInsts.empty()) {
815 Value *V = DeadInsts.pop_back_val();
816 Instruction *I = dyn_cast_or_null<Instruction>(V);
817
818 if (!I || !isInstructionTriviallyDead(I))
819 continue;
820
821 for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
822 if (Instruction *U = dyn_cast<Instruction>(*OI)) {
823 *OI = nullptr;
824 if (U->use_empty())
825 DeadInsts.push_back(U);
826 }
827
828 I->eraseFromParent();
829 Changed = true;
830 }
831
832 return Changed;
833}
834
835namespace {
836class LSRUse;
837}
838
839/// \brief Check if the addressing mode defined by \p F is completely
840/// folded in \p LU at isel time.
841/// This includes address-mode folding and special icmp tricks.
842/// This function returns true if \p LU can accommodate what \p F
843/// defines and up to 1 base + 1 scaled + offset.
844/// In other words, if \p F has several base registers, this function may
845/// still return true. Therefore, users still need to account for
846/// additional base registers and/or unfolded offsets to derive an
847/// accurate cost model.
848static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
849 const LSRUse &LU, const Formula &F);
850// Get the cost of the scaling factor used in F for LU.
851static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
852 const LSRUse &LU, const Formula &F);
853
854namespace {
855
856/// Cost - This class is used to measure and compare candidate formulae.
857class Cost {
858 /// TODO: Some of these could be merged. Also, a lexical ordering
859 /// isn't always optimal.
860 unsigned NumRegs;
861 unsigned AddRecCost;
862 unsigned NumIVMuls;
863 unsigned NumBaseAdds;
864 unsigned ImmCost;
865 unsigned SetupCost;
866 unsigned ScaleCost;
867
868public:
869 Cost()
870 : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
871 SetupCost(0), ScaleCost(0) {}
872
873 bool operator<(const Cost &Other) const;
874
875 void Lose();
876
877#ifndef NDEBUG
878 // Once any of the metrics loses, they must all remain losers.
879 bool isValid() {
880 return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
881 | ImmCost | SetupCost | ScaleCost) != ~0u)
882 || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
883 & ImmCost & SetupCost & ScaleCost) == ~0u);
884 }
885#endif
886
887 bool isLoser() {
888 assert(isValid() && "invalid cost");
889 return NumRegs == ~0u;
890 }
891
892 void RateFormula(const TargetTransformInfo &TTI,
893 const Formula &F,
894 SmallPtrSetImpl<const SCEV *> &Regs,
895 const DenseSet<const SCEV *> &VisitedRegs,
896 const Loop *L,
897 const SmallVectorImpl<int64_t> &Offsets,
898 ScalarEvolution &SE, DominatorTree &DT,
899 const LSRUse &LU,
900 SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
901
902 void print(raw_ostream &OS) const;
903 void dump() const;
904
905private:
906 void RateRegister(const SCEV *Reg,
907 SmallPtrSetImpl<const SCEV *> &Regs,
908 const Loop *L,
909 ScalarEvolution &SE, DominatorTree &DT);
910 void RatePrimaryRegister(const SCEV *Reg,
911 SmallPtrSetImpl<const SCEV *> &Regs,
912 const Loop *L,
913 ScalarEvolution &SE, DominatorTree &DT,
914 SmallPtrSetImpl<const SCEV *> *LoserRegs);
915};
916
917}
918
919/// RateRegister - Tally up interesting quantities from the given register.
920void Cost::RateRegister(const SCEV *Reg,
921 SmallPtrSetImpl<const SCEV *> &Regs,
922 const Loop *L,
923 ScalarEvolution &SE, DominatorTree &DT) {
924 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
925 // If this is an addrec for another loop, don't second-guess its addrec phi
926 // nodes. LSR isn't currently smart enough to reason about more than one
927 // loop at a time. LSR has already run on inner loops, will not run on outer
928 // loops, and cannot be expected to change sibling loops.
929 if (AR->getLoop() != L) {
930 // If the AddRec exists, consider its register free and leave it alone.
931 if (isExistingPhi(AR, SE))
932 return;
933
934 // Otherwise, do not consider this formula at all.
935 Lose();
936 return;
937 }
938 AddRecCost += 1; /// TODO: This should be a function of the stride.
939
940 // Add the step value register, if it needs one.
941 // TODO: The non-affine case isn't precisely modeled here.
942 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
943 if (!Regs.count(AR->getOperand(1))) {
944 RateRegister(AR->getOperand(1), Regs, L, SE, DT);
945 if (isLoser())
946 return;
947 }
948 }
949 }
950 ++NumRegs;
951
952 // Rough heuristic; favor registers which don't require extra setup
953 // instructions in the preheader.
954 if (!isa<SCEVUnknown>(Reg) &&
955 !isa<SCEVConstant>(Reg) &&
956 !(isa<SCEVAddRecExpr>(Reg) &&
957 (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
958 isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
959 ++SetupCost;
960
961 NumIVMuls += isa<SCEVMulExpr>(Reg) &&
962 SE.hasComputableLoopEvolution(Reg, L);
963}
964
965/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
966/// before, rate it. Optional LoserRegs provides a way to declare any formula
967/// that refers to one of those regs an instant loser.
968void Cost::RatePrimaryRegister(const SCEV *Reg,
969 SmallPtrSetImpl<const SCEV *> &Regs,
970 const Loop *L,
971 ScalarEvolution &SE, DominatorTree &DT,
972 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
973 if (LoserRegs && LoserRegs->count(Reg)) {
974 Lose();
975 return;
976 }
977 if (Regs.insert(Reg).second) {
978 RateRegister(Reg, Regs, L, SE, DT);
979 if (LoserRegs && isLoser())
980 LoserRegs->insert(Reg);
981 }
982}
983
984void Cost::RateFormula(const TargetTransformInfo &TTI,
985 const Formula &F,
986 SmallPtrSetImpl<const SCEV *> &Regs,
987 const DenseSet<const SCEV *> &VisitedRegs,
988 const Loop *L,
989 const SmallVectorImpl<int64_t> &Offsets,
990 ScalarEvolution &SE, DominatorTree &DT,
991 const LSRUse &LU,
992 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
993 assert(F.isCanonical() && "Cost is accurate only for canonical formula");
994 // Tally up the registers.
995 if (const SCEV *ScaledReg = F.ScaledReg) {
996 if (VisitedRegs.count(ScaledReg)) {
997 Lose();
998 return;
999 }
1000 RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
1001 if (isLoser())
1002 return;
1003 }
1004 for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
1005 E = F.BaseRegs.end(); I != E; ++I) {
1006 const SCEV *BaseReg = *I;
1007 if (VisitedRegs.count(BaseReg)) {
1008 Lose();
1009 return;
1010 }
1011 RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
1012 if (isLoser())
1013 return;
1014 }
1015
1016 // Determine how many (unfolded) adds we'll need inside the loop.
1017 size_t NumBaseParts = F.getNumRegs();
1018 if (NumBaseParts > 1)
1019 // Do not count the base and a possible second register if the target
1020 // allows folding 2 registers.
1021 NumBaseAdds +=
1022 NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
1023 NumBaseAdds += (F.UnfoldedOffset != 0);
1024
1025 // Accumulate non-free scaling amounts.
1026 ScaleCost += getScalingFactorCost(TTI, LU, F);
1027
1028 // Tally up the non-zero immediates.
1029 for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
1030 E = Offsets.end(); I != E; ++I) {
1031 int64_t Offset = (uint64_t)*I + F.BaseOffset;
1032 if (F.BaseGV)
1033 ImmCost += 64; // Handle symbolic values conservatively.
1034 // TODO: This should probably be the pointer size.
1035 else if (Offset != 0)
1036 ImmCost += APInt(64, Offset, true).getMinSignedBits();
1037 }
1038 assert(isValid() && "invalid cost");
1039}
1040
1041/// Lose - Set this cost to a losing value.
1042void Cost::Lose() {
1043 NumRegs = ~0u;
1044 AddRecCost = ~0u;
1045 NumIVMuls = ~0u;
1046 NumBaseAdds = ~0u;
1047 ImmCost = ~0u;
1048 SetupCost = ~0u;
1049 ScaleCost = ~0u;
1050}
1051
1052/// operator< - Choose the lower cost.
1053bool Cost::operator<(const Cost &Other) const {
1054 return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
1055 ImmCost, SetupCost) <
1056 std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
1057 Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
1058 Other.SetupCost);
1059}
1060
1061void Cost::print(raw_ostream &OS) const {
1062 OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
1063 if (AddRecCost != 0)
1064 OS << ", with addrec cost " << AddRecCost;
1065 if (NumIVMuls != 0)
1066 OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
1067 if (NumBaseAdds != 0)
1068 OS << ", plus " << NumBaseAdds << " base add"
1069 << (NumBaseAdds == 1 ? "" : "s");
1070 if (ScaleCost != 0)
1071 OS << ", plus " << ScaleCost << " scale cost";
1072 if (ImmCost != 0)
1073 OS << ", plus " << ImmCost << " imm cost";
1074 if (SetupCost != 0)
1075 OS << ", plus " << SetupCost << " setup cost";
1076}
1077
1078#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1079void Cost::dump() const {
1080 print(errs()); errs() << '\n';
1081}
1082#endif
1083
1084namespace {
1085
1086/// LSRFixup - An operand value in an instruction which is to be replaced
1087/// with some equivalent, possibly strength-reduced, replacement.
1088struct LSRFixup {
1089 /// UserInst - The instruction which will be updated.
1090 Instruction *UserInst;
1091
1092 /// OperandValToReplace - The operand of the instruction which will
1093 /// be replaced. The operand may be used more than once; every instance
1094 /// will be replaced.
1095 Value *OperandValToReplace;
1096
1097 /// PostIncLoops - If this user is to use the post-incremented value of an
1098 /// induction variable, this variable is non-null and holds the loop
1099 /// associated with the induction variable.
1100 PostIncLoopSet PostIncLoops;
1101
1102 /// LUIdx - The index of the LSRUse describing the expression which
1103 /// this fixup needs, minus an offset (below).
1104 size_t LUIdx;
1105
1106 /// Offset - A constant offset to be added to the LSRUse expression.
1107 /// This allows multiple fixups to share the same LSRUse with different
1108 /// offsets, for example in an unrolled loop.
1109 int64_t Offset;
1110
1111 bool isUseFullyOutsideLoop(const Loop *L) const;
1112
1113 LSRFixup();
1114
1115 void print(raw_ostream &OS) const;
1116 void dump() const;
1117};
1118
1119}
1120
1121LSRFixup::LSRFixup()
1122 : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
1123 Offset(0) {}
1124
1125/// isUseFullyOutsideLoop - Test whether this fixup always uses its
1126/// value outside of the given loop.
1127bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
1128 // PHI nodes use their value in their incoming blocks.
1129 if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1130 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1131 if (PN->getIncomingValue(i) == OperandValToReplace &&
1132 L->contains(PN->getIncomingBlock(i)))
1133 return false;
1134 return true;
1135 }
1136
1137 return !L->contains(UserInst);
1138}
1139
1140void LSRFixup::print(raw_ostream &OS) const {
1141 OS << "UserInst=";
1142 // Store is common and interesting enough to be worth special-casing.
1143 if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1144 OS << "store ";
1145 Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
1146 } else if (UserInst->getType()->isVoidTy())
1147 OS << UserInst->getOpcodeName();
1148 else
1149 UserInst->printAsOperand(OS, /*PrintType=*/false);
1150
1151 OS << ", OperandValToReplace=";
1152 OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
1153
1154 for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
1155 E = PostIncLoops.end(); I != E; ++I) {
1156 OS << ", PostIncLoop=";
1157 (*I)->getHeader()->printAsOperand(OS, /*PrintType=*/false);
1158 }
1159
1160 if (LUIdx != ~size_t(0))
1161 OS << ", LUIdx=" << LUIdx;
1162
1163 if (Offset != 0)
1164 OS << ", Offset=" << Offset;
1165}
1166
1167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1168void LSRFixup::dump() const {
1169 print(errs()); errs() << '\n';
1170}
1171#endif
1172
1173namespace {
1174
1175/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
1176/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
1177struct UniquifierDenseMapInfo {
1178 static SmallVector<const SCEV *, 4> getEmptyKey() {
1179 SmallVector<const SCEV *, 4> V;
1180 V.push_back(reinterpret_cast<const SCEV *>(-1));
1181 return V;
1182 }
1183
1184 static SmallVector<const SCEV *, 4> getTombstoneKey() {
1185 SmallVector<const SCEV *, 4> V;
1186 V.push_back(reinterpret_cast<const SCEV *>(-2));
1187 return V;
1188 }
1189
1190 static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
1191 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
1192 }
1193
1194 static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
1195 const SmallVector<const SCEV *, 4> &RHS) {
1196 return LHS == RHS;
1197 }
1198};
1199
1200/// LSRUse - This class holds the state that LSR keeps for each use in
1201/// IVUsers, as well as uses invented by LSR itself. It includes information
1202/// about what kinds of things can be folded into the user, information about
1203/// the user itself, and information about how the use may be satisfied.
1204/// TODO: Represent multiple users of the same expression in common?
1205class LSRUse {
1206 DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
1207
1208public:
1209 /// KindType - An enum for a kind of use, indicating what types of
1210 /// scaled and immediate operands it might support.
1211 enum KindType {
1212 Basic, ///< A normal use, with no folding.
1213 Special, ///< A special case of basic, allowing -1 scales.
1214 Address, ///< An address use; folding according to TargetLowering
1215 ICmpZero ///< An equality icmp with both operands folded into one.
1216 // TODO: Add a generic icmp too?
1217 };
1218
1219 typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
1220
1221 KindType Kind;
1222 Type *AccessTy;
1223
1224 SmallVector<int64_t, 8> Offsets;
1225 int64_t MinOffset;
1226 int64_t MaxOffset;
1227
1228 /// AllFixupsOutsideLoop - This records whether all of the fixups using this
1229 /// LSRUse are outside of the loop, in which case some special-case heuristics
1230 /// may be used.
1231 bool AllFixupsOutsideLoop;
1232
1233 /// RigidFormula is set to true to guarantee that this use will be associated
1234 /// with a single formula--the one that initially matched. Some SCEV
1235 /// expressions cannot be expanded. This allows LSR to consider the registers
1236 /// used by those expressions without the need to expand them later after
1237 /// changing the formula.
1238 bool RigidFormula;
1239
1240 /// WidestFixupType - This records the widest use type for any fixup using
1241 /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
1242 /// max fixup widths to be equivalent, because the narrower one may be relying
1243 /// on the implicit truncation to truncate away bogus bits.
1244 Type *WidestFixupType;
1245
1246 /// Formulae - A list of ways to build a value that can satisfy this user.
1247 /// After the list is populated, one of these is selected heuristically and
1248 /// used to formulate a replacement for OperandValToReplace in UserInst.
1249 SmallVector<Formula, 12> Formulae;
1250
1251 /// Regs - The set of register candidates used by all formulae in this LSRUse.
1252 SmallPtrSet<const SCEV *, 4> Regs;
1253
1254 LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
1255 MinOffset(INT64_MAX),
1256 MaxOffset(INT64_MIN),
1257 AllFixupsOutsideLoop(true),
1258 RigidFormula(false),
1259 WidestFixupType(nullptr) {}
1260
1261 bool HasFormulaWithSameRegs(const Formula &F) const;
1262 bool InsertFormula(const Formula &F);
1263 void DeleteFormula(Formula &F);
1264 void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1265
1266 void print(raw_ostream &OS) const;
1267 void dump() const;
1268};
1269
1270}
1271
1272/// HasFormulaWithSameRegs - Test whether this use has a formula with the same
1273/// registers as the given formula.
1274bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1275 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1276 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1277 // Unstable sort by host order ok, because this is only used for uniquifying.
1278 std::sort(Key.begin(), Key.end());
1279 return Uniquifier.count(Key);
1280}
1281
1282/// InsertFormula - If the given formula has not yet been inserted, add it to
1283/// the list, and return true. Return false otherwise.
1284/// The formula must be in canonical form.
1285bool LSRUse::InsertFormula(const Formula &F) {
1286 assert(F.isCanonical() && "Invalid canonical representation");
1287
1288 if (!Formulae.empty() && RigidFormula)
1289 return false;
1290
1291 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1292 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1293 // Unstable sort by host order ok, because this is only used for uniquifying.
1294 std::sort(Key.begin(), Key.end());
1295
1296 if (!Uniquifier.insert(Key).second)
1297 return false;
1298
1299 // Using a register to hold the value of 0 is not profitable.
1300 assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
1301 "Zero allocated in a scaled register!");
1302#ifndef NDEBUG
1303 for (SmallVectorImpl<const SCEV *>::const_iterator I =
1304 F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
1305 assert(!(*I)->isZero() && "Zero allocated in a base register!");
1306#endif
1307
1308 // Add the formula to the list.
1309 Formulae.push_back(F);
1310
1311 // Record registers now being used by this use.
1312 Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1313 if (F.ScaledReg)
1314 Regs.insert(F.ScaledReg);
1315
1316 return true;
1317}
1318
1319/// DeleteFormula - Remove the given formula from this use's list.
1320void LSRUse::DeleteFormula(Formula &F) {
1321 if (&F != &Formulae.back())
1322 std::swap(F, Formulae.back());
1323 Formulae.pop_back();
1324}
1325
1326/// RecomputeRegs - Recompute the Regs field, and update RegUses.
1327void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1328 // Now that we've filtered out some formulae, recompute the Regs set.
1329 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1330 Regs.clear();
1331 for (const Formula &F : Formulae) {
1332 if (F.ScaledReg) Regs.insert(F.ScaledReg);
1333 Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1334 }
1335
1336 // Update the RegTracker.
1337 for (const SCEV *S : OldRegs)
1338 if (!Regs.count(S))
1339 RegUses.DropRegister(S, LUIdx);
1340}
1341
1342void LSRUse::print(raw_ostream &OS) const {
1343 OS << "LSR Use: Kind=";
1344 switch (Kind) {
1345 case Basic: OS << "Basic"; break;
1346 case Special: OS << "Special"; break;
1347 case ICmpZero: OS << "ICmpZero"; break;
1348 case Address:
1349 OS << "Address of ";
1350 if (AccessTy->isPointerTy())
1351 OS << "pointer"; // the full pointer type could be really verbose
1352 else
1353 OS << *AccessTy;
1354 }
1355
1356 OS << ", Offsets={";
1357 for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
1358 E = Offsets.end(); I != E; ++I) {
1359 OS << *I;
1360 if (std::next(I) != E)
1361 OS << ',';
1362 }
1363 OS << '}';
1364
1365 if (AllFixupsOutsideLoop)
1366 OS << ", all-fixups-outside-loop";
1367
1368 if (WidestFixupType)
1369 OS << ", widest fixup type: " << *WidestFixupType;
1370}
1371
1372#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1373void LSRUse::dump() const {
1374 print(errs()); errs() << '\n';
1375}
1376#endif
1377
1378static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1379 LSRUse::KindType Kind, Type *AccessTy,
1380 GlobalValue *BaseGV, int64_t BaseOffset,
1381 bool HasBaseReg, int64_t Scale) {
1382 switch (Kind) {
1383 case LSRUse::Address:
1384 return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
1385
1386 case LSRUse::ICmpZero:
1387 // There's not even a target hook for querying whether it would be legal to
1388 // fold a GV into an ICmp.
1389 if (BaseGV)
1390 return false;
1391
1392 // ICmp only has two operands; don't allow more than two non-trivial parts.
1393 if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1394 return false;
1395
1396 // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1397 // putting the scaled register in the other operand of the icmp.
1398 if (Scale != 0 && Scale != -1)
1399 return false;
1400
1401 // If we have low-level target information, ask the target if it can fold an
1402 // integer immediate on an icmp.
1403 if (BaseOffset != 0) {
1404 // We have one of:
1405 // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1406 // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
1407 // Offs is the ICmp immediate.
1408 if (Scale == 0)
1409 // The cast does the right thing with INT64_MIN.
1410 BaseOffset = -(uint64_t)BaseOffset;
1411 return TTI.isLegalICmpImmediate(BaseOffset);
1412 }
1413
1414 // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
1415 return true;
1416
1417 case LSRUse::Basic:
1418 // Only handle single-register values.
1419 return !BaseGV && Scale == 0 && BaseOffset == 0;
1420
1421 case LSRUse::Special:
1422 // Special case Basic to handle -1 scales.
1423 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1424 }
1425
1426 llvm_unreachable("Invalid LSRUse Kind!")::llvm::llvm_unreachable_internal("Invalid LSRUse Kind!", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn237945/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 1426)
;
1427}
1428
1429static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1430 int64_t MinOffset, int64_t MaxOffset,
1431 LSRUse::KindType Kind, Type *AccessTy,
1432 GlobalValue *BaseGV, int64_t BaseOffset,
1433 bool HasBaseReg, int64_t Scale) {
1434 // Check for overflow.
1435 if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1436 (MinOffset > 0))
1437 return false;
1438 MinOffset = (uint64_t)BaseOffset + MinOffset;
1439 if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1440 (MaxOffset > 0))
1441 return false;
1442 MaxOffset = (uint64_t)BaseOffset + MaxOffset;
1443
1444 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
1445 HasBaseReg, Scale) &&
1446 isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
1447 HasBaseReg, Scale);
1448}
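(Editorial aside: a worked instance of the overflow guard at the top of the function above.)

    // Suppose BaseOffset == INT64_MAX and MinOffset == 1. Then
    // (uint64_t)BaseOffset + MinOffset wraps to INT64_MIN, which is not greater
    // than BaseOffset, while (MinOffset > 0) is true. The two tests disagree, so
    // the function returns false instead of folding a wrapped offset into the
    // addressing mode.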
1449
1450static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1451 int64_t MinOffset, int64_t MaxOffset,
1452 LSRUse::KindType Kind, Type *AccessTy,
1453 const Formula &F) {
1454 // For the purpose of isAMCompletelyFolded either having a canonical formula
1455 // or a scale not equal to zero is correct.
1456 // Problems may arise from non-canonical formulae having a scale == 0.
1457 // Strictly speaking it would be best to just rely on canonical formulae.
1458 // However, when we generate the scaled formulae, we first check that the
1459 // scaling factor is profitable before computing the actual ScaledReg for
1460 // compile time's sake.
1461 assert((F.isCanonical() || F.Scale != 0));
1462 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1463 F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
1464}
1465
1466/// isLegalUse - Test whether we know how to expand the current formula.
1467static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1468 int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
1469 GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
1470 int64_t Scale) {
1471 // We know how to expand completely foldable formulae.
1472 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1473 BaseOffset, HasBaseReg, Scale) ||
1474 // Or formulae that use a base register produced by a sum of base
1475 // registers.
1476 (Scale == 1 &&
1477 isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1478 BaseGV, BaseOffset, true, 0));
1479}
1480
1481static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1482 int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
1483 const Formula &F) {
1484 return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
1485 F.BaseOffset, F.HasBaseReg, F.Scale);
1486}
1487
1488static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1489 const LSRUse &LU, const Formula &F) {
1490 return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1491 LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
1492 F.Scale);
1493}
1494
1495static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
1496 const LSRUse &LU, const Formula &F) {
1497 if (!F.Scale)
1498 return 0;
1499
1500 // If the use is not completely folded in that instruction, we will have to
1501 // pay an extra cost only for scale != 1.
1502 if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1503 LU.AccessTy, F))
1504 return F.Scale != 1;
1505
1506 switch (LU.Kind) {
1507 case LSRUse::Address: {
1508 // Check the scaling factor cost with both the min and max offsets.
1509 int ScaleCostMinOffset =
1510 TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
1511 F.BaseOffset + LU.MinOffset,
1512 F.HasBaseReg, F.Scale);
1513 int ScaleCostMaxOffset =
1514 TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
1515 F.BaseOffset + LU.MaxOffset,
1516 F.HasBaseReg, F.Scale);
1517
1518 assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
1519 "Legal addressing mode has an illegal cost!");
1520 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1521 }
1522 case LSRUse::ICmpZero:
1523 case LSRUse::Basic:
1524 case LSRUse::Special:
1525 // The use is completely folded, i.e., everything is folded into the
1526 // instruction.
1527 return 0;
1528 }
1529
1530 llvm_unreachable("Invalid LSRUse Kind!");
1531}
1532
1533static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1534 LSRUse::KindType Kind, Type *AccessTy,
1535 GlobalValue *BaseGV, int64_t BaseOffset,
1536 bool HasBaseReg) {
1537 // Fast-path: zero is always foldable.
1538 if (BaseOffset == 0 && !BaseGV) return true;
1539
1540 // Conservatively, create an address with an immediate and a
1541 // base and a scale.
1542 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1543
1544 // Canonicalize a scale of 1 to a base register if the formula doesn't
1545 // already have a base register.
1546 if (!HasBaseReg && Scale == 1) {
1547 Scale = 0;
1548 HasBaseReg = true;
1549 }
1550
1551 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
1552 HasBaseReg, Scale);
1553}
1554
1555static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1556 ScalarEvolution &SE, int64_t MinOffset,
1557 int64_t MaxOffset, LSRUse::KindType Kind,
1558 Type *AccessTy, const SCEV *S, bool HasBaseReg) {
1559 // Fast-path: zero is always foldable.
1560 if (S->isZero()) return true;
1561
1562 // Conservatively, create an address with an immediate and a
1563 // base and a scale.
1564 int64_t BaseOffset = ExtractImmediate(S, SE);
1565 GlobalValue *BaseGV = ExtractSymbol(S, SE);
1566
1567 // If there's anything else involved, it's not foldable.
1568 if (!S->isZero()) return false;
1569
1570 // Fast-path: zero is always foldable.
1571 if (BaseOffset == 0 && !BaseGV) return true;
1572
1573 // Conservatively, create an address with an immediate and a
1574 // base and a scale.
1575 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1576
1577 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1578 BaseOffset, HasBaseReg, Scale);
1579}
1580
1581namespace {
1582
1583/// IVInc - An individual increment in a Chain of IV increments.
1584/// Relate an IV user to an expression that computes the IV it uses from the IV
1585/// used by the previous link in the Chain.
1586///
1587/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
1588/// original IVOperand. The head of the chain's IVOperand is only valid during
1589/// chain collection, before LSR replaces IV users. During chain generation,
1590/// IncExpr can be used to find the new IVOperand that computes the same
1591/// expression.
1592struct IVInc {
1593 Instruction *UserInst;
1594 Value* IVOperand;
1595 const SCEV *IncExpr;
1596
1597 IVInc(Instruction *U, Value *O, const SCEV *E):
1598 UserInst(U), IVOperand(O), IncExpr(E) {}
1599};
1600
1601// IVChain - The list of IV increments in program order.
1602// We typically add the head of a chain without finding subsequent links.
1603struct IVChain {
1604 SmallVector<IVInc,1> Incs;
1605 const SCEV *ExprBase;
1606
1607 IVChain() : ExprBase(nullptr) {}
1608
1609 IVChain(const IVInc &Head, const SCEV *Base)
1610 : Incs(1, Head), ExprBase(Base) {}
1611
1612 typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
1613
1614 // begin - return the first increment in the chain.
1615 const_iterator begin() const {
1616 assert(!Incs.empty());
1617 return std::next(Incs.begin());
1618 }
1619 const_iterator end() const {
1620 return Incs.end();
1621 }
1622
1623 // hasIncs - Returns true if this chain contains any increments.
1624 bool hasIncs() const { return Incs.size() >= 2; }
1625
1626 // add - Add an IVInc to the end of this chain.
1627 void add(const IVInc &X) { Incs.push_back(X); }
1628
1629 // tailUserInst - Returns the last UserInst in the chain.
1630 Instruction *tailUserInst() const { return Incs.back().UserInst; }
1631
1632 // isProfitableIncrement - Returns true if IncExpr can be profitably added to
1633 // this chain.
1634 bool isProfitableIncrement(const SCEV *OperExpr,
1635 const SCEV *IncExpr,
1636 ScalarEvolution&);
1637};
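
The comments above describe the chain head as carrying an absolute expression while later links carry relative increments; a hypothetical source loop that would produce such a chain (editor's sketch, assuming the target can fold the small constant offsets) looks like this:

// a[i] becomes the chain head; each later store is reached from the previous
// link by a loop-invariant increment of one element.
void zeroTriples(int *a, int n) {
  for (int i = 0; i < n; i += 3) {
    a[i]     = 0;  // head: absolute expression for a[i]
    a[i + 1] = 0;  // IVInc: +1 element relative to the previous link
    a[i + 2] = 0;  // IVInc: +1 element relative to the previous link
  }
}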
1638
1639/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
1640/// Distinguish between FarUsers that definitely cross IV increments and
1641/// NearUsers that may be used between IV increments.
1642struct ChainUsers {
1643 SmallPtrSet<Instruction*, 4> FarUsers;
1644 SmallPtrSet<Instruction*, 4> NearUsers;
1645};
1646
1647/// LSRInstance - This class holds state for the main loop strength reduction
1648/// logic.
1649class LSRInstance {
1650 IVUsers &IU;
1651 ScalarEvolution &SE;
1652 DominatorTree &DT;
1653 LoopInfo &LI;
1654 const TargetTransformInfo &TTI;
1655 Loop *const L;
1656 bool Changed;
1657
1658 /// IVIncInsertPos - This is the insert position at which the current loop's
1659 /// induction variable increment should be placed. In simple loops, this is
1660 /// the latch block's terminator. But in more complicated cases, this is a
1661 /// position which will dominate all the in-loop post-increment users.
1662 Instruction *IVIncInsertPos;
1663
1664 /// Factors - Interesting factors between use strides.
1665 SmallSetVector<int64_t, 8> Factors;
1666
1667 /// Types - Interesting use types, to facilitate truncation reuse.
1668 SmallSetVector<Type *, 4> Types;
1669
1670 /// Fixups - The list of operands which are to be replaced.
1671 SmallVector<LSRFixup, 16> Fixups;
1672
1673 /// Uses - The list of interesting uses.
1674 SmallVector<LSRUse, 16> Uses;
1675
1676 /// RegUses - Track which uses use which register candidates.
1677 RegUseTracker RegUses;
1678
1679 // Limit the number of chains to avoid quadratic behavior. We don't expect to
1680 // have more than a few IV increment chains in a loop. Missing a Chain falls
1681 // back to normal LSR behavior for those uses.
1682 static const unsigned MaxChains = 8;
1683
1684 /// IVChainVec - IV users can form a chain of IV increments.
1685 SmallVector<IVChain, MaxChains> IVChainVec;
1686
1687 /// IVIncSet - IV users that belong to profitable IVChains.
1688 SmallPtrSet<Use*, MaxChains> IVIncSet;
1689
1690 void OptimizeShadowIV();
1691 bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
1692 ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
1693 void OptimizeLoopTermCond();
1694
1695 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
1696 SmallVectorImpl<ChainUsers> &ChainUsersVec);
1697 void FinalizeChain(IVChain &Chain);
1698 void CollectChains();
1699 void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
1700 SmallVectorImpl<WeakVH> &DeadInsts);
1701
1702 void CollectInterestingTypesAndFactors();
1703 void CollectFixupsAndInitialFormulae();
1704
1705 LSRFixup &getNewFixup() {
1706 Fixups.push_back(LSRFixup());
1707 return Fixups.back();
1708 }
1709
1710 // Support for sharing of LSRUses between LSRFixups.
1711 typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
1712 UseMapTy UseMap;
1713
1714 bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
1715 LSRUse::KindType Kind, Type *AccessTy);
1716
1717 std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
1718 LSRUse::KindType Kind,
1719 Type *AccessTy);
1720
1721 void DeleteUse(LSRUse &LU, size_t LUIdx);
1722
1723 LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
1724
1725 void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1726 void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1727 void CountRegisters(const Formula &F, size_t LUIdx);
1728 bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
1729
1730 void CollectLoopInvariantFixupsAndFormulae();
1731
1732 void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
1733 unsigned Depth = 0);
1734
1735 void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
1736 const Formula &Base, unsigned Depth,
1737 size_t Idx, bool IsScaledReg = false);
1738 void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
1739 void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1740 const Formula &Base, size_t Idx,
1741 bool IsScaledReg = false);
1742 void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1743 void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1744 const Formula &Base,
1745 const SmallVectorImpl<int64_t> &Worklist,
1746 size_t Idx, bool IsScaledReg = false);
1747 void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1748 void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1749 void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1750 void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
1751 void GenerateCrossUseConstantOffsets();
1752 void GenerateAllReuseFormulae();
1753
1754 void FilterOutUndesirableDedicatedRegisters();
1755
1756 size_t EstimateSearchSpaceComplexity() const;
1757 void NarrowSearchSpaceByDetectingSupersets();
1758 void NarrowSearchSpaceByCollapsingUnrolledCode();
1759 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
1760 void NarrowSearchSpaceByPickingWinnerRegs();
1761 void NarrowSearchSpaceUsingHeuristics();
1762
1763 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
1764 Cost &SolutionCost,
1765 SmallVectorImpl<const Formula *> &Workspace,
1766 const Cost &CurCost,
1767 const SmallPtrSet<const SCEV *, 16> &CurRegs,
1768 DenseSet<const SCEV *> &VisitedRegs) const;
1769 void Solve(SmallVectorImpl<const Formula *> &Solution) const;
1770
1771 BasicBlock::iterator
1772 HoistInsertPosition(BasicBlock::iterator IP,
1773 const SmallVectorImpl<Instruction *> &Inputs) const;
1774 BasicBlock::iterator
1775 AdjustInsertPositionForExpand(BasicBlock::iterator IP,
1776 const LSRFixup &LF,
1777 const LSRUse &LU,
1778 SCEVExpander &Rewriter) const;
1779
1780 Value *Expand(const LSRFixup &LF,
1781 const Formula &F,
1782 BasicBlock::iterator IP,
1783 SCEVExpander &Rewriter,
1784 SmallVectorImpl<WeakVH> &DeadInsts) const;
1785 void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
1786 const Formula &F,
1787 SCEVExpander &Rewriter,
1788 SmallVectorImpl<WeakVH> &DeadInsts,
1789 Pass *P) const;
1790 void Rewrite(const LSRFixup &LF,
1791 const Formula &F,
1792 SCEVExpander &Rewriter,
1793 SmallVectorImpl<WeakVH> &DeadInsts,
1794 Pass *P) const;
1795 void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
1796 Pass *P);
1797
1798public:
1799 LSRInstance(Loop *L, Pass *P);
1800
1801 bool getChanged() const { return Changed; }
1802
1803 void print_factors_and_types(raw_ostream &OS) const;
1804 void print_fixups(raw_ostream &OS) const;
1805 void print_uses(raw_ostream &OS) const;
1806 void print(raw_ostream &OS) const;
1807 void dump() const;
1808};
1809
1810}
1811
1812/// OptimizeShadowIV - If the IV is used in an int-to-float cast
1813/// inside the loop then try to eliminate the cast operation.
1814void LSRInstance::OptimizeShadowIV() {
1815 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1816 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1817 return;
1818
1819 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
1820 UI != E; /* empty */) {
1821 IVUsers::const_iterator CandidateUI = UI;
1822 ++UI;
1823 Instruction *ShadowUse = CandidateUI->getUser();
1824 Type *DestTy = nullptr;
1825 bool IsSigned = false;
1826
1827 /* If the shadow use is an int->float cast then insert a second IV
1828 to eliminate this cast.
1829
1830 for (unsigned i = 0; i < n; ++i)
1831 foo((double)i);
1832
1833 is transformed into
1834
1835 double d = 0.0;
1836 for (unsigned i = 0; i < n; ++i, ++d)
1837 foo(d);
1838 */
1839 if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
1840 IsSigned = false;
1841 DestTy = UCast->getDestTy();
1842 }
1843 else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
1844 IsSigned = true;
1845 DestTy = SCast->getDestTy();
1846 }
1847 if (!DestTy) continue;
1848
1849 // If target does not support DestTy natively then do not apply
1850 // this transformation.
1851 if (!TTI.isTypeLegal(DestTy)) continue;
1852
1853 PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
1854 if (!PH) continue;
1855 if (PH->getNumIncomingValues() != 2) continue;
1856
1857 Type *SrcTy = PH->getType();
1858 int Mantissa = DestTy->getFPMantissaWidth();
1859 if (Mantissa == -1) continue;
1860 if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
1861 continue;
1862
1863 unsigned Entry, Latch;
1864 if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
1865 Entry = 0;
1866 Latch = 1;
1867 } else {
1868 Entry = 1;
1869 Latch = 0;
1870 }
1871
1872 ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
1873 if (!Init) continue;
1874 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
1875 (double)Init->getSExtValue() :
1876 (double)Init->getZExtValue());
1877
1878 BinaryOperator *Incr =
1879 dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
1880 if (!Incr) continue;
1881 if (Incr->getOpcode() != Instruction::Add
1882 && Incr->getOpcode() != Instruction::Sub)
1883 continue;
1884
1885 /* Initialize new IV, double d = 0.0 in above example. */
1886 ConstantInt *C = nullptr;
1887 if (Incr->getOperand(0) == PH)
1888 C = dyn_cast<ConstantInt>(Incr->getOperand(1));
1889 else if (Incr->getOperand(1) == PH)
1890 C = dyn_cast<ConstantInt>(Incr->getOperand(0));
1891 else
1892 continue;
1893
1894 if (!C) continue;
1895
1896 // Ignore negative constants, as the code below doesn't handle them
1897 // correctly. TODO: Remove this restriction.
1898 if (!C->getValue().isStrictlyPositive()) continue;
1899
1900 /* Add new PHINode. */
1901 PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
1902
1903 /* create new increment. '++d' in above example. */
1904 Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
1905 BinaryOperator *NewIncr =
1906 BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
1907 Instruction::FAdd : Instruction::FSub,
1908 NewPH, CFP, "IV.S.next.", Incr);
1909
1910 NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
1911 NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
1912
1913 /* Remove cast operation */
1914 ShadowUse->replaceAllUsesWith(NewPH);
1915 ShadowUse->eraseFromParent();
1916 Changed = true;
1917 break;
1918 }
1919}
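
The mantissa check above is what keeps the shadow FP induction variable exact; as a standalone sketch (fitsInMantissa is a hypothetical helper, not part of this file), the guard amounts to:

// A shadow FP IV is only exact while the integer IV fits in the destination
// type's mantissa; getFPMantissaWidth() reports -1 for unsupported types.
static bool fitsInMantissa(int SrcBits, int MantissaBits) {
  return MantissaBits != -1 && SrcBits <= MantissaBits;
}
// e.g. fitsInMantissa(32, 53) == true for double, fitsInMantissa(64, 53) == false.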
1920
1921/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
1922/// set the IV user and stride information and return true, otherwise return
1923/// false.
1924bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
1925 for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
1926 if (UI->getUser() == Cond) {
1927 // NOTE: we could handle setcc instructions with multiple uses here, but
1928 // InstCombine does it as well for simple uses, it's not clear that it
1929 // occurs enough in real life to handle.
1930 CondUse = UI;
1931 return true;
1932 }
1933 return false;
1934}
1935
1936/// OptimizeMax - Rewrite the loop's terminating condition if it uses
1937/// a max computation.
1938///
1939/// This is a narrow solution to a specific, but acute, problem. For loops
1940/// like this:
1941///
1942/// i = 0;
1943/// do {
1944/// p[i] = 0.0;
1945/// } while (++i < n);
1946///
1947/// the trip count isn't just 'n', because 'n' might not be positive. And
1948/// unfortunately this can come up even for loops where the user didn't use
1949/// a C do-while loop. For example, seemingly well-behaved top-test loops
1950/// will commonly be lowered like this:
1951///
1952/// if (n > 0) {
1953/// i = 0;
1954/// do {
1955/// p[i] = 0.0;
1956/// } while (++i < n);
1957/// }
1958///
1959/// and then it's possible for subsequent optimization to obscure the if
1960/// test in such a way that indvars can't find it.
1961///
1962/// When indvars can't find the if test in loops like this, it creates a
1963/// max expression, which allows it to give the loop a canonical
1964/// induction variable:
1965///
1966/// i = 0;
1967/// max = n < 1 ? 1 : n;
1968/// do {
1969/// p[i] = 0.0;
1970/// } while (++i != max);
1971///
1972/// Canonical induction variables are necessary because the loop passes
1973/// are designed around them. The most obvious example of this is the
1974/// LoopInfo analysis, which doesn't remember trip count values. It
1975/// expects to be able to rediscover the trip count each time it is
1976/// needed, and it does this using a simple analysis that only succeeds if
1977/// the loop has a canonical induction variable.
1978///
1979/// However, when it comes time to generate code, the maximum operation
1980/// can be quite costly, especially if it's inside of an outer loop.
1981///
1982/// This function solves this problem by detecting this type of loop and
1983/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
1984/// the instructions for the maximum computation.
1985///
1986ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
1987 // Check that the loop matches the pattern we're looking for.
1988 if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
1989 Cond->getPredicate() != CmpInst::ICMP_NE)
1990 return Cond;
1991
1992 SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
1993 if (!Sel || !Sel->hasOneUse()) return Cond;
1994
1995 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1996 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1997 return Cond;
1998 const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
1999
2000 // Add one to the backedge-taken count to get the trip count.
2001 const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
2002 if (IterationCount != SE.getSCEV(Sel)) return Cond;
2003
2004 // Check for a max calculation that matches the pattern. There's no check
2005 // for ICMP_ULE here because the comparison would be with zero, which
2006 // isn't interesting.
2007 CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
2008 const SCEVNAryExpr *Max = nullptr;
2009 if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2010 Pred = ICmpInst::ICMP_SLE;
2011 Max = S;
2012 } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2013 Pred = ICmpInst::ICMP_SLT;
2014 Max = S;
2015 } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2016 Pred = ICmpInst::ICMP_ULT;
2017 Max = U;
2018 } else {
2019 // No match; bail.
2020 return Cond;
2021 }
2022
2023 // To handle a max with more than two operands, this optimization would
2024 // require additional checking and setup.
2025 if (Max->getNumOperands() != 2)
2026 return Cond;
2027
2028 const SCEV *MaxLHS = Max->getOperand(0);
2029 const SCEV *MaxRHS = Max->getOperand(1);
2030
2031 // ScalarEvolution canonicalizes constants to the left. For < and >, look
2032 // for a comparison with 1. For <= and >=, a comparison with zero.
2033 if (!MaxLHS ||
2034 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2035 return Cond;
2036
2037 // Check the relevant induction variable for conformance to
2038 // the pattern.
2039 const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
2040 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
2041 if (!AR || !AR->isAffine() ||
2042 AR->getStart() != One ||
2043 AR->getStepRecurrence(SE) != One)
2044 return Cond;
2045
2046 assert(AR->getLoop() == L &&
2047 "Loop condition operand is an addrec in a different loop!");
2048
2049 // Check the right operand of the select, and remember it, as it will
2050 // be used in the new comparison instruction.
2051 Value *NewRHS = nullptr;
2052 if (ICmpInst::isTrueWhenEqual(Pred)) {
2053 // Look for n+1, and grab n.
2054 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
2055 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2056 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2057 NewRHS = BO->getOperand(0);
2058 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
2059 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2060 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2061 NewRHS = BO->getOperand(0);
2062 if (!NewRHS)
2063 return Cond;
2064 } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
2065 NewRHS = Sel->getOperand(1);
2066 else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
2067 NewRHS = Sel->getOperand(2);
2068 else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2069 NewRHS = SU->getValue();
2070 else
2071 // Max doesn't match expected pattern.
2072 return Cond;
2073
2074 // Determine the new comparison opcode. It may be signed or unsigned,
2075 // and the original comparison may be either equality or inequality.
2076 if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2077 Pred = CmpInst::getInversePredicate(Pred);
2078
2079 // Ok, everything looks ok to change the condition into an SLT or SGE and
2080 // delete the max calculation.
2081 ICmpInst *NewCond =
2082 new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
2083
2084 // Delete the max calculation instructions.
2085 Cond->replaceAllUsesWith(NewCond);
2086 CondUse->setUser(NewCond);
2087 Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
2088 Cond->eraseFromParent();
2089 Sel->eraseFromParent();
2090 if (Cmp->use_empty())
2091 Cmp->eraseFromParent();
2092 return NewCond;
2093}
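
At the source level, the rewrite performed by OptimizeMax corresponds to the editor's sketch below: both loops execute max(n, 1) iterations, but the second needs no materialized max and exits on an ordinary signed compare.

void beforeOpt(double *p, int n) {
  int i = 0;
  int max = n < 1 ? 1 : n;             // smax(n, 1) computed up front
  do { p[i] = 0.0; } while (++i != max);
}
void afterOpt(double *p, int n) {
  int i = 0;
  do { p[i] = 0.0; } while (++i < n);  // same trip count, no max needed
}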
2094
2095/// OptimizeLoopTermCond - Change loop terminating condition to use the
2096/// postinc iv when possible.
2097void
2098LSRInstance::OptimizeLoopTermCond() {
2099 SmallPtrSet<Instruction *, 4> PostIncs;
2100
2101 BasicBlock *LatchBlock = L->getLoopLatch();
2102 SmallVector<BasicBlock*, 8> ExitingBlocks;
2103 L->getExitingBlocks(ExitingBlocks);
2104
2105 for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
2106 BasicBlock *ExitingBlock = ExitingBlocks[i];
2107
2108 // Get the terminating condition for the loop if possible. If we
2109 // can, we want to change it to use a post-incremented version of its
2110 // induction variable, to allow coalescing the live ranges for the IV into
2111 // one register value.
2112
2113 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2114 if (!TermBr)
2115 continue;
2116 // FIXME: Overly conservative, termination condition could be an 'or' etc.
2117 if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
2118 continue;
2119
2120 // Search IVUsesByStride to find Cond's IVUse if there is one.
2121 IVStrideUse *CondUse = nullptr;
2122 ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
2123 if (!FindIVUserForCond(Cond, CondUse))
2124 continue;
2125
2126 // If the trip count is computed in terms of a max (due to ScalarEvolution
2127 // being unable to find a sufficient guard, for example), change the loop
2128 // comparison to use SLT or ULT instead of NE.
2129 // One consequence of doing this now is that it disrupts the count-down
2130 // optimization. That's not always a bad thing though, because in such
2131 // cases it may still be worthwhile to avoid a max.
2132 Cond = OptimizeMax(Cond, CondUse);
2133
2134 // If this exiting block dominates the latch block, it may also use
2135 // the post-inc value if it won't be shared with other uses.
2136 // Check for dominance.
2137 if (!DT.dominates(ExitingBlock, LatchBlock))
2138 continue;
2139
2140 // Conservatively avoid trying to use the post-inc value in non-latch
2141 // exits if there may be pre-inc users in intervening blocks.
2142 if (LatchBlock != ExitingBlock)
2143 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
2144 // Test if the use is reachable from the exiting block. This dominator
2145 // query is a conservative approximation of reachability.
2146 if (&*UI != CondUse &&
2147 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2148 // Conservatively assume there may be reuse if the quotient of their
2149 // strides could be a legal scale.
2150 const SCEV *A = IU.getStride(*CondUse, L);
2151 const SCEV *B = IU.getStride(*UI, L);
2152 if (!A || !B) continue;
2153 if (SE.getTypeSizeInBits(A->getType()) !=
2154 SE.getTypeSizeInBits(B->getType())) {
2155 if (SE.getTypeSizeInBits(A->getType()) >
2156 SE.getTypeSizeInBits(B->getType()))
2157 B = SE.getSignExtendExpr(B, A->getType());
2158 else
2159 A = SE.getSignExtendExpr(A, B->getType());
2160 }
2161 if (const SCEVConstant *D =
2162 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
2163 const ConstantInt *C = D->getValue();
2164 // Stride of one or negative one can have reuse with non-addresses.
2165 if (C->isOne() || C->isAllOnesValue())
2166 goto decline_post_inc;
2167 // Avoid weird situations.
2168 if (C->getValue().getMinSignedBits() >= 64 ||
2169 C->getValue().isMinSignedValue())
2170 goto decline_post_inc;
2171 // Check for possible scaled-address reuse.
2172 Type *AccessTy = getAccessType(UI->getUser());
2173 int64_t Scale = C->getSExtValue();
2174 if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
2175 /*BaseOffset=*/ 0,
2176 /*HasBaseReg=*/ false, Scale))
2177 goto decline_post_inc;
2178 Scale = -Scale;
2179 if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
2180 /*BaseOffset=*/ 0,
2181 /*HasBaseReg=*/ false, Scale))
2182 goto decline_post_inc;
2183 }
2184 }
2185
2186 DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
2187 << *Cond << '\n');
2188
2189 // It's possible for the setcc instruction to be anywhere in the loop, and
2190 // possible for it to have multiple users. If it is not immediately before
2191 // the exiting block branch, move it.
2192 if (&*++BasicBlock::iterator(Cond) != TermBr) {
2193 if (Cond->hasOneUse()) {
2194 Cond->moveBefore(TermBr);
2195 } else {
2196 // Clone the terminating condition and insert into the loopend.
2197 ICmpInst *OldCond = Cond;
2198 Cond = cast<ICmpInst>(Cond->clone());
2199 Cond->setName(L->getHeader()->getName() + ".termcond");
2200 ExitingBlock->getInstList().insert(TermBr, Cond);
2201
2202 // Clone the IVUse, as the old use still exists!
2203 CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
2204 TermBr->replaceUsesOfWith(OldCond, Cond);
2205 }
2206 }
2207
2208 // If we get to here, we know that we can transform the setcc instruction to
2209 // use the post-incremented version of the IV, allowing us to coalesce the
2210 // live ranges for the IV correctly.
2211 CondUse->transformToPostInc(L);
2212 Changed = true;
2213
2214 PostIncs.insert(Cond);
2215 decline_post_inc:;
2216 }
2217
2218 // Determine an insertion point for the loop induction variable increment. It
2219 // must dominate all the post-inc comparisons we just set up, and it must
2220 // dominate the loop latch edge.
2221 IVIncInsertPos = L->getLoopLatch()->getTerminator();
2222 for (Instruction *Inst : PostIncs) {
2223 BasicBlock *BB =
2224 DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
2225 Inst->getParent());
2226 if (BB == Inst->getParent())
2227 IVIncInsertPos = Inst;
2228 else if (BB != IVIncInsertPos->getParent())
2229 IVIncInsertPos = BB->getTerminator();
2230 }
2231}
2232
2233/// reconcileNewOffset - Determine if the given use can accommodate a fixup
2234/// at the given offset and other details. If so, update the use and
2235/// return true.
2236bool
2237LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
2238 LSRUse::KindType Kind, Type *AccessTy) {
2239 int64_t NewMinOffset = LU.MinOffset;
2240 int64_t NewMaxOffset = LU.MaxOffset;
2241 Type *NewAccessTy = AccessTy;
2242
2243 // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2244 // something conservative, however this can pessimize in the case that one of
2245 // the uses will have all its uses outside the loop, for example.
2246 if (LU.Kind != Kind)
2247 return false;
2248
2249 // Check for a mismatched access type, and fall back conservatively as needed.
2250 // TODO: Be less conservative when the type is similar and can use the same
2251 // addressing modes.
2252 if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
2253 NewAccessTy = Type::getVoidTy(AccessTy->getContext());
2254
2255 // Conservatively assume HasBaseReg is true for now.
2256 if (NewOffset < LU.MinOffset) {
2257 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2258 LU.MaxOffset - NewOffset, HasBaseReg))
2259 return false;
2260 NewMinOffset = NewOffset;
2261 } else if (NewOffset > LU.MaxOffset) {
2262 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2263 NewOffset - LU.MinOffset, HasBaseReg))
2264 return false;
2265 NewMaxOffset = NewOffset;
2266 }
2267
2268 // Update the use.
2269 LU.MinOffset = NewMinOffset;
2270 LU.MaxOffset = NewMaxOffset;
2271 LU.AccessTy = NewAccessTy;
2272 if (NewOffset != LU.Offsets.back())
2273 LU.Offsets.push_back(NewOffset);
2274 return true;
2275}
2276
2277/// getUse - Return an LSRUse index and an offset value for a fixup which
2278/// needs the given expression, with the given kind and optional access type.
2279/// Either reuse an existing use or create a new one, as needed.
2280std::pair<size_t, int64_t>
2281LSRInstance::getUse(const SCEV *&Expr,
2282 LSRUse::KindType Kind, Type *AccessTy) {
2283 const SCEV *Copy = Expr;
2284 int64_t Offset = ExtractImmediate(Expr, SE);
2285
2286 // Basic uses can't accept any offset, for example.
2287 if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2288 Offset, /*HasBaseReg=*/ true)) {
2289 Expr = Copy;
2290 Offset = 0;
2291 }
2292
2293 std::pair<UseMapTy::iterator, bool> P =
2294 UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
2295 if (!P.second) {
2296 // A use already existed with this base.
2297 size_t LUIdx = P.first->second;
2298 LSRUse &LU = Uses[LUIdx];
2299 if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
2300 // Reuse this use.
2301 return std::make_pair(LUIdx, Offset);
2302 }
2303
2304 // Create a new use.
2305 size_t LUIdx = Uses.size();
2306 P.first->second = LUIdx;
2307 Uses.push_back(LSRUse(Kind, AccessTy));
2308 LSRUse &LU = Uses[LUIdx];
2309
2310 // We don't need to track redundant offsets, but we don't need to go out
2311 // of our way here to avoid them.
2312 if (LU.Offsets.empty() || Offset != LU.Offsets.back())
2313 LU.Offsets.push_back(Offset);
2314
2315 LU.MinOffset = Offset;
2316 LU.MaxOffset = Offset;
2317 return std::make_pair(LUIdx, Offset);
2318}
2319
2320/// DeleteUse - Delete the given use from the Uses list.
2321void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2322 if (&LU != &Uses.back())
2323 std::swap(LU, Uses.back());
2324 Uses.pop_back();
2325
2326 // Update RegUses.
2327 RegUses.SwapAndDropUse(LUIdx, Uses.size());
2328}
2329
2330/// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
2331/// a formula with the same registers as the given formula.
2332LSRUse *
2333LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2334 const LSRUse &OrigLU) {
2335 // Search all uses for the formula. This could be more clever.
2336 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
2337 LSRUse &LU = Uses[LUIdx];
2338 // Check whether this use is close enough to OrigLU, to see whether it's
2339 // worthwhile looking through its formulae.
2340 // Ignore ICmpZero uses because they may contain formulae generated by
2341 // GenerateICmpZeroScales, in which case adding fixup offsets may
2342 // be invalid.
2343 if (&LU != &OrigLU &&
2344 LU.Kind != LSRUse::ICmpZero &&
2345 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2346 LU.WidestFixupType == OrigLU.WidestFixupType &&
2347 LU.HasFormulaWithSameRegs(OrigF)) {
2348 // Scan through this use's formulae.
2349 for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
2350 E = LU.Formulae.end(); I != E; ++I) {
2351 const Formula &F = *I;
2352 // Check to see if this formula has the same registers and symbols
2353 // as OrigF.
2354 if (F.BaseRegs == OrigF.BaseRegs &&
2355 F.ScaledReg == OrigF.ScaledReg &&
2356 F.BaseGV == OrigF.BaseGV &&
2357 F.Scale == OrigF.Scale &&
2358 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2359 if (F.BaseOffset == 0)
2360 return &LU;
2361 // This is the formula where all the registers and symbols matched;
2362 // there aren't going to be any others. Since we declined it, we
2363 // can skip the rest of the formulae and proceed to the next LSRUse.
2364 break;
2365 }
2366 }
2367 }
2368 }
2369
2370 // Nothing looked good.
2371 return nullptr;
2372}
2373
2374void LSRInstance::CollectInterestingTypesAndFactors() {
2375 SmallSetVector<const SCEV *, 4> Strides;
2376
2377 // Collect interesting types and strides.
2378 SmallVector<const SCEV *, 4> Worklist;
2379 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
2380 const SCEV *Expr = IU.getExpr(*UI);
2381
2382 // Collect interesting types.
2383 Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
2384
2385 // Add strides for mentioned loops.
2386 Worklist.push_back(Expr);
2387 do {
2388 const SCEV *S = Worklist.pop_back_val();
2389 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2390 if (AR->getLoop() == L)
2391 Strides.insert(AR->getStepRecurrence(SE));
2392 Worklist.push_back(AR->getStart());
2393 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2394 Worklist.append(Add->op_begin(), Add->op_end());
2395 }
2396 } while (!Worklist.empty());
2397 }
2398
2399 // Compute interesting factors from the set of interesting strides.
2400 for (SmallSetVector<const SCEV *, 4>::const_iterator
2401 I = Strides.begin(), E = Strides.end(); I != E; ++I)
2402 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2403 std::next(I); NewStrideIter != E; ++NewStrideIter) {
2404 const SCEV *OldStride = *I;
2405 const SCEV *NewStride = *NewStrideIter;
2406
2407 if (SE.getTypeSizeInBits(OldStride->getType()) !=
2408 SE.getTypeSizeInBits(NewStride->getType())) {
2409 if (SE.getTypeSizeInBits(OldStride->getType()) >
2410 SE.getTypeSizeInBits(NewStride->getType()))
2411 NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
2412 else
2413 OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
2414 }
2415 if (const SCEVConstant *Factor =
2416 dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
2417 SE, true))) {
2418 if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
2419 Factors.insert(Factor->getValue()->getValue().getSExtValue());
2420 } else if (const SCEVConstant *Factor =
2421 dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
2422 NewStride,
2423 SE, true))) {
2424 if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
2425 Factors.insert(Factor->getValue()->getValue().getSExtValue());
2426 }
2427 }
2428
2429 // If all uses use the same type, don't bother looking for truncation-based
2430 // reuse.
2431 if (Types.size() == 1)
2432 Types.clear();
2433
2434 DEBUG(print_factors_and_types(dbgs()));
2435}
2436
2437/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
2438/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
2439/// Instructions to IVStrideUses, we could partially skip this.
2440static User::op_iterator
2441findIVOperand(User::op_iterator OI, User::op_iterator OE,
2442 Loop *L, ScalarEvolution &SE) {
2443 for(; OI != OE; ++OI) {
2444 if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2445 if (!SE.isSCEVable(Oper->getType()))
2446 continue;
2447
2448 if (const SCEVAddRecExpr *AR =
2449 dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
2450 if (AR->getLoop() == L)
2451 break;
2452 }
2453 }
2454 }
2455 return OI;
2456}
2457
2458/// getWideOperand - IVChain logic must consistently peek through base TruncInst
2459/// operands, so wrap it in a convenient helper.
2460static Value *getWideOperand(Value *Oper) {
2461 if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2462 return Trunc->getOperand(0);
2463 return Oper;
2464}
2465
2466/// isCompatibleIVType - Return true if we allow an IV chain to include both
2467/// types.
2468static bool isCompatibleIVType(Value *LVal, Value *RVal) {
2469 Type *LType = LVal->getType();
2470 Type *RType = RVal->getType();
2471 return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
2472}
2473
2474/// getExprBase - Return an approximation of this SCEV expression's "base", or
2475/// NULL for any constant. Returning the expression itself is
2476/// conservative. Returning a deeper subexpression is more precise and valid as
2477/// long as it isn't less complex than another subexpression. For expressions
2478/// involving multiple unscaled values, we need to return the pointer-type
2479/// SCEVUnknown. This avoids forming chains across objects, such as:
2480/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
2481///
2482/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2483/// SCEVUnknown, we simply return the rightmost SCEV operand.
2484static const SCEV *getExprBase(const SCEV *S) {
2485 switch (S->getSCEVType()) {
2486 default: // including scUnknown.
2487 return S;
2488 case scConstant:
2489 return nullptr;
2490 case scTruncate:
2491 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2492 case scZeroExtend:
2493 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2494 case scSignExtend:
2495 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2496 case scAddExpr: {
2497 // Skip over scaled operands (scMulExpr) to follow add operands as long as
2498 // there's nothing more complex.
2499 // FIXME: not sure if we want to recognize negation.
2500 const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
2501 for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
2502 E(Add->op_begin()); I != E; ++I) {
2503 const SCEV *SubExpr = *I;
2504 if (SubExpr->getSCEVType() == scAddExpr)
2505 return getExprBase(SubExpr);
2506
2507 if (SubExpr->getSCEVType() != scMulExpr)
2508 return SubExpr;
2509 }
2510 return S; // all operands are scaled, be conservative.
2511 }
2512 case scAddRecExpr:
2513 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2514 }
2515}
2516
2517/// Return true if the chain increment is profitable to expand into a loop
2518/// invariant value, which may require its own register. A profitable chain
2519/// increment will be an offset relative to the same base. We allow such offsets
2520/// to potentially be used as chain increments as long as they're not obviously
2521/// expensive to expand using real instructions.
2522bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
2523 const SCEV *IncExpr,
2524 ScalarEvolution &SE) {
2525 // Aggressively form chains when -stress-ivchain.
2526 if (StressIVChain)
2527 return true;
2528
2529 // Do not replace a constant offset from IV head with a nonconstant IV
2530 // increment.
2531 if (!isa<SCEVConstant>(IncExpr)) {
2532 const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
2533 if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
2534 return false;
2535 }
2536
2537 SmallPtrSet<const SCEV*, 8> Processed;
2538 return !isHighCostExpansion(IncExpr, Processed, SE);
2539}
2540
2541/// Return true if the number of registers needed for the chain is estimated to
2542/// be less than the number required for the individual IV users. First prohibit
2543/// any IV users that keep the IV live across increments (the Users set should
2544/// be empty). Next count the number and type of increments in the chain.
2545///
2546/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
2547/// effectively use postinc addressing modes. Only consider it profitable if the
2548/// increments can be computed in fewer registers when chained.
2549///
2550/// TODO: Consider IVInc free if it's already used in another chains.
2551static bool
2552isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
2553 ScalarEvolution &SE, const TargetTransformInfo &TTI) {
2554 if (StressIVChain)
2555 return true;
2556
2557 if (!Chain.hasIncs())
2558 return false;
2559
2560 if (!Users.empty()) {
2561 DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
2562 for (Instruction *Inst : Users) {
2563 dbgs() << " " << *Inst << "\n";
2564 });
2565 return false;
2566 }
2567 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2568
2569 // The chain itself may require a register, so initialize cost to 1.
2570 int cost = 1;
2571
2572 // A complete chain likely eliminates the need for keeping the original IV in
2573 // a register. LSR does not currently know how to form a complete chain unless
2574 // the header phi already exists.
2575 if (isa<PHINode>(Chain.tailUserInst())
2576 && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2577 --cost;
2578 }
2579 const SCEV *LastIncExpr = nullptr;
2580 unsigned NumConstIncrements = 0;
2581 unsigned NumVarIncrements = 0;
2582 unsigned NumReusedIncrements = 0;
2583 for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
2584 I != E; ++I) {
2585
2586 if (I->IncExpr->isZero())
2587 continue;
2588
2589 // Incrementing by zero or some constant is neutral. We assume constants can
2590 // be folded into an addressing mode or an add's immediate operand.
2591 if (isa<SCEVConstant>(I->IncExpr)) {
2592 ++NumConstIncrements;
2593 continue;
2594 }
2595
2596 if (I->IncExpr == LastIncExpr)
2597 ++NumReusedIncrements;
2598 else
2599 ++NumVarIncrements;
2600
2601 LastIncExpr = I->IncExpr;
2602 }
2603 // An IV chain with a single increment is handled by LSR's postinc
2604 // uses. However, a chain with multiple increments requires keeping the IV's
2605 // value live longer than it needs to be if chained.
2606 if (NumConstIncrements > 1)
2607 --cost;
2608
2609 // Materializing increment expressions in the preheader that didn't exist in
2610 // the original code may cost a register. For example, sign-extended array
2611 // indices can produce ridiculous increments like this:
2612 // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
2613 cost += NumVarIncrements;
2614
2615 // Reusing variable increments likely saves a register to hold the multiple of
2616 // the stride.
2617 cost -= NumReusedIncrements;
2618
2619 DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
2620 << "\n");
2621
2622 return cost < 0;
2623}
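
For reference, the cost accounting above can be restated as the standalone sketch below; chainLooksProfitable and the example counts are hypothetical, while the real pass derives them from the chain's IncExprs.

static bool chainLooksProfitable(bool CompletesHeaderPhi, unsigned NumConst,
                                 unsigned NumVar, unsigned NumReused) {
  int Cost = 1;                    // the chain itself may need a register
  if (CompletesHeaderPhi) --Cost;  // a complete chain can retire the original IV
  if (NumConst > 1) --Cost;        // several constant steps fold into addressing modes
  Cost += (int)NumVar;             // each new variable step may need its own register
  Cost -= (int)NumReused;          // reusing a variable step saves one
  return Cost < 0;
}
// e.g. three constant-offset links ending in the header phi:
//   chainLooksProfitable(true, 3, 0, 0) yields Cost == -1, i.e. profitable.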
2624
2625/// ChainInstruction - Add this IV user to an existing chain or make it the head
2626/// of a new chain.
2627void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2628 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
2629 // When IVs are used as types of varying widths, they are generally converted
2630 // to a wider type with some uses remaining narrow under a (free) trunc.
2631 Value *const NextIV = getWideOperand(IVOper);
2632 const SCEV *const OperExpr = SE.getSCEV(NextIV);
2633 const SCEV *const OperExprBase = getExprBase(OperExpr);
2634
2635 // Visit all existing chains. Check if its IVOper can be computed as a
2636 // profitable loop invariant increment from the last link in the Chain.
2637 unsigned ChainIdx = 0, NChains = IVChainVec.size();
2638 const SCEV *LastIncExpr = nullptr;
2639 for (; ChainIdx < NChains; ++ChainIdx) {
2640 IVChain &Chain = IVChainVec[ChainIdx];
2641
2642 // Prune the solution space aggressively by checking that both IV operands
2643 // are expressions that operate on the same unscaled SCEVUnknown. This
2644 // "base" will be canceled by the subsequent getMinusSCEV call. Checking
2645 // first avoids creating extra SCEV expressions.
2646 if (!StressIVChain && Chain.ExprBase != OperExprBase)
2647 continue;
2648
2649 Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
2650 if (!isCompatibleIVType(PrevIV, NextIV))
2651 continue;
2652
2653 // A phi node terminates a chain.
2654 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2655 continue;
2656
2657 // The increment must be loop-invariant so it can be kept in a register.
2658 const SCEV *PrevExpr = SE.getSCEV(PrevIV);
2659 const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
2660 if (!SE.isLoopInvariant(IncExpr, L))
2661 continue;
2662
2663 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2664 LastIncExpr = IncExpr;
2665 break;
2666 }
2667 }
2668 // If we haven't found a chain, create a new one, unless we hit the max. Don't
2669 // bother for phi nodes, because they must be last in the chain.
2670 if (ChainIdx == NChains) {
2671 if (isa<PHINode>(UserInst))
2672 return;
2673 if (NChains >= MaxChains && !StressIVChain) {
2674 DEBUG(dbgs() << "IV Chain Limit\n");
2675 return;
2676 }
2677 LastIncExpr = OperExpr;
2678 // IVUsers may have skipped over sign/zero extensions. We don't currently
2679 // attempt to form chains involving extensions unless they can be hoisted
2680 // into this loop's AddRec.
2681 if (!isa<SCEVAddRecExpr>(LastIncExpr))
2682 return;
2683 ++NChains;
2684 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
2685 OperExprBase));
2686 ChainUsersVec.resize(NChains);
2687 DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
2688 << ") IV=" << *LastIncExpr << "\n");
2689 } else {
2690 DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
2691 << ") IV+" << *LastIncExpr << "\n");
2692 // Add this IV user to the end of the chain.
2693 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
2694 }
2695 IVChain &Chain = IVChainVec[ChainIdx];
2696
2697 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
2698 // This chain's NearUsers become FarUsers.
2699 if (!LastIncExpr->isZero()) {
2700 ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
2701 NearUsers.end());
2702 NearUsers.clear();
2703 }
2704
2705 // All other uses of IVOperand become near uses of the chain.
2706 // We currently ignore intermediate values within SCEV expressions, assuming
2707 // they will eventually be used by the current chain, or can be computed
2708 // from one of the chain increments. To be more precise we could
2709 // transitively follow its user and only add leaf IV users to the set.
2710 for (User *U : IVOper->users()) {
2711 Instruction *OtherUse = dyn_cast<Instruction>(U);
2712 if (!OtherUse)
2713 continue;
2714 // Uses in the chain will no longer be uses if the chain is formed.
2715 // Include the head of the chain in this iteration (not Chain.begin()).
2716 IVChain::const_iterator IncIter = Chain.Incs.begin();
2717 IVChain::const_iterator IncEnd = Chain.Incs.end();
2718 for( ; IncIter != IncEnd; ++IncIter) {
2719 if (IncIter->UserInst == OtherUse)
2720 break;
2721 }
2722 if (IncIter != IncEnd)
2723 continue;
2724
2725 if (SE.isSCEVable(OtherUse->getType())
2726 && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
2727 && IU.isIVUserOrOperand(OtherUse)) {
2728 continue;
2729 }
2730 NearUsers.insert(OtherUse);
2731 }
2732
2733 // Since this user is part of the chain, it's no longer considered a use
2734 // of the chain.
2735 ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
2736}
2737
2738/// CollectChains - Populate the vector of Chains.
2739///
2740/// This decreases ILP at the architecture level. Targets with ample registers,
2741/// multiple memory ports, and no register renaming probably don't want
2742/// this. However, such targets should probably disable LSR altogether.
2743///
2744/// The job of LSR is to make a reasonable choice of induction variables across
2745/// the loop. Subsequent passes can easily "unchain" computation exposing more
2746/// ILP *within the loop* if the target wants it.
2747///
2748/// Finding the best IV chain is potentially a scheduling problem. Since LSR
2749/// will not reorder memory operations, it will recognize this as a chain, but
2750/// will generate redundant IV increments. Ideally this would be corrected later
2751/// by a smart scheduler:
2752/// = A[i]
2753/// = A[i+x]
2754/// A[i] =
2755/// A[i+x] =
2756///
2757/// TODO: Walk the entire domtree within this loop, not just the path to the
2758/// loop latch. This will discover chains on side paths, but requires
2759/// maintaining multiple copies of the Chains state.
2760void LSRInstance::CollectChains() {
2761 DEBUG(dbgs() << "Collecting IV Chains.\n");
2762 SmallVector<ChainUsers, 8> ChainUsersVec;
2763
2764 SmallVector<BasicBlock *,8> LatchPath;
2765 BasicBlock *LoopHeader = L->getHeader();
2766 for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
2767 Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
2768 LatchPath.push_back(Rung->getBlock());
2769 }
2770 LatchPath.push_back(LoopHeader);
2771
2772 // Walk the instruction stream from the loop header to the loop latch.
2773 for (SmallVectorImpl<BasicBlock *>::reverse_iterator
2774 BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
2775 BBIter != BBEnd; ++BBIter) {
2776 for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
2777 I != E; ++I) {
2778 // Skip instructions that weren't seen by IVUsers analysis.
2779 if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
2780 continue;
2781
2782 // Ignore users that are part of a SCEV expression. This way we only
2783 // consider leaf IV Users. This effectively rediscovers a portion of
2784 // IVUsers analysis but in program order this time.
2785 if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
2786 continue;
2787
2788 // Remove this instruction from any NearUsers set it may be in.
2789 for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
2790 ChainIdx < NChains; ++ChainIdx) {
2791 ChainUsersVec[ChainIdx].NearUsers.erase(I);
2792 }
2793 // Search for operands that can be chained.
2794 SmallPtrSet<Instruction*, 4> UniqueOperands;
2795 User::op_iterator IVOpEnd = I->op_end();
2796 User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
2797 while (IVOpIter != IVOpEnd) {
2798 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
2799 if (UniqueOperands.insert(IVOpInst).second)
2800 ChainInstruction(I, IVOpInst, ChainUsersVec);
2801 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
2802 }
2803 } // Continue walking down the instructions.
2804 } // Continue walking down the domtree.
2805 // Visit phi backedges to determine if the chain can generate the IV postinc.
2806 for (BasicBlock::iterator I = L->getHeader()->begin();
2807 PHINode *PN = dyn_cast<PHINode>(I); ++I) {
2808 if (!SE.isSCEVable(PN->getType()))
2809 continue;
2810
2811 Instruction *IncV =
2812 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
2813 if (IncV)
2814 ChainInstruction(PN, IncV, ChainUsersVec);
2815 }
2816 // Remove any unprofitable chains.
2817 unsigned ChainIdx = 0;
2818 for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
2819 UsersIdx < NChains; ++UsersIdx) {
2820 if (!isProfitableChain(IVChainVec[UsersIdx],
2821 ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
2822 continue;
2823    // Preserve the chain at UsersIdx.
2824 if (ChainIdx != UsersIdx)
2825 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
2826 FinalizeChain(IVChainVec[ChainIdx]);
2827 ++ChainIdx;
2828 }
2829 IVChainVec.resize(ChainIdx);
2830}
2831
2832void LSRInstance::FinalizeChain(IVChain &Chain) {
2833  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2834  DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
2835
2836 for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
2837 I != E; ++I) {
2838    DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
2839 User::op_iterator UseI =
2840 std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
2841    assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
2842 IVIncSet.insert(UseI);
2843 }
2844}
2845
2846/// Return true if the IVInc can be folded into an addressing mode.
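/// Roughly, this holds when the increment is a small constant and the user is
/// a memory access whose addressing mode can absorb that constant as an
/// immediate offset; the precise condition is the isAlwaysFoldable query below
/// (an informal characterization, not an additional guarantee).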
2847static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
2848 Value *Operand, const TargetTransformInfo &TTI) {
2849 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
2850 if (!IncConst || !isAddressUse(UserInst, Operand))
2851 return false;
2852
2853 if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
2854 return false;
2855
2856 int64_t IncOffset = IncConst->getValue()->getSExtValue();
2857 if (!isAlwaysFoldable(TTI, LSRUse::Address,
2858 getAccessType(UserInst), /*BaseGV=*/ nullptr,
2859                        IncOffset, /*HasBaseReg=*/ false))
2860 return false;
2861
2862 return true;
2863}
2864
2865/// GenerateIVChain - Generate an add or subtract for each IVInc in a chain to
2866/// materialize the IV user's operand from the previous IV user's operand.
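/// For example, if a chain's users address p, p+4, and p+8 off a single IV,
/// each user's operand can be rebuilt as the previous user's operand plus the
/// (sign-extended) increment expression, so only one IV value needs to stay
/// live at a time (an illustrative reading of the loop below).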
2867void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
2868 SmallVectorImpl<WeakVH> &DeadInsts) {
2869 // Find the new IVOperand for the head of the chain. It may have been replaced
2870 // by LSR.
2871 const IVInc &Head = Chain.Incs[0];
2872 User::op_iterator IVOpEnd = Head.UserInst->op_end();
2873 // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
2874 User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
2875 IVOpEnd, L, SE);
2876 Value *IVSrc = nullptr;
2877 while (IVOpIter != IVOpEnd) {
2878 IVSrc = getWideOperand(*IVOpIter);
2879
2880 // If this operand computes the expression that the chain needs, we may use
2881 // it. (Check this after setting IVSrc which is used below.)
2882 //
2883 // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
2884 // narrow for the chain, so we can no longer use it. We do allow using a
2885 // wider phi, assuming the LSR checked for free truncation. In that case we
2886 // should already have a truncate on this operand such that
2887 // getSCEV(IVSrc) == IncExpr.
2888 if (SE.getSCEV(*IVOpIter) == Head.IncExpr
2889 || SE.getSCEV(IVSrc) == Head.IncExpr) {
2890 break;
2891 }
2892 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
2893 }
2894 if (IVOpIter == IVOpEnd) {
2895 // Gracefully give up on this chain.
2896    DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
2897 return;
2898 }
2899
2900  DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
2901 Type *IVTy = IVSrc->getType();
2902 Type *IntTy = SE.getEffectiveSCEVType(IVTy);
2903 const SCEV *LeftOverExpr = nullptr;
2904 for (IVChain::const_iterator IncI = Chain.begin(),
2905 IncE = Chain.end(); IncI != IncE; ++IncI) {
2906
2907 Instruction *InsertPt = IncI->UserInst;
2908 if (isa<PHINode>(InsertPt))
2909 InsertPt = L->getLoopLatch()->getTerminator();
2910
2911 // IVOper will replace the current IV User's operand. IVSrc is the IV
2912 // value currently held in a register.
2913 Value *IVOper = IVSrc;
2914 if (!IncI->IncExpr->isZero()) {
2915 // IncExpr was the result of subtraction of two narrow values, so must
2916 // be signed.
2917 const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
2918 LeftOverExpr = LeftOverExpr ?
2919 SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
2920 }
2921 if (LeftOverExpr && !LeftOverExpr->isZero()) {
2922 // Expand the IV increment.
2923 Rewriter.clearPostInc();
2924 Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
2925 const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
2926 SE.getUnknown(IncV));
2927 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
2928
2929 // If an IV increment can't be folded, use it as the next IV value.
2930 if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
2931 TTI)) {
2932        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
2933 IVSrc = IVOper;
2934 LeftOverExpr = nullptr;
2935 }
2936 }
2937 Type *OperTy = IncI->IVOperand->getType();
2938 if (IVTy != OperTy) {
2939      assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
2940             "cannot extend a chained IV");
2941 IRBuilder<> Builder(InsertPt);
2942 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
2943 }
2944 IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
2945 DeadInsts.push_back(IncI->IVOperand);
2946 }
2947 // If LSR created a new, wider phi, we may also replace its postinc. We only
2948 // do this if we also found a wide value for the head of the chain.
2949 if (isa<PHINode>(Chain.tailUserInst())) {
2950 for (BasicBlock::iterator I = L->getHeader()->begin();
2951 PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
2952 if (!isCompatibleIVType(Phi, IVSrc))
2953 continue;
2954 Instruction *PostIncV = dyn_cast<Instruction>(
2955 Phi->getIncomingValueForBlock(L->getLoopLatch()));
2956 if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
2957 continue;
2958 Value *IVOper = IVSrc;
2959 Type *PostIncTy = PostIncV->getType();
2960 if (IVTy != PostIncTy) {
2961        assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
2962 IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
2963 Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
2964 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
2965 }
2966 Phi->replaceUsesOfWith(PostIncV, IVOper);
2967 DeadInsts.push_back(PostIncV);
2968 }
2969 }
2970}
2971
2972void LSRInstance::CollectFixupsAndInitialFormulae() {
2973 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
2974 Instruction *UserInst = UI->getUser();
2975 // Skip IV users that are part of profitable IV Chains.
2976 User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
2977 UI->getOperandValToReplace());
2978    assert(UseI != UserInst->op_end() && "cannot find IV operand");
2979 if (IVIncSet.count(UseI))
2980 continue;
2981
2982 // Record the uses.
2983 LSRFixup &LF = getNewFixup();
2984 LF.UserInst = UserInst;
2985 LF.OperandValToReplace = UI->getOperandValToReplace();
2986 LF.PostIncLoops = UI->getPostIncLoops();
2987
2988 LSRUse::KindType Kind = LSRUse::Basic;
2989 Type *AccessTy = nullptr;
2990 if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
2991 Kind = LSRUse::Address;
2992 AccessTy = getAccessType(LF.UserInst);
2993 }
2994
2995 const SCEV *S = IU.getExpr(*UI);
2996
2997 // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
2998 // (N - i == 0), and this allows (N - i) to be the expression that we work
2999 // with rather than just N or i, so we can consider the register
3000 // requirements for both N and i at the same time. Limiting this code to
3001 // equality icmps is not a problem because all interesting loops use
3002 // equality icmps, thanks to IndVarSimplify.
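    // For example, a loop exit test 'icmp eq %i.next, %n' becomes an ICmpZero
    // use over the expression (n - i), letting LSR strength-reduce the
    // combined expression rather than i and n separately (illustrative; the
    // rewrite itself happens just below).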
3003 if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
3004 if (CI->isEquality()) {
3005 // Swap the operands if needed to put the OperandValToReplace on the
3006 // left, for consistency.
3007 Value *NV = CI->getOperand(1);
3008 if (NV == LF.OperandValToReplace) {
3009 CI->setOperand(1, CI->getOperand(0));
3010 CI->setOperand(0, NV);
3011 NV = CI->getOperand(1);
3012 Changed = true;
3013 }
3014
3015 // x == y --> x - y == 0
3016 const SCEV *N = SE.getSCEV(NV);
3017 if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
3018 // S is normalized, so normalize N before folding it into S
3019 // to keep the result normalized.
3020 N = TransformForPostIncUse(Normalize, N, CI, nullptr,
3021 LF.PostIncLoops, SE, DT);
3022 Kind = LSRUse::ICmpZero;
3023 S = SE.getMinusSCEV(N, S);
3024 }
3025
3026 // -1 and the negations of all interesting strides (except the negation
3027 // of -1) are now also interesting.
3028 for (size_t i = 0, e = Factors.size(); i != e; ++i)
3029 if (Factors[i] != -1)
3030 Factors.insert(-(uint64_t)Factors[i]);
3031 Factors.insert(-1);
3032 }
3033
3034 // Set up the initial formula for this use.
3035 std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
3036 LF.LUIdx = P.first;
3037 LF.Offset = P.second;
3038 LSRUse &LU = Uses[LF.LUIdx];
3039 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3040 if (!LU.WidestFixupType ||
3041 SE.getTypeSizeInBits(LU.WidestFixupType) <
3042 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3043 LU.WidestFixupType = LF.OperandValToReplace->getType();
3044
3045 // If this is the first use of this LSRUse, give it a formula.
3046 if (LU.Formulae.empty()) {
3047 InsertInitialFormula(S, LU, LF.LUIdx);
3048 CountRegisters(LU.Formulae.back(), LF.LUIdx);
3049 }
3050 }
3051
3052  DEBUG(print_fixups(dbgs()));
3053}
3054
3055/// InsertInitialFormula - Insert a formula for the given expression into
3056/// the given use, separating out loop-variant portions from loop-invariant
3057/// and loop-computable portions.
3058void
3059LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
3060 // Mark uses whose expressions cannot be expanded.
3061 if (!isSafeToExpand(S, SE))
3062 LU.RigidFormula = true;
3063
3064 Formula F;
3065 F.InitialMatch(S, L, SE);
3066 bool Inserted = InsertFormula(LU, LUIdx, F);
3067  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3068}
3069
3070/// InsertSupplementalFormula - Insert a simple single-register formula for
3071/// the given expression into the given use.
3072void
3073LSRInstance::InsertSupplementalFormula(const SCEV *S,
3074 LSRUse &LU, size_t LUIdx) {
3075 Formula F;
3076 F.BaseRegs.push_back(S);
3077 F.HasBaseReg = true;
3078 bool Inserted = InsertFormula(LU, LUIdx, F);
3079  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3080}
3081
3082/// CountRegisters - Note which registers are used by the given formula,
3083/// updating RegUses.
3084void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3085 if (F.ScaledReg)
3086 RegUses.CountRegister(F.ScaledReg, LUIdx);
3087 for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
3088 E = F.BaseRegs.end(); I != E; ++I)
3089 RegUses.CountRegister(*I, LUIdx);
3090}
3091
3092/// InsertFormula - If the given formula has not yet been inserted, add it to
3093/// the list, and return true. Return false otherwise.
3094bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3095 // Do not insert formula that we will not be able to expand.
3096  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3097         "Formula is illegal");
3098 if (!LU.InsertFormula(F))
3099 return false;
3100
3101 CountRegisters(F, LUIdx);
3102 return true;
3103}
3104
3105/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
3106/// loop-invariant values which we're tracking. These other uses will pin these
3107/// values in registers, making them less profitable for elimination.
3108/// TODO: This currently misses non-constant addrec step registers.
3109/// TODO: Should this give more weight to users inside the loop?
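/// For example, if a loop-invariant bound %n used by the exit test also has
/// uses after the loop, those uses keep %n live in a register anyway, so
/// formulae that fold %n away inside the loop save less than they appear to
/// (an illustrative case; the walk below records such uses as extra fixups).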
3110void
3111LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3112 SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
3113 SmallPtrSet<const SCEV *, 32> Visited;
3114
3115 while (!Worklist.empty()) {
3116 const SCEV *S = Worklist.pop_back_val();
3117
3118 // Don't process the same SCEV twice
3119 if (!Visited.insert(S).second)
3120 continue;
3121
3122 if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
3123 Worklist.append(N->op_begin(), N->op_end());
3124 else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
3125 Worklist.push_back(C->getOperand());
3126 else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
3127 Worklist.push_back(D->getLHS());
3128 Worklist.push_back(D->getRHS());
3129 } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3130 const Value *V = US->getValue();
3131 if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
3132 // Look for instructions defined outside the loop.
3133 if (L->contains(Inst)) continue;
3134 } else if (isa<UndefValue>(V))
3135 // Undef doesn't have a live range, so it doesn't matter.
3136 continue;
3137 for (const Use &U : V->uses()) {
3138 const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
3139 // Ignore non-instructions.
3140 if (!UserInst)
3141 continue;
3142 // Ignore instructions in other functions (as can happen with
3143 // Constants).
3144 if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3145 continue;
3146 // Ignore instructions not dominated by the loop.
3147 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3148 UserInst->getParent() :
3149 cast<PHINode>(UserInst)->getIncomingBlock(
3150 PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
3151 if (!DT.dominates(L->getHeader(), UseBB))
3152 continue;
3153 // Ignore uses which are part of other SCEV expressions, to avoid
3154 // analyzing them multiple times.
3155 if (SE.isSCEVable(UserInst->getType())) {
3156 const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
3157 // If the user is a no-op, look through to its uses.
3158 if (!isa<SCEVUnknown>(UserS))
3159 continue;
3160 if (UserS == US) {
3161 Worklist.push_back(
3162 SE.getUnknown(const_cast<Instruction *>(UserInst)));
3163 continue;
3164 }
3165 }
3166 // Ignore icmp instructions which are already being analyzed.
3167 if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3168 unsigned OtherIdx = !U.getOperandNo();
3169 Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
3170 if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
3171 continue;
3172 }
3173
3174 LSRFixup &LF = getNewFixup();
3175 LF.UserInst = const_cast<Instruction *>(UserInst);
3176 LF.OperandValToReplace = U;
3177 std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
3178 LF.LUIdx = P.first;
3179 LF.Offset = P.second;
3180 LSRUse &LU = Uses[LF.LUIdx];
3181 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3182 if (!LU.WidestFixupType ||
3183 SE.getTypeSizeInBits(LU.WidestFixupType) <
3184 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3185 LU.WidestFixupType = LF.OperandValToReplace->getType();
3186 InsertSupplementalFormula(US, LU, LF.LUIdx);
3187 CountRegisters(LU.Formulae.back(), Uses.size() - 1);
3188 break;
3189 }
3190 }
3191 }
3192}
3193
3194/// CollectSubexprs - Split S into subexpressions which can be pulled out into
3195/// separate registers. If C is non-null, multiply each subexpression by C.
3196///
3197/// Return remainder expression after factoring the subexpressions captured by
3198/// Ops. If Ops is complete, return NULL.
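/// For example, (X + Y + {S,+,T}<L>) is typically split into the
/// subexpressions X, Y, S, and {0,+,T}<L>, with null returned because the
/// whole expression was captured (a sketch; the exact split also depends on
/// the recursion cap and the multiplier C below).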
3199static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
3200 SmallVectorImpl<const SCEV *> &Ops,
3201 const Loop *L,
3202 ScalarEvolution &SE,
3203 unsigned Depth = 0) {
3204 // Arbitrarily cap recursion to protect compile time.
3205 if (Depth >= 3)
3206 return S;
3207
3208 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
3209 // Break out add operands.
3210 for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
3211 I != E; ++I) {
3212 const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
3213 if (Remainder)
3214 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3215 }
3216 return nullptr;
3217 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3218 // Split a non-zero base out of an addrec.
3219 if (AR->getStart()->isZero())
3220 return S;
3221
3222 const SCEV *Remainder = CollectSubexprs(AR->getStart(),
3223 C, Ops, L, SE, Depth+1);
3224 // Split the non-zero AddRec unless it is part of a nested recurrence that
3225 // does not pertain to this loop.
3226 if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3227 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3228 Remainder = nullptr;
3229 }
3230 if (Remainder != AR->getStart()) {
3231 if (!Remainder)
3232 Remainder = SE.getConstant(AR->getType(), 0);
3233 return SE.getAddRecExpr(Remainder,
3234 AR->getStepRecurrence(SE),
3235 AR->getLoop(),
3236 //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
3237 SCEV::FlagAnyWrap);
3238 }
3239 } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
3240 // Break (C * (a + b + c)) into C*a + C*b + C*c.
3241 if (Mul->getNumOperands() != 2)
3242 return S;
3243 if (const SCEVConstant *Op0 =
3244 dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
3245 C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
3246 const SCEV *Remainder =
3247 CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
3248 if (Remainder)
3249 Ops.push_back(SE.getMulExpr(C, Remainder));
3250 return nullptr;
3251 }
3252 }
3253 return S;
3254}
3255
3256/// \brief Helper function for LSRInstance::GenerateReassociations.
3257void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3258 const Formula &Base,
3259 unsigned Depth, size_t Idx,
3260 bool IsScaledReg) {
3261 const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3262 SmallVector<const SCEV *, 8> AddOps;
3263 const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
3264 if (Remainder)
3265 AddOps.push_back(Remainder);
3266
3267 if (AddOps.size() == 1)
3268 return;
3269
3270 for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
3271 JE = AddOps.end();
3272 J != JE; ++J) {
3273
3274 // Loop-variant "unknown" values are uninteresting; we won't be able to
3275 // do anything meaningful with them.
3276 if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
3277 continue;
3278
3279 // Don't pull a constant into a register if the constant could be folded
3280 // into an immediate field.
3281 if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3282 LU.AccessTy, *J, Base.getNumRegs() > 1))
3283 continue;
3284
3285 // Collect all operands except *J.
3286 SmallVector<const SCEV *, 8> InnerAddOps(
3287 ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
3288 InnerAddOps.append(std::next(J),
3289 ((const SmallVector<const SCEV *, 8> &)AddOps).end());
3290
3291 // Don't leave just a constant behind in a register if the constant could
3292 // be folded into an immediate field.
3293 if (InnerAddOps.size() == 1 &&
3294 isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3295 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3296 continue;
3297
3298 const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3299 if (InnerSum->isZero())
3300 continue;
3301 Formula F = Base;
3302
3303 // Add the remaining pieces of the add back into the new formula.
3304 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3305 if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3306 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3307 InnerSumSC->getValue()->getZExtValue())) {
3308 F.UnfoldedOffset =
3309 (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
3310 if (IsScaledReg)
3311 F.ScaledReg = nullptr;
3312 else
3313 F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
3314 } else if (IsScaledReg)
3315 F.ScaledReg = InnerSum;
3316 else
3317 F.BaseRegs[Idx] = InnerSum;
3318
3319 // Add J as its own register, or an unfolded immediate.
3320 const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
3321 if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3322 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3323 SC->getValue()->getZExtValue()))
3324 F.UnfoldedOffset =
3325 (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
3326 else
3327 F.BaseRegs.push_back(*J);
3328      // We may have changed the number of registers in the base regs; adjust the
3329      // formula accordingly.
3330 F.Canonicalize();
3331
3332 if (InsertFormula(LU, LUIdx, F))
3333 // If that formula hadn't been seen before, recurse to find more like
3334 // it.
3335 GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
3336 }
3337}
3338
3339/// GenerateReassociations - Split out subexpressions from adds and the bases of
3340/// addrecs.
3341void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3342 Formula Base, unsigned Depth) {
3343  assert(Base.isCanonical() && "Input must be in the canonical form");
3344 // Arbitrarily cap recursion to protect compile time.
3345 if (Depth >= 3)
3346 return;
3347
3348 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3349 GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
3350
3351 if (Base.Scale == 1)
3352 GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
3353 /* Idx */ -1, /* IsScaledReg */ true);
3354}
3355
3356/// GenerateCombinations - Generate a formula consisting of all of the
3357/// loop-dominating registers added into a single register.
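/// For example, a formula with base registers {A, B, {0,+,1}<L>}, where A and
/// B dominate the loop and have no evolution in it, yields a new formula over
/// {(A + B), {0,+,1}<L>}, trading one add for lower register pressure
/// (illustrative; see the Ops accumulation below).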
3358void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
3359 Formula Base) {
3360 // This method is only interesting on a plurality of registers.
3361 if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
3362 return;
3363
3364 // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
3365 // processing the formula.
3366 Base.Unscale();
3367 Formula F = Base;
3368 F.BaseRegs.clear();
3369 SmallVector<const SCEV *, 4> Ops;
3370 for (SmallVectorImpl<const SCEV *>::const_iterator
3371 I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
3372 const SCEV *BaseReg = *I;
3373 if (SE.properlyDominates(BaseReg, L->getHeader()) &&
3374 !SE.hasComputableLoopEvolution(BaseReg, L))
3375 Ops.push_back(BaseReg);
3376 else
3377 F.BaseRegs.push_back(BaseReg);
3378 }
3379 if (Ops.size() > 1) {
3380 const SCEV *Sum = SE.getAddExpr(Ops);
3381 // TODO: If Sum is zero, it probably means ScalarEvolution missed an
3382 // opportunity to fold something. For now, just ignore such cases
3383 // rather than proceed with zero in a register.
3384 if (!Sum->isZero()) {
3385 F.BaseRegs.push_back(Sum);
3386 F.Canonicalize();
3387 (void)InsertFormula(LU, LUIdx, F);
3388 }
3389 }
3390}
3391
3392/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
3393void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
3394 const Formula &Base, size_t Idx,
3395 bool IsScaledReg) {
3396 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3397 GlobalValue *GV = ExtractSymbol(G, SE);
3398 if (G->isZero() || !GV)
3399 return;
3400 Formula F = Base;
3401 F.BaseGV = GV;
3402 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3403 return;
3404 if (IsScaledReg)
3405 F.ScaledReg = G;
3406 else
3407 F.BaseRegs[Idx] = G;
3408 (void)InsertFormula(LU, LUIdx, F);
3409}
3410
3411/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
3412void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
3413 Formula Base) {
3414 // We can't add a symbolic offset if the address already contains one.
3415 if (Base.BaseGV) return;
3416
3417 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3418 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
3419 if (Base.Scale == 1)
3420 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
3421 /* IsScaledReg */ true);
3422}
3423
3424/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
3425void LSRInstance::GenerateConstantOffsetsImpl(
3426 LSRUse &LU, unsigned LUIdx, const Formula &Base,
3427 const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
3428 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3429 for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
3430 E = Worklist.end();
3431 I != E; ++I) {
3432 Formula F = Base;
3433 F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
3434 if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
3435 LU.AccessTy, F)) {
3436 // Add the offset to the base register.
3437 const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
3438 // If it cancelled out, drop the base register, otherwise update it.
3439 if (NewG->isZero()) {
3440 if (IsScaledReg) {
3441 F.Scale = 0;
3442 F.ScaledReg = nullptr;
3443 } else
3444 F.DeleteBaseReg(F.BaseRegs[Idx]);
3445 F.Canonicalize();
3446 } else if (IsScaledReg)
3447 F.ScaledReg = NewG;
3448 else
3449 F.BaseRegs[Idx] = NewG;
3450
3451 (void)InsertFormula(LU, LUIdx, F);
3452 }
3453 }
3454
3455 int64_t Imm = ExtractImmediate(G, SE);
3456 if (G->isZero() || Imm == 0)
3457 return;
3458 Formula F = Base;
3459 F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
3460 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3461 return;
3462 if (IsScaledReg)
3463 F.ScaledReg = G;
3464 else
3465 F.BaseRegs[Idx] = G;
3466 (void)InsertFormula(LU, LUIdx, F);
3467}
3468
3469/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
3470void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
3471 Formula Base) {
3472 // TODO: For now, just add the min and max offset, because it usually isn't
3473  // worthwhile looking at everything in between.
3474 SmallVector<int64_t, 2> Worklist;
3475 Worklist.push_back(LU.MinOffset);
3476 if (LU.MaxOffset != LU.MinOffset)
3477 Worklist.push_back(LU.MaxOffset);
3478
3479 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3480 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
3481 if (Base.Scale == 1)
3482 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
3483 /* IsScaledReg */ true);
3484}
3485
3486/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
3487/// the comparison. For example, x == y -> x*c == y*c.
3488void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
3489 Formula Base) {
3490 if (LU.Kind != LSRUse::ICmpZero) return;
3491
3492 // Determine the integer type for the base formula.
3493 Type *IntTy = Base.getType();
3494 if (!IntTy) return;
3495 if (SE.getTypeSizeInBits(IntTy) > 64) return;
3496
3497 // Don't do this if there is more than one offset.
3498 if (LU.MinOffset != LU.MaxOffset) return;
3499
3500  assert(!Base.BaseGV && "ICmpZero use is not legal!");
3501
3502 // Check each interesting stride.
3503 for (SmallSetVector<int64_t, 8>::const_iterator
3504 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
3505 int64_t Factor = *I;
3506
3507 // Check that the multiplication doesn't overflow.
3508    if (Base.BaseOffset == INT64_MIN && Factor == -1)
3509 continue;
3510 int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
3511 if (NewBaseOffset / Factor != Base.BaseOffset)
3512 continue;
3513 // If the offset will be truncated at this use, check that it is in bounds.
3514 if (!IntTy->isPointerTy() &&
3515 !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
3516 continue;
3517
3518 // Check that multiplying with the use offset doesn't overflow.
3519 int64_t Offset = LU.MinOffset;
3520    if (Offset == INT64_MIN && Factor == -1)
3521 continue;
3522 Offset = (uint64_t)Offset * Factor;
3523 if (Offset / Factor != LU.MinOffset)
3524 continue;
3525 // If the offset will be truncated at this use, check that it is in bounds.
3526 if (!IntTy->isPointerTy() &&
3527 !ConstantInt::isValueValidForType(IntTy, Offset))
3528 continue;
3529
3530 Formula F = Base;
3531 F.BaseOffset = NewBaseOffset;
3532
3533 // Check that this scale is legal.
3534 if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
3535 continue;
3536
3537 // Compensate for the use having MinOffset built into it.
3538 F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
3539
3540 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3541
3542 // Check that multiplying with each base register doesn't overflow.
3543 for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
3544 F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
3545 if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
3546 goto next;
3547 }
3548
3549 // Check that multiplying with the scaled register doesn't overflow.
3550 if (F.ScaledReg) {
3551 F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
3552 if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
3553 continue;
3554 }
3555
3556 // Check that multiplying with the unfolded offset doesn't overflow.
3557 if (F.UnfoldedOffset != 0) {
3558      if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
3559 continue;
3560 F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
3561 if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
3562 continue;
3563 // If the offset will be truncated, check that it is in bounds.
3564 if (!IntTy->isPointerTy() &&
3565 !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
3566 continue;
3567 }
3568
3569 // If we make it here and it's legal, add it.
3570 (void)InsertFormula(LU, LUIdx, F);
3571 next:;
3572 }
3573}
3574
3575/// GenerateScales - Generate stride factor reuse formulae by making use of
3576/// scaled-offset address modes, for example.
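/// For example, with an interesting factor of 4, an addrec base register
/// {0,+,4}<L> can be divided through so that {0,+,1}<L> becomes the scaled
/// register with Scale = 4, which a reg + 4*reg addressing mode can absorb
/// (illustrative; the division is performed via getExactSDiv below).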
3577void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
3578 // Determine the integer type for the base formula.
3579 Type *IntTy = Base.getType();
3580 if (!IntTy) return;
3581
3582 // If this Formula already has a scaled register, we can't add another one.
3583 // Try to unscale the formula to generate a better scale.
3584 if (Base.Scale != 0 && !Base.Unscale())
3585 return;
3586
3587  assert(Base.Scale == 0 && "Unscale did not do its job!");
3588
3589 // Check each interesting stride.
3590 for (SmallSetVector<int64_t, 8>::const_iterator
3591 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
3592 int64_t Factor = *I;
3593
3594 Base.Scale = Factor;
3595 Base.HasBaseReg = Base.BaseRegs.size() > 1;
3596 // Check whether this scale is going to be legal.
3597 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3598 Base)) {
3599      // As a special case, handle out-of-loop Basic users specially.
3600 // TODO: Reconsider this special case.
3601 if (LU.Kind == LSRUse::Basic &&
3602 isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
3603 LU.AccessTy, Base) &&
3604 LU.AllFixupsOutsideLoop)
3605 LU.Kind = LSRUse::Special;
3606 else
3607 continue;
3608 }
3609 // For an ICmpZero, negating a solitary base register won't lead to
3610 // new solutions.
3611 if (LU.Kind == LSRUse::ICmpZero &&
3612 !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
3613 continue;
3614 // For each addrec base reg, apply the scale, if possible.
3615 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3616 if (const SCEVAddRecExpr *AR =
3617 dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
3618 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3619 if (FactorS->isZero())
3620 continue;
3621 // Divide out the factor, ignoring high bits, since we'll be
3622 // scaling the value back up in the end.
3623 if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
3624 // TODO: This could be optimized to avoid all the copying.
3625 Formula F = Base;
3626 F.ScaledReg = Quotient;
3627 F.DeleteBaseReg(F.BaseRegs[i]);
3628 // The canonical representation of 1*reg is reg, which is already in
3629 // Base. In that case, do not try to insert the formula, it will be
3630 // rejected anyway.
3631 if (F.Scale == 1 && F.BaseRegs.empty())
3632 continue;
3633 (void)InsertFormula(LU, LUIdx, F);
3634 }
3635 }
3636 }
3637}
3638
3639/// GenerateTruncates - Generate reuse formulae from different IV types.
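/// For example, if a use is i32 but a wider i64 IV is available and the target
/// reports the i64-to-i32 truncation as free, the formula's registers are
/// any-extended to the wider type so the existing IV can serve this use with
/// only a free truncate (illustrative; see the isTruncateFree check below).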
3640void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
3641 // Don't bother truncating symbolic values.
3642 if (Base.BaseGV) return;
3643
3644 // Determine the integer type for the base formula.
3645 Type *DstTy = Base.getType();
3646 if (!DstTy) return;
3647 DstTy = SE.getEffectiveSCEVType(DstTy);
3648
3649 for (SmallSetVector<Type *, 4>::const_iterator
3650 I = Types.begin(), E = Types.end(); I != E; ++I) {
3651 Type *SrcTy = *I;
3652 if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
3653 Formula F = Base;
3654
3655 if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
3656 for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
3657 JE = F.BaseRegs.end(); J != JE; ++J)
3658 *J = SE.getAnyExtendExpr(*J, SrcTy);
3659
3660 // TODO: This assumes we've done basic processing on all uses and
3661 // have an idea what the register usage is.
3662 if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
3663 continue;
3664
3665 (void)InsertFormula(LU, LUIdx, F);
3666 }
3667 }
3668}
3669
3670namespace {
3671
3672/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
3673/// defer modifications so that the search phase doesn't have to worry about
3674/// the data structures moving underneath it.
3675struct WorkItem {
3676 size_t LUIdx;
3677 int64_t Imm;
3678 const SCEV *OrigReg;
3679
3680 WorkItem(size_t LI, int64_t I, const SCEV *R)
3681 : LUIdx(LI), Imm(I), OrigReg(R) {}
3682
3683 void print(raw_ostream &OS) const;
3684 void dump() const;
3685};
3686
3687}
3688
3689void WorkItem::print(raw_ostream &OS) const {
3690 OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
3691 << " , add offset " << Imm;
3692}
3693
3694#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3695void WorkItem::dump() const {
3696 print(errs()); errs() << '\n';
3697}
3698#endif
3699
3700/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
3701/// distance apart and try to form reuse opportunities between them.
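/// For example, if one use references (reg + 4) and another references
/// (reg + 8), the two registers differ only by a constant, so formulae can be
/// rewritten to share a single register and fold the difference into their
/// immediate offsets (illustrative; the grouping by base value happens below).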
3702void LSRInstance::GenerateCrossUseConstantOffsets() {
3703 // Group the registers by their value without any added constant offset.
3704 typedef std::map<int64_t, const SCEV *> ImmMapTy;
3705 typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
3706 RegMapTy Map;
3707 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
3708 SmallVector<const SCEV *, 8> Sequence;
3709 for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
3710 I != E; ++I) {
3711 const SCEV *Reg = *I;
3712 int64_t Imm = ExtractImmediate(Reg, SE);
3713 std::pair<RegMapTy::iterator, bool> Pair =
3714 Map.insert(std::make_pair(Reg, ImmMapTy()));
3715 if (Pair.second)
3716 Sequence.push_back(Reg);
3717 Pair.first->second.insert(std::make_pair(Imm, *I));
3718 UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
3719 }
3720
3721 // Now examine each set of registers with the same base value. Build up
3722 // a list of work to do and do the work in a separate step so that we're
3723 // not adding formulae and register counts while we're searching.
3724 SmallVector<WorkItem, 32> WorkItems;
3725 SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
3726 for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
3727 E = Sequence.end(); I != E; ++I) {
3728 const SCEV *Reg = *I;
3729 const ImmMapTy &Imms = Map.find(Reg)->second;
3730
3731 // It's not worthwhile looking for reuse if there's only one offset.
3732 if (Imms.size() == 1)
3733 continue;
3734
3735    DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
3736          for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
3737               J != JE; ++J)
3738            dbgs() << ' ' << J->first;
3739          dbgs() << '\n');
3740
3741 // Examine each offset.
3742 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
3743 J != JE; ++J) {
3744 const SCEV *OrigReg = J->second;
3745
3746 int64_t JImm = J->first;
3747 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
3748
3749 if (!isa<SCEVConstant>(OrigReg) &&
3750 UsedByIndicesMap[Reg].count() == 1) {
3751        DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
3752 continue;
3753 }
3754
3755      // Conservatively examine offsets between this orig reg and a few selected
3756      // other orig regs.
3757 ImmMapTy::const_iterator OtherImms[] = {
3758 Imms.begin(), std::prev(Imms.end()),
3759 Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
3760 2)
3761 };
3762 for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
3763 ImmMapTy::const_iterator M = OtherImms[i];
3764 if (M == J || M == JE) continue;
3765
3766 // Compute the difference between the two.
3767 int64_t Imm = (uint64_t)JImm - M->first;
3768 for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
3769 LUIdx = UsedByIndices.find_next(LUIdx))
3770 // Make a memo of this use, offset, and register tuple.
3771 if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
3772 WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
3773 }
3774 }
3775 }
3776
3777 Map.clear();
3778 Sequence.clear();
3779 UsedByIndicesMap.clear();
3780 UniqueItems.clear();
3781
3782 // Now iterate through the worklist and add new formulae.
3783 for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
3784 E = WorkItems.end(); I != E; ++I) {
3785 const WorkItem &WI = *I;
3786 size_t LUIdx = WI.LUIdx;
3787 LSRUse &LU = Uses[LUIdx];
3788 int64_t Imm = WI.Imm;
3789 const SCEV *OrigReg = WI.OrigReg;
3790
3791 Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
3792 const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
3793 unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
3794
3795 // TODO: Use a more targeted data structure.
3796 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
3797 Formula F = LU.Formulae[L];
3798 // FIXME: The code for the scaled and unscaled registers looks
3799 // very similar but slightly different. Investigate if they
3800 // could be merged. That way, we would not have to unscale the
3801 // Formula.
3802 F.Unscale();
3803 // Use the immediate in the scaled register.
3804 if (F.ScaledReg == OrigReg) {
3805 int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
3806 // Don't create 50 + reg(-50).
3807 if (F.referencesReg(SE.getSCEV(
3808 ConstantInt::get(IntTy, -(uint64_t)Offset))))
3809 continue;
3810 Formula NewF = F;
3811 NewF.BaseOffset = Offset;
3812 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3813 NewF))
3814 continue;
3815 NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
3816
3817 // If the new scale is a constant in a register, and adding the constant
3818 // value to the immediate would produce a value closer to zero than the
3819 // immediate itself, then the formula isn't worthwhile.
3820 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
3821 if (C->getValue()->isNegative() !=
3822 (NewF.BaseOffset < 0) &&
3823 (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
3824 .ule(std::abs(NewF.BaseOffset)))
3825 continue;
3826
3827 // OK, looks good.
3828 NewF.Canonicalize();
3829 (void)InsertFormula(LU, LUIdx, NewF);
3830 } else {
3831 // Use the immediate in a base register.
3832 for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
3833 const SCEV *BaseReg = F.BaseRegs[N];
3834 if (BaseReg != OrigReg)
3835 continue;
3836 Formula NewF = F;
3837 NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
3838 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
3839 LU.Kind, LU.AccessTy, NewF)) {
3840 if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
3841 continue;
3842 NewF = F;
3843 NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
3844 }
3845 NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
3846
3847 // If the new formula has a constant in a register, and adding the
3848 // constant value to the immediate would produce a value closer to
3849 // zero than the immediate itself, then the formula isn't worthwhile.
3850 for (SmallVectorImpl<const SCEV *>::const_iterator
3851 J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
3852 J != JE; ++J)
3853 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
3854 if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
3855 std::abs(NewF.BaseOffset)) &&
3856 (C->getValue()->getValue() +
3857 NewF.BaseOffset).countTrailingZeros() >=
3858 countTrailingZeros<uint64_t>(NewF.BaseOffset))
3859 goto skip_formula;
3860
3861 // Ok, looks good.
3862 NewF.Canonicalize();
3863 (void)InsertFormula(LU, LUIdx, NewF);
3864 break;
3865 skip_formula:;
3866 }
3867 }
3868 }
3869 }
3870}
3871
3872/// GenerateAllReuseFormulae - Generate formulae for each use.
3873void
3874LSRInstance::GenerateAllReuseFormulae() {
3875 // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
3876 // queries are more precise.
3877 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3878 LSRUse &LU = Uses[LUIdx];
3879 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3880 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
3881 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3882 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
3883 }
3884 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3885 LSRUse &LU = Uses[LUIdx];
3886 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3887 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
3888 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3889 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
3890 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3891 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
3892 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3893 GenerateScales(LU, LUIdx, LU.Formulae[i]);
3894 }
3895 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3896 LSRUse &LU = Uses[LUIdx];
3897 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3898 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
3899 }
3900
3901 GenerateCrossUseConstantOffsets();
3902
3903  DEBUG(dbgs() << "\n"
3904                  "After generating reuse formulae:\n";
3905        print_uses(dbgs()));
3906}
3907
3908/// If there are multiple formulae with the same set of registers used
3909/// by other uses, pick the best one and delete the others.
3910void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
3911 DenseSet<const SCEV *> VisitedRegs;
3912 SmallPtrSet<const SCEV *, 16> Regs;
3913 SmallPtrSet<const SCEV *, 16> LoserRegs;
3914#ifndef NDEBUG
3915 bool ChangedFormulae = false;
3916#endif
3917
3918 // Collect the best formula for each unique set of shared registers. This
3919 // is reset for each use.
3920 typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
3921 BestFormulaeTy;
3922 BestFormulaeTy BestFormulae;
3923
3924 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3925 LSRUse &LU = Uses[LUIdx];
3926    DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
3927
3928 bool Any = false;
3929 for (size_t FIdx = 0, NumForms = LU.Formulae.size();
3930 FIdx != NumForms; ++FIdx) {
3931 Formula &F = LU.Formulae[FIdx];
3932
3933 // Some formulas are instant losers. For example, they may depend on
3934 // nonexistent AddRecs from other loops. These need to be filtered
3935      // immediately; otherwise heuristics could choose them over others, leading
3936 // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
3937 // avoids the need to recompute this information across formulae using the
3938 // same bad AddRec. Passing LoserRegs is also essential unless we remove
3939 // the corresponding bad register from the Regs set.
3940 Cost CostF;
3941 Regs.clear();
3942 CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
3943 &LoserRegs);
3944 if (CostF.isLoser()) {
3945 // During initial formula generation, undesirable formulae are generated
3946 // by uses within other loops that have some non-trivial address mode or
3947 // use the postinc form of the IV. LSR needs to provide these formulae
3948 // as the basis of rediscovering the desired formula that uses an AddRec
3949 // corresponding to the existing phi. Once all formulae have been
3950 // generated, these initial losers may be pruned.
3951        DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
3952              dbgs() << "\n");
3953 }
3954 else {
3955 SmallVector<const SCEV *, 4> Key;
3956 for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
3957 JE = F.BaseRegs.end(); J != JE; ++J) {
3958 const SCEV *Reg = *J;
3959 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
3960 Key.push_back(Reg);
3961 }
3962 if (F.ScaledReg &&
3963 RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
3964 Key.push_back(F.ScaledReg);
3965 // Unstable sort by host order ok, because this is only used for
3966 // uniquifying.
3967 std::sort(Key.begin(), Key.end());
3968
3969 std::pair<BestFormulaeTy::const_iterator, bool> P =
3970 BestFormulae.insert(std::make_pair(Key, FIdx));
3971 if (P.second)
3972 continue;
3973
3974 Formula &Best = LU.Formulae[P.first->second];
3975
3976 Cost CostBest;
3977 Regs.clear();
3978 CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
3979 DT, LU);
3980 if (CostF < CostBest)
3981 std::swap(F, Best);
3982        DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
3983              dbgs() << "\n"
3984                        " in favor of formula "; Best.print(dbgs());
3985              dbgs() << '\n');
3986 }
3987#ifndef NDEBUG
3988 ChangedFormulae = true;
3989#endif
3990 LU.DeleteFormula(F);
3991 --FIdx;
3992 --NumForms;
3993 Any = true;
3994 }
3995
3996 // Now that we've filtered out some formulae, recompute the Regs set.
3997 if (Any)
3998 LU.RecomputeRegs(LUIdx, RegUses);
3999
4000 // Reset this to prepare for the next use.
4001 BestFormulae.clear();
4002 }
4003
4004  DEBUG(if (ChangedFormulae) {
4005          dbgs() << "\n"
4006                    "After filtering out undesirable candidates:\n";
4007          print_uses(dbgs());
4008        });
4009}
4010
4011// This is a rough guess that seems to work fairly well.
4012static const size_t ComplexityLimit = UINT16_MAX;
4013
4014/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
4015/// solutions the solver might have to consider. It almost never considers
4016/// this many solutions because it prunes the search space, but the pruning
4017/// isn't always sufficient.
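/// The estimate is simply the product of the per-use formula counts, clamped
/// at ComplexityLimit; e.g. three uses with 4, 5, and 6 formulae give a worst
/// case of 4 * 5 * 6 = 120 candidate solutions (illustrative arithmetic only).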
4018size_t LSRInstance::EstimateSearchSpaceComplexity() const {
4019 size_t Power = 1;
4020 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
4021 E = Uses.end(); I != E; ++I) {
4022 size_t FSize = I->Formulae.size();
4023 if (FSize >= ComplexityLimit) {
4024 Power = ComplexityLimit;
4025 break;
4026 }
4027 Power *= FSize;
4028 if (Power >= ComplexityLimit)
4029 break;
4030 }
4031 return Power;
4032}
4033
4034/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
4035/// of the registers of another formula, it won't help reduce register
4036/// pressure (though it may not necessarily hurt register pressure); remove
4037/// it to simplify the system.
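/// For example, if a formula keeps a constant C in a base register while the
/// same use has an otherwise-identical formula that folds C into its immediate
/// offset, the register-using formula is the superset and is deleted below;
/// the same applies to a GlobalValue versus the BaseGV field (illustrative).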
4038void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4039 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4040    DEBUG(dbgs() << "The search space is too complex.\n");
4041
4042    DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
4043                    "which use a superset of registers used by other "
4044                    "formulae.\n");
4045
4046 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4047 LSRUse &LU = Uses[LUIdx];
4048 bool Any = false;
4049 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4050 Formula &F = LU.Formulae[i];
4051 // Look for a formula with a constant or GV in a register. If the use
4052 // also has a formula with that same value in an immediate field,
4053 // delete the one that uses a register.
4054 for (SmallVectorImpl<const SCEV *>::const_iterator
4055 I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4056 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
4057 Formula NewF = F;
4058 NewF.BaseOffset += C->getValue()->getSExtValue();
4059 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4060 (I - F.BaseRegs.begin()));
4061 if (LU.HasFormulaWithSameRegs(NewF)) {
4062            DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
4063 LU.DeleteFormula(F);
4064 --i;
4065 --e;
4066 Any = true;
4067 break;
4068 }
4069 } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
4070 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
4071 if (!F.BaseGV) {
4072 Formula NewF = F;
4073 NewF.BaseGV = GV;
4074 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4075 (I - F.BaseRegs.begin()));
4076 if (LU.HasFormulaWithSameRegs(NewF)) {
4077              DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
4078                    dbgs() << '\n');
4079 LU.DeleteFormula(F);
4080 --i;
4081 --e;
4082 Any = true;
4083 break;
4084 }
4085 }
4086 }
4087 }
4088 }
4089 if (Any)
4090 LU.RecomputeRegs(LUIdx, RegUses);
4091 }
4092
4093    DEBUG(dbgs() << "After pre-selection:\n";
4094          print_uses(dbgs()));
4095 }
4096}
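
As an illustration of the superset rule implemented above (the values are made up for exposition, not taken from the report):

  // Hypothetical use with two formulae:
  //   F1: BaseRegs = { {0,+,4}<%loop>, (42) }             // constant kept in a register
  //   F2: BaseRegs = { {0,+,4}<%loop> }, BaseOffset = 42   // same constant folded as an immediate
  // F1's register set is a strict superset of F2's, so F1 is deleted; keeping it
  // could only increase register pressure.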
4097
4098/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
4099/// for expressions like A, A+1, A+2, etc., allocate a single register for
4100/// them.
4101void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4102 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4103 return;
4104
4105  DEBUG(dbgs() << "The search space is too complex.\n"
4106                  "Narrowing the search space by assuming that uses separated "
4107                  "by a constant offset will use the same registers.\n");
4108
4109 // This is especially useful for unrolled loops.
4110
4111 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4112 LSRUse &LU = Uses[LUIdx];
4113 for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
4114 E = LU.Formulae.end(); I != E; ++I) {
4115 const Formula &F = *I;
4116 if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
4117 continue;
4118
4119 LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
4120 if (!LUThatHas)
4121 continue;
4122
4123 if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
4124 LU.Kind, LU.AccessTy))
4125 continue;
4126
4127      DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');
4128
4129 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4130
4131 // Update the relocs to reference the new use.
4132 for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
4133 E = Fixups.end(); I != E; ++I) {
4134 LSRFixup &Fixup = *I;
4135 if (Fixup.LUIdx == LUIdx) {
4136 Fixup.LUIdx = LUThatHas - &Uses.front();
4137 Fixup.Offset += F.BaseOffset;
4138 // Add the new offset to LUThatHas' offset list.
4139 if (LUThatHas->Offsets.back() != Fixup.Offset) {
4140 LUThatHas->Offsets.push_back(Fixup.Offset);
4141 if (Fixup.Offset > LUThatHas->MaxOffset)
4142 LUThatHas->MaxOffset = Fixup.Offset;
4143 if (Fixup.Offset < LUThatHas->MinOffset)
4144 LUThatHas->MinOffset = Fixup.Offset;
4145 }
4146          DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
4147 }
4148 if (Fixup.LUIdx == NumUses-1)
4149 Fixup.LUIdx = LUIdx;
4150 }
4151
4152 // Delete formulae from the new use which are no longer legal.
4153 bool Any = false;
4154 for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4155 Formula &F = LUThatHas->Formulae[i];
4156 if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4157 LUThatHas->Kind, LUThatHas->AccessTy, F)) {
4158          DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
4159                dbgs() << '\n');
4160 LUThatHas->DeleteFormula(F);
4161 --i;
4162 --e;
4163 Any = true;
4164 }
4165 }
4166
4167 if (Any)
4168 LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
4169
4170 // Delete the old use.
4171 DeleteUse(LU, LUIdx);
4172 --LUIdx;
4173 --NumUses;
4174 break;
4175 }
4176 }
4177
4178  DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4179}
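
A sketch of the situation this transformation targets (the offsets are chosen purely for illustration):

  // After 4x unrolling, four uses address A, A+4, A+8 and A+12. If the A+4,
  // A+8 and A+12 uses reconcile with the use for A, they are deleted, their
  // fixups are retargeted at A with Offset += 4/8/12, and a single base
  // register plus small immediates serves all of them.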
4180
4181/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
4182/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4183/// we've done more filtering, as it may be able to find more formulae to
4184/// eliminate.
4185void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4186 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4187    DEBUG(dbgs() << "The search space is too complex.\n");
4188
4189    DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4190                    "undesirable dedicated registers.\n");
4191
4192 FilterOutUndesirableDedicatedRegisters();
4193
4194    DEBUG(dbgs() << "After pre-selection:\n";
4195          print_uses(dbgs()));
4196 }
4197}
4198
4199/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
4200/// to be profitable, and then in any use which has any reference to that
4201/// register, delete all formulae which do not reference that register.
4202void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
4203 // With all other options exhausted, loop until the system is simple
4204 // enough to handle.
4205 SmallPtrSet<const SCEV *, 4> Taken;
4206 while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
2
Loop condition is true. Entering loop body
7
Loop condition is true. Entering loop body
12
Loop condition is true. Entering loop body
17
Loop condition is true. Entering loop body
4207    // Ok, we have too many formulae on our hands to conveniently handle.
4208 // Use a rough heuristic to thin out the list.
4209    DEBUG(dbgs() << "The search space is too complex.\n");
4210
4211 // Pick the register which is used by the most LSRUses, which is likely
4212 // to be a good reuse register candidate.
4213 const SCEV *Best = nullptr;
18
'Best' initialized to a null pointer value
4214 unsigned BestNum = 0;
4215 for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
4
Loop condition is false. Execution continues on line 4231
9
Loop condition is false. Execution continues on line 4231
14
Loop condition is false. Execution continues on line 4231
20
Loop condition is false. Execution continues on line 4231
4216 I != E; ++I) {
3
Assuming 'I' is equal to 'E'
8
Assuming 'I' is equal to 'E'
13
Assuming 'I' is equal to 'E'
19
Assuming 'I' is equal to 'E'
4217 const SCEV *Reg = *I;
4218 if (Taken.count(Reg))
4219 continue;
4220 if (!Best)
4221 Best = Reg;
4222 else {
4223 unsigned Count = RegUses.getUsedByIndices(Reg).count();
4224 if (Count > BestNum) {
4225 Best = Reg;
4226 BestNum = Count;
4227 }
4228 }
4229 }
4230
4231    DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
21
Within the expansion of the macro 'DEBUG':
a
Forming reference to null pointer
4232                 << " will yield profitable reuse.\n");
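
This is the defect the report flags: on the analyzer's path, RegUses.begin() equals RegUses.end(), so the selection loop above never executes, Best keeps the nullptr it was initialized with at line 4213, and the stream insertion `<< *Best` inside the DEBUG expansion forms a reference to a null `const SCEV *`. One possible guard, shown only as an illustrative sketch rather than the fix actually adopted upstream, would bail out of the while loop before the dereference:

    if (!Best)
      break; // RegUses has no remaining candidates, so there is no winner register to pick.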
4233 Taken.insert(Best);
4234
4235 // In any use with formulae which references this register, delete formulae
4236 // which don't reference it.
4237 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5
Assuming 'LUIdx' is equal to 'NumUses'
6
Loop condition is false. Execution continues on line 4259
10
Assuming 'LUIdx' is equal to 'NumUses'
11
Loop condition is false. Execution continues on line 4259
15
Assuming 'LUIdx' is equal to 'NumUses'
16
Loop condition is false. Execution continues on line 4259
4238 LSRUse &LU = Uses[LUIdx];
4239 if (!LU.Regs.count(Best)) continue;
4240
4241 bool Any = false;
4242 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4243 Formula &F = LU.Formulae[i];
4244 if (!F.referencesReg(Best)) {
4245          DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
4246 LU.DeleteFormula(F);
4247 --e;
4248 --i;
4249 Any = true;
4250          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
4251 continue;
4252 }
4253 }
4254
4255 if (Any)
4256 LU.RecomputeRegs(LUIdx, RegUses);
4257 }
4258
4259    DEBUG(dbgs() << "After pre-selection:\n";
4260          print_uses(dbgs()));
4261 }
4262}
4263
4264/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
4265/// formulae to choose from, use some rough heuristics to prune down the number
4266/// of formulae. This keeps the main solver from taking an extraordinary amount
4267/// of time in some worst-case scenarios.
4268void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
4269 NarrowSearchSpaceByDetectingSupersets();
4270 NarrowSearchSpaceByCollapsingUnrolledCode();
4271 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
4272 NarrowSearchSpaceByPickingWinnerRegs();
1
Calling 'LSRInstance::NarrowSearchSpaceByPickingWinnerRegs'
4273}
4274
4275/// SolveRecurse - This is the recursive solver.
4276void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
4277 Cost &SolutionCost,
4278 SmallVectorImpl<const Formula *> &Workspace,
4279 const Cost &CurCost,
4280 const SmallPtrSet<const SCEV *, 16> &CurRegs,
4281 DenseSet<const SCEV *> &VisitedRegs) const {
4282 // Some ideas:
4283 // - prune more:
4284 // - use more aggressive filtering
4285 // - sort the formula so that the most profitable solutions are found first
4286 // - sort the uses too
4287 // - search faster:
4288 // - don't compute a cost, and then compare. compare while computing a cost
4289 // and bail early.
4290 // - track register sets with SmallBitVector
4291
4292 const LSRUse &LU = Uses[Workspace.size()];
4293
4294 // If this use references any register that's already a part of the
4295 // in-progress solution, consider it a requirement that a formula must
4296 // reference that register in order to be considered. This prunes out
4297 // unprofitable searching.
4298 SmallSetVector<const SCEV *, 4> ReqRegs;
4299 for (const SCEV *S : CurRegs)
4300 if (LU.Regs.count(S))
4301 ReqRegs.insert(S);
4302
4303 SmallPtrSet<const SCEV *, 16> NewRegs;
4304 Cost NewCost;
4305 for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
4306 E = LU.Formulae.end(); I != E; ++I) {
4307 const Formula &F = *I;
4308
4309 // Ignore formulae which may not be ideal in terms of register reuse of
4310 // ReqRegs. The formula should use all required registers before
4311 // introducing new ones.
4312 int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
4313 for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
4314 JE = ReqRegs.end(); J != JE; ++J) {
4315 const SCEV *Reg = *J;
4316 if ((F.ScaledReg && F.ScaledReg == Reg) ||
4317 std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
4318 F.BaseRegs.end()) {
4319 --NumReqRegsToFind;
4320 if (NumReqRegsToFind == 0)
4321 break;
4322 }
4323 }
4324 if (NumReqRegsToFind != 0) {
4325 // If none of the formulae satisfied the required registers, then we could
4326 // clear ReqRegs and try again. Currently, we simply give up in this case.
4327 continue;
4328 }
4329
4330 // Evaluate the cost of the current formula. If it's already worse than
4331 // the current best, prune the search at that point.
4332 NewCost = CurCost;
4333 NewRegs = CurRegs;
4334 NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
4335 LU);
4336 if (NewCost < SolutionCost) {
4337 Workspace.push_back(&F);
4338 if (Workspace.size() != Uses.size()) {
4339 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
4340 NewRegs, VisitedRegs);
4341 if (F.getNumRegs() == 1 && Workspace.size() == 1)
4342 VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
4343 } else {
4344        DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
4345              dbgs() << ".\n Regs:";
4346              for (const SCEV *S : NewRegs)
4347                dbgs() << ' ' << *S;
4348              dbgs() << '\n');
4349
4350 SolutionCost = NewCost;
4351 Solution = Workspace;
4352 }
4353 Workspace.pop_back();
4354 }
4355 }
4356}
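
To restate the ReqRegs pruning above with a concrete (hypothetical) solver state:

  // If the partial solution already occupies registers {R1, R2} and this use's
  // LU.Regs contains R1 but not R2, then ReqRegs = {R1}; any formula for this
  // use whose ScaledReg and BaseRegs do not include R1 leaves NumReqRegsToFind
  // nonzero and is skipped before its cost is ever computed.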
4357
4358/// Solve - Choose one formula from each use. Return the results in the given
4359/// Solution vector.
4360void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
4361 SmallVector<const Formula *, 8> Workspace;
4362 Cost SolutionCost;
4363 SolutionCost.Lose();
4364 Cost CurCost;
4365 SmallPtrSet<const SCEV *, 16> CurRegs;
4366 DenseSet<const SCEV *> VisitedRegs;
4367 Workspace.reserve(Uses.size());
4368
4369 // SolveRecurse does all the work.
4370 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
4371 CurRegs, VisitedRegs);
4372 if (Solution.empty()) {
4373    DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
4374 return;
4375 }
4376
4377 // Ok, we've now made all our decisions.
4378  DEBUG(dbgs() << "\n"
4379                  "The chosen solution requires "; SolutionCost.print(dbgs());
4380        dbgs() << ":\n";
4381        for (size_t i = 0, e = Uses.size(); i != e; ++i) {
4382          dbgs() << "  ";
4383          Uses[i].print(dbgs());
4384          dbgs() << "\n"
4385                    "    ";
4386          Solution[i]->print(dbgs());
4387          dbgs() << '\n';
4388        });
4389
4390  assert(Solution.size() == Uses.size() && "Malformed solution!");
4391}
4392
4393/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
4394/// the dominator tree as far as we can go while still being dominated by the
4395/// input positions. This helps canonicalize the insert position, which
4396/// encourages sharing.
4397BasicBlock::iterator
4398LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
4399 const SmallVectorImpl<Instruction *> &Inputs)
4400 const {
4401 for (;;) {
4402 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
4403 unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
4404
4405 BasicBlock *IDom;
4406 for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
4407 if (!Rung) return IP;
4408 Rung = Rung->getIDom();
4409 if (!Rung) return IP;
4410 IDom = Rung->getBlock();
4411
4412 // Don't climb into a loop though.
4413 const Loop *IDomLoop = LI.getLoopFor(IDom);
4414 unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
4415 if (IDomDepth <= IPLoopDepth &&
4416 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
4417 break;
4418 }
4419
4420 bool AllDominate = true;
4421 Instruction *BetterPos = nullptr;
4422 Instruction *Tentative = IDom->getTerminator();
4423 for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
4424 E = Inputs.end(); I != E; ++I) {
4425 Instruction *Inst = *I;
4426 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
4427 AllDominate = false;
4428 break;
4429 }
4430 // Attempt to find an insert position in the middle of the block,
4431 // instead of at the end, so that it can be used for other expansions.
4432 if (IDom == Inst->getParent() &&
4433 (!BetterPos || !DT.dominates(Inst, BetterPos)))
4434 BetterPos = std::next(BasicBlock::iterator(Inst));
4435 }
4436 if (!AllDominate)
4437 break;
4438 if (BetterPos)
4439 IP = BetterPos;
4440 else
4441 IP = Tentative;
4442 }
4443
4444 return IP;
4445}
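
In prose, the effect of the loop above (an editor's summary, not text from the source) is:

  // Starting from IP, repeatedly move to the terminator of the nearest dominator
  // block that is not deeper in the loop nest, as long as every Input instruction
  // still dominates the tentative position; prefer a point just after an Input
  // that lives in that block so later expansions can share the insertion point.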
4446
4447/// AdjustInsertPositionForExpand - Determine an input position which will be
4448/// dominated by the operands and which will dominate the result.
4449BasicBlock::iterator
4450LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
4451 const LSRFixup &LF,
4452 const LSRUse &LU,
4453 SCEVExpander &Rewriter) const {
4454 // Collect some instructions which must be dominated by the
4455 // expanding replacement. These must be dominated by any operands that
4456 // will be required in the expansion.
4457 SmallVector<Instruction *, 4> Inputs;
4458 if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
4459 Inputs.push_back(I);
4460 if (LU.Kind == LSRUse::ICmpZero)
4461 if (Instruction *I =
4462 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
4463 Inputs.push_back(I);
4464 if (LF.PostIncLoops.count(L)) {
4465 if (LF.isUseFullyOutsideLoop(L))
4466 Inputs.push_back(L->getLoopLatch()->getTerminator());
4467 else
4468 Inputs.push_back(IVIncInsertPos);
4469 }
4470 // The expansion must also be dominated by the increment positions of any
4471  // loops for which it is using post-inc mode.
4472 for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
4473 E = LF.PostIncLoops.end(); I != E; ++I) {
4474 const Loop *PIL = *I;
4475 if (PIL == L) continue;
4476
4477 // Be dominated by the loop exit.
4478 SmallVector<BasicBlock *, 4> ExitingBlocks;
4479 PIL->getExitingBlocks(ExitingBlocks);
4480 if (!ExitingBlocks.empty()) {
4481 BasicBlock *BB = ExitingBlocks[0];
4482 for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
4483 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
4484 Inputs.push_back(BB->getTerminator());
4485 }
4486 }
4487
4488  assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
4489         && !isa<DbgInfoIntrinsic>(LowestIP) &&
4490         "Insertion point must be a normal instruction");
4491
4492 // Then, climb up the immediate dominator tree as far as we can go while
4493 // still being dominated by the input positions.
4494 BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
4495
4496 // Don't insert instructions before PHI nodes.
4497 while (isa<PHINode>(IP)) ++IP;
4498
4499 // Ignore landingpad instructions.
4500 while (isa<LandingPadInst>(IP)) ++IP;
4501
4502 // Ignore debug intrinsics.
4503 while (isa<DbgInfoIntrinsic>(IP)) ++IP;
4504
4505 // Set IP below instructions recently inserted by SCEVExpander. This keeps the
4506 // IP consistent across expansions and allows the previously inserted
4507 // instructions to be reused by subsequent expansion.
4508 while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
4509
4510 return IP;
4511}
4512
4513/// Expand - Emit instructions for the leading candidate expression for this
4514/// LSRUse (this is called "expanding").
4515Value *LSRInstance::Expand(const LSRFixup &LF,
4516 const Formula &F,
4517 BasicBlock::iterator IP,
4518 SCEVExpander &Rewriter,
4519 SmallVectorImpl<WeakVH> &DeadInsts) const {
4520 const LSRUse &LU = Uses[LF.LUIdx];
4521 if (LU.RigidFormula)
4522 return LF.OperandValToReplace;
4523
4524 // Determine an input position which will be dominated by the operands and
4525 // which will dominate the result.
4526 IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
4527
4528 // Inform the Rewriter if we have a post-increment use, so that it can
4529 // perform an advantageous expansion.
4530 Rewriter.setPostInc(LF.PostIncLoops);
4531
4532 // This is the type that the user actually needs.
4533 Type *OpTy = LF.OperandValToReplace->getType();
4534 // This will be the type that we'll initially expand to.
4535 Type *Ty = F.getType();
4536 if (!Ty)
4537 // No type known; just expand directly to the ultimate type.
4538 Ty = OpTy;
4539 else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
4540 // Expand directly to the ultimate type if it's the right size.
4541 Ty = OpTy;
4542 // This is the type to do integer arithmetic in.
4543 Type *IntTy = SE.getEffectiveSCEVType(Ty);
4544
4545 // Build up a list of operands to add together to form the full base.
4546 SmallVector<const SCEV *, 8> Ops;
4547
4548 // Expand the BaseRegs portion.
4549 for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
4550 E = F.BaseRegs.end(); I != E; ++I) {
4551 const SCEV *Reg = *I;
4552    assert(!Reg->isZero() && "Zero allocated in a base register!");
4553
4554 // If we're expanding for a post-inc user, make the post-inc adjustment.
4555 PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4556 Reg = TransformForPostIncUse(Denormalize, Reg,
4557 LF.UserInst, LF.OperandValToReplace,
4558 Loops, SE, DT);
4559
4560 Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
4561 }
4562
4563 // Expand the ScaledReg portion.
4564 Value *ICmpScaledV = nullptr;
4565 if (F.Scale != 0) {
4566 const SCEV *ScaledS = F.ScaledReg;
4567
4568 // If we're expanding for a post-inc user, make the post-inc adjustment.
4569 PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4570 ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
4571 LF.UserInst, LF.OperandValToReplace,
4572 Loops, SE, DT);
4573
4574 if (LU.Kind == LSRUse::ICmpZero) {
4575 // Expand ScaleReg as if it was part of the base regs.
4576 if (F.Scale == 1)
4577 Ops.push_back(
4578 SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
4579 else {
4580 // An interesting way of "folding" with an icmp is to use a negated
4581 // scale, which we'll implement by inserting it into the other operand
4582 // of the icmp.
4583        assert(F.Scale == -1 &&
4584               "The only scale supported by ICmpZero uses is -1!");
4585 ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
4586 }
4587 } else {
4588 // Otherwise just expand the scaled register and an explicit scale,
4589 // which is expected to be matched as part of the address.
4590
4591 // Flush the operand list to suppress SCEVExpander hoisting address modes.
4592 // Unless the addressing mode will not be folded.
4593 if (!Ops.empty() && LU.Kind == LSRUse::Address &&
4594 isAMCompletelyFolded(TTI, LU, F)) {
4595 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
4596 Ops.clear();
4597 Ops.push_back(SE.getUnknown(FullV));
4598 }
4599 ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
4600 if (F.Scale != 1)
4601 ScaledS =
4602 SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
4603 Ops.push_back(ScaledS);
4604 }
4605 }
4606
4607 // Expand the GV portion.
4608 if (F.BaseGV) {
4609 // Flush the operand list to suppress SCEVExpander hoisting.
4610 if (!Ops.empty()) {
4611 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
4612 Ops.clear();
4613 Ops.push_back(SE.getUnknown(FullV));
4614 }
4615 Ops.push_back(SE.getUnknown(F.BaseGV));
4616 }
4617
4618 // Flush the operand list to suppress SCEVExpander hoisting of both folded and
4619 // unfolded offsets. LSR assumes they both live next to their uses.
4620 if (!Ops.empty()) {
4621 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
4622 Ops.clear();
4623 Ops.push_back(SE.getUnknown(FullV));
4624 }
4625
4626 // Expand the immediate portion.
4627 int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
4628 if (Offset != 0) {
4629 if (LU.Kind == LSRUse::ICmpZero) {
4630 // The other interesting way of "folding" with an ICmpZero is to use a
4631 // negated immediate.
4632 if (!ICmpScaledV)
4633 ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
4634 else {
4635 Ops.push_back(SE.getUnknown(ICmpScaledV));
4636 ICmpScaledV = ConstantInt::get(IntTy, Offset);
4637 }
4638 } else {
4639 // Just add the immediate values. These again are expected to be matched
4640 // as part of the address.
4641 Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
4642 }
4643 }
4644
4645 // Expand the unfolded offset portion.
4646 int64_t UnfoldedOffset = F.UnfoldedOffset;
4647 if (UnfoldedOffset != 0) {
4648 // Just add the immediate values.
4649 Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
4650 UnfoldedOffset)));
4651 }
4652
4653 // Emit instructions summing all the operands.
4654 const SCEV *FullS = Ops.empty() ?
4655 SE.getConstant(IntTy, 0) :
4656 SE.getAddExpr(Ops);
4657 Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
4658
4659 // We're done expanding now, so reset the rewriter.
4660 Rewriter.clearPostInc();
4661
4662 // An ICmpZero Formula represents an ICmp which we're handling as a
4663 // comparison against zero. Now that we've expanded an expression for that
4664 // form, update the ICmp's other operand.
4665 if (LU.Kind == LSRUse::ICmpZero) {
4666 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
4667 DeadInsts.push_back(CI->getOperand(1));
4668    assert(!F.BaseGV && "ICmp does not support folding a global value and "
4669           "a scale at the same time!");
4670 if (F.Scale == -1) {
4671 if (ICmpScaledV->getType() != OpTy) {
4672 Instruction *Cast =
4673 CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
4674 OpTy, false),
4675 ICmpScaledV, OpTy, "tmp", CI);
4676 ICmpScaledV = Cast;
4677 }
4678 CI->setOperand(1, ICmpScaledV);
4679 } else {
4680 // A scale of 1 means that the scale has been expanded as part of the
4681 // base regs.
4682      assert((F.Scale == 0 || F.Scale == 1) &&
4683             "ICmp does not support folding a global value and "
4684             "a scale at the same time!");
4685 Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
4686 -(uint64_t)Offset);
4687 if (C->getType() != OpTy)
4688 C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
4689 OpTy, false),
4690 C, OpTy);
4691
4692 CI->setOperand(1, C);
4693 }
4694 }
4695
4696 return FullV;
4697}
4698
4699/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
4700/// of their operands effectively happens in their predecessor blocks, so the
4701/// expression may need to be expanded in multiple places.
4702void LSRInstance::RewriteForPHI(PHINode *PN,
4703 const LSRFixup &LF,
4704 const Formula &F,
4705 SCEVExpander &Rewriter,
4706 SmallVectorImpl<WeakVH> &DeadInsts,
4707 Pass *P) const {
4708 DenseMap<BasicBlock *, Value *> Inserted;
4709 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
4710 if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
4711 BasicBlock *BB = PN->getIncomingBlock(i);
4712
4713 // If this is a critical edge, split the edge so that we do not insert
4714 // the code on all predecessor/successor paths. We do this unless this
4715 // is the canonical backedge for this loop, which complicates post-inc
4716 // users.
4717 if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
4718 !isa<IndirectBrInst>(BB->getTerminator())) {
4719 BasicBlock *Parent = PN->getParent();
4720 Loop *PNLoop = LI.getLoopFor(Parent);
4721 if (!PNLoop || Parent != PNLoop->getHeader()) {
4722 // Split the critical edge.
4723 BasicBlock *NewBB = nullptr;
4724 if (!Parent->isLandingPad()) {
4725 NewBB = SplitCriticalEdge(BB, Parent,
4726 CriticalEdgeSplittingOptions(&DT, &LI)
4727 .setMergeIdenticalEdges()
4728 .setDontDeleteUselessPHIs());
4729 } else {
4730 SmallVector<BasicBlock*, 2> NewBBs;
4731 SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs,
4732 /*AliasAnalysis*/ nullptr, &DT, &LI);
4733 NewBB = NewBBs[0];
4734 }
4735 // If NewBB==NULL, then SplitCriticalEdge refused to split because all
4736 // phi predecessors are identical. The simple thing to do is skip
4737 // splitting in this case rather than complicate the API.
4738 if (NewBB) {
4739 // If PN is outside of the loop and BB is in the loop, we want to
4740 // move the block to be immediately before the PHI block, not
4741 // immediately after BB.
4742 if (L->contains(BB) && !L->contains(PN))
4743 NewBB->moveBefore(PN->getParent());
4744
4745 // Splitting the edge can reduce the number of PHI entries we have.
4746 e = PN->getNumIncomingValues();
4747 BB = NewBB;
4748 i = PN->getBasicBlockIndex(BB);
4749 }
4750 }
4751 }
4752
4753 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
4754 Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
4755 if (!Pair.second)
4756 PN->setIncomingValue(i, Pair.first->second);
4757 else {
4758 Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
4759
4760 // If this is reuse-by-noop-cast, insert the noop cast.
4761 Type *OpTy = LF.OperandValToReplace->getType();
4762 if (FullV->getType() != OpTy)
4763 FullV =
4764 CastInst::Create(CastInst::getCastOpcode(FullV, false,
4765 OpTy, false),
4766 FullV, LF.OperandValToReplace->getType(),
4767 "tmp", BB->getTerminator());
4768
4769 PN->setIncomingValue(i, FullV);
4770 Pair.first->second = FullV;
4771 }
4772 }
4773}
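
A small IR example of why PHIs need this treatment (hypothetical values, in the spirit of the examples in the file's header comment):

  //   %p = phi i64 [ %v, %pred1 ], [ %v, %pred2 ]
  // The use of %v logically occurs at the end of %pred1 and %pred2, so the
  // replacement expression is expanded once per incoming block (at each
  // predecessor's terminator), after splitting any critical edges so the new
  // instructions do not execute on unrelated paths; the Inserted map reuses a
  // single expansion when several incoming values share the same block.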
4774
4775/// Rewrite - Emit instructions for the leading candidate expression for this
4776/// LSRUse (this is called "expanding"), and update the UserInst to reference
4777/// the newly expanded value.
4778void LSRInstance::Rewrite(const LSRFixup &LF,
4779 const Formula &F,
4780 SCEVExpander &Rewriter,
4781 SmallVectorImpl<WeakVH> &DeadInsts,
4782 Pass *P) const {
4783 // First, find an insertion point that dominates UserInst. For PHI nodes,
4784 // find the nearest block which dominates all the relevant uses.
4785 if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
4786 RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
4787 } else {
4788 Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
4789
4790 // If this is reuse-by-noop-cast, insert the noop cast.
4791 Type *OpTy = LF.OperandValToReplace->getType();
4792 if (FullV->getType() != OpTy) {
4793 Instruction *Cast =
4794 CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
4795 FullV, OpTy, "tmp", LF.UserInst);
4796 FullV = Cast;
4797 }
4798
4799 // Update the user. ICmpZero is handled specially here (for now) because
4800 // Expand may have updated one of the operands of the icmp already, and
4801 // its new value may happen to be equal to LF.OperandValToReplace, in
4802 // which case doing replaceUsesOfWith leads to replacing both operands
4803 // with the same value. TODO: Reorganize this.
4804 if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
4805 LF.UserInst->setOperand(0, FullV);
4806 else
4807 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
4808 }
4809
4810 DeadInsts.push_back(LF.OperandValToReplace);
4811}
4812
4813/// ImplementSolution - Rewrite all the fixup locations with new values,
4814/// following the chosen solution.
4815void
4816LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
4817 Pass *P) {
4818 // Keep track of instructions we may have made dead, so that
4819 // we can remove them after we are done working.
4820 SmallVector<WeakVH, 16> DeadInsts;
4821
4822 SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
4823 "lsr");
4824#ifndef NDEBUG
4825  Rewriter.setDebugType(DEBUG_TYPE);
4826#endif
4827 Rewriter.disableCanonicalMode();
4828 Rewriter.enableLSRMode();
4829 Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
4830
4831 // Mark phi nodes that terminate chains so the expander tries to reuse them.
4832 for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
4833 ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
4834 if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
4835 Rewriter.setChainedPhi(PN);
4836 }
4837
4838 // Expand the new value definitions and update the users.
4839 for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
4840 E = Fixups.end(); I != E; ++I) {
4841 const LSRFixup &Fixup = *I;
4842
4843 Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
4844
4845 Changed = true;
4846 }
4847
4848 for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
4849 ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
4850 GenerateIVChain(*ChainI, Rewriter, DeadInsts);
4851 Changed = true;
4852 }
4853 // Clean up after ourselves. This must be done before deleting any
4854 // instructions.
4855 Rewriter.clear();
4856
4857 Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
4858}
4859
4860LSRInstance::LSRInstance(Loop *L, Pass *P)
4861 : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
4862 DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
4863 LI(P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo()),
4864 TTI(P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
4865 *L->getHeader()->getParent())),
4866 L(L), Changed(false), IVIncInsertPos(nullptr) {
4867 // If LoopSimplify form is not available, stay out of trouble.
4868 if (!L->isLoopSimplifyForm())
4869 return;
4870
4871 // If there's no interesting work to be done, bail early.
4872 if (IU.empty()) return;
4873
4874 // If there's too much analysis to be done, bail early. We won't be able to
4875 // model the problem anyway.
4876 unsigned NumUsers = 0;
4877 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
4878 if (++NumUsers > MaxIVUsers) {
4879      DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
4880                   << "\n");
4881 return;
4882 }
4883 }
4884
4885#ifndef NDEBUG
4886 // All dominating loops must have preheaders, or SCEVExpander may not be able
4887 // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
4888 //
4889 // IVUsers analysis should only create users that are dominated by simple loop
4890 // headers. Since this loop should dominate all of its users, its user list
4891 // should be empty if this loop itself is not within a simple loop nest.
4892 for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
4893 Rung; Rung = Rung->getIDom()) {
4894 BasicBlock *BB = Rung->getBlock();
4895 const Loop *DomLoop = LI.getLoopFor(BB);
4896 if (DomLoop && DomLoop->getHeader() == BB) {
4897      assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
4898 }
4899 }
4900#endif // DEBUG
4901
4902  DEBUG(dbgs() << "\nLSR on loop ";
4903        L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
4904        dbgs() << ":\n");
4905
4906 // First, perform some low-level loop optimizations.
4907 OptimizeShadowIV();
4908 OptimizeLoopTermCond();
4909
4910 // If loop preparation eliminates all interesting IV users, bail.
4911 if (IU.empty()) return;
4912
4913 // Skip nested loops until we can model them better with formulae.
4914 if (!L->empty()) {
4915    DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
4916 return;
4917 }
4918
4919 // Start collecting data and preparing for the solver.
4920 CollectChains();
4921 CollectInterestingTypesAndFactors();
4922 CollectFixupsAndInitialFormulae();
4923 CollectLoopInvariantFixupsAndFormulae();
4924
4925  assert(!Uses.empty() && "IVUsers reported at least one use");
4926  DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
4927        print_uses(dbgs()));
4928
4929 // Now use the reuse data to generate a bunch of interesting ways
4930 // to formulate the values needed for the uses.
4931 GenerateAllReuseFormulae();
4932
4933 FilterOutUndesirableDedicatedRegisters();
4934 NarrowSearchSpaceUsingHeuristics();
4935
4936 SmallVector<const Formula *, 8> Solution;
4937 Solve(Solution);
4938
4939 // Release memory that is no longer needed.
4940 Factors.clear();
4941 Types.clear();
4942 RegUses.clear();
4943
4944 if (Solution.empty())
4945 return;
4946
4947#ifndef NDEBUG
4948 // Formulae should be legal.
4949 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
4950 I != E; ++I) {
4951 const LSRUse &LU = *I;
4952 for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
4953 JE = LU.Formulae.end();
4954 J != JE; ++J)
4955      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4956                        *J) && "Illegal formula generated!");
4957 };
4958#endif
4959
4960 // Now that we've decided what we want, make it so.
4961 ImplementSolution(Solution, P);
4962}
4963
4964void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
4965 if (Factors.empty() && Types.empty()) return;
4966
4967 OS << "LSR has identified the following interesting factors and types: ";
4968 bool First = true;
4969
4970 for (SmallSetVector<int64_t, 8>::const_iterator
4971 I = Factors.begin(), E = Factors.end(); I != E; ++I) {
4972 if (!First) OS << ", ";
4973 First = false;
4974 OS << '*' << *I;
4975 }
4976
4977 for (SmallSetVector<Type *, 4>::const_iterator
4978 I = Types.begin(), E = Types.end(); I != E; ++I) {
4979 if (!First) OS << ", ";
4980 First = false;
4981 OS << '(' << **I << ')';
4982 }
4983 OS << '\n';
4984}
4985
4986void LSRInstance::print_fixups(raw_ostream &OS) const {
4987 OS << "LSR is examining the following fixup sites:\n";
4988 for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
4989 E = Fixups.end(); I != E; ++I) {
4990 dbgs() << " ";
4991 I->print(OS);
4992 OS << '\n';
4993 }
4994}
4995
4996void LSRInstance::print_uses(raw_ostream &OS) const {
4997 OS << "LSR is examining the following uses:\n";
4998 for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
4999 E = Uses.end(); I != E; ++I) {
5000 const LSRUse &LU = *I;
5001 dbgs() << " ";
5002 LU.print(OS);
5003 OS << '\n';
5004 for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
5005 JE = LU.Formulae.end(); J != JE; ++J) {
5006 OS << " ";
5007 J->print(OS);
5008 OS << '\n';
5009 }
5010 }
5011}
5012
5013void LSRInstance::print(raw_ostream &OS) const {
5014 print_factors_and_types(OS);
5015 print_fixups(OS);
5016 print_uses(OS);
5017}
5018
5019#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5020void LSRInstance::dump() const {
5021 print(errs()); errs() << '\n';
5022}
5023#endif
5024
5025namespace {
5026
5027class LoopStrengthReduce : public LoopPass {
5028public:
5029 static char ID; // Pass ID, replacement for typeid
5030 LoopStrengthReduce();
5031
5032private:
5033 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
5034 void getAnalysisUsage(AnalysisUsage &AU) const override;
5035};
5036
5037}
5038
5039char LoopStrengthReduce::ID = 0;
5040INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
5041                "Loop Strength Reduction", false, false)
5042INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
5043INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5044INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
5045INITIALIZE_PASS_DEPENDENCY(IVUsers)
5046INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
5047INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
5048INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
5049                "Loop Strength Reduction", false, false)
5050
5051
5052Pass *llvm::createLoopStrengthReducePass() {
5053 return new LoopStrengthReduce();
5054}
5055
5056LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
5057 initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
5058}
5059
5060void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
5061 // We split critical edges, so we change the CFG. However, we do update
5062 // many analyses if they are around.
5063 AU.addPreservedID(LoopSimplifyID);
5064
5065 AU.addRequired<LoopInfoWrapperPass>();
5066 AU.addPreserved<LoopInfoWrapperPass>();
5067 AU.addRequiredID(LoopSimplifyID);
5068 AU.addRequired<DominatorTreeWrapperPass>();
5069 AU.addPreserved<DominatorTreeWrapperPass>();
5070 AU.addRequired<ScalarEvolution>();
5071 AU.addPreserved<ScalarEvolution>();
5072 // Requiring LoopSimplify a second time here prevents IVUsers from running
5073 // twice, since LoopSimplify was invalidated by running ScalarEvolution.
5074 AU.addRequiredID(LoopSimplifyID);
5075 AU.addRequired<IVUsers>();
5076 AU.addPreserved<IVUsers>();
5077 AU.addRequired<TargetTransformInfoWrapperPass>();
5078}
5079
5080bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
5081 if (skipOptnoneFunction(L))
5082 return false;
5083
5084 bool Changed = false;
5085
5086 // Run the main LSR transformation.
5087 Changed |= LSRInstance(L, this).getChanged();
5088
5089 // Remove any extra phis created by processing inner loops.
5090 Changed |= DeleteDeadPHIs(L->getHeader());
5091 if (EnablePhiElim && L->isLoopSimplifyForm()) {
5092 SmallVector<WeakVH, 16> DeadInsts;
5093 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
5094 SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr");
5095#ifndef NDEBUG
5096    Rewriter.setDebugType(DEBUG_TYPE);
5097#endif
5098 unsigned numFolded = Rewriter.replaceCongruentIVs(
5099 L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
5100 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
5101 *L->getHeader()->getParent()));
5102 if (numFolded) {
5103 Changed = true;
5104 DeleteTriviallyDeadInstructions(DeadInsts);
5105 DeleteDeadPHIs(L->getHeader());
5106 }
5107 }
5108 return Changed;
5109}