Bug Summary

File: lib/Transforms/Scalar/LoopStrengthReduce.cpp
Warning: line 4475, column 5
Forming reference to null pointer

Annotated Source Code

1//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This transformation analyzes and transforms the induction variables (and
11// computations derived from them) into forms suitable for efficient execution
12// on the target.
13//
14// This pass performs a strength reduction on array references inside loops that
15// have as one or more of their components the loop induction variable. It
16// rewrites expressions to take advantage of scaled-index addressing modes
17// available on the target, and it performs a variety of other optimizations
18// related to loop induction variables.
19//
20// Terminology note: this code has a lot of handling for "post-increment" or
21// "post-inc" users. This is not talking about post-increment addressing modes;
22// it is instead talking about code like this:
23//
24// %i = phi [ 0, %entry ], [ %i.next, %latch ]
25// ...
26// %i.next = add %i, 1
27// %c = icmp eq %i.next, %n
28//
29// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
30// it's useful to think about these as the same register, with some uses using
31// the value of the register before the add and some using it after. In this
32// example, the icmp is a post-increment user, since it uses %i.next, which is
33// the value of the induction variable after the increment. The other common
34// case of post-increment users is users outside the loop.
35//
36// TODO: More sophistication in the way Formulae are generated and filtered.
37//
38// TODO: Handle multiple loops at a time.
39//
40// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
41// of a GlobalValue?
42//
43// TODO: When truncation is free, truncate ICmp users' operands to make it a
44// smaller encoding (on x86 at least).
45//
46// TODO: When a negated register is used by an add (such as in a list of
47// multiple base registers, or as the increment expression in an addrec),
48// we may not actually need both reg and (-1 * reg) in registers; the
49// negation can be implemented by using a sub instead of an add. The
50// lack of support for taking this into consideration when making
51// register pressure decisions is partly worked around by the "Special"
52// use kind.
53//
54//===----------------------------------------------------------------------===//
55
56#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
57#include "llvm/ADT/APInt.h"
58#include "llvm/ADT/DenseMap.h"
59#include "llvm/ADT/DenseSet.h"
60#include "llvm/ADT/Hashing.h"
61#include "llvm/ADT/PointerIntPair.h"
62#include "llvm/ADT/STLExtras.h"
63#include "llvm/ADT/SetVector.h"
64#include "llvm/ADT/SmallBitVector.h"
65#include "llvm/ADT/SmallPtrSet.h"
66#include "llvm/ADT/SmallSet.h"
67#include "llvm/ADT/SmallVector.h"
68#include "llvm/Analysis/IVUsers.h"
69#include "llvm/Analysis/LoopInfo.h"
70#include "llvm/Analysis/LoopPass.h"
71#include "llvm/Analysis/ScalarEvolution.h"
72#include "llvm/Analysis/ScalarEvolutionExpander.h"
73#include "llvm/Analysis/ScalarEvolutionExpressions.h"
74#include "llvm/Analysis/ScalarEvolutionNormalization.h"
75#include "llvm/Analysis/TargetTransformInfo.h"
76#include "llvm/IR/BasicBlock.h"
77#include "llvm/IR/Constant.h"
78#include "llvm/IR/Constants.h"
79#include "llvm/IR/DerivedTypes.h"
80#include "llvm/IR/Dominators.h"
81#include "llvm/IR/GlobalValue.h"
82#include "llvm/IR/IRBuilder.h"
83#include "llvm/IR/Instruction.h"
84#include "llvm/IR/Instructions.h"
85#include "llvm/IR/IntrinsicInst.h"
86#include "llvm/IR/Module.h"
87#include "llvm/IR/OperandTraits.h"
88#include "llvm/IR/Operator.h"
89#include "llvm/IR/Type.h"
90#include "llvm/IR/Value.h"
91#include "llvm/IR/ValueHandle.h"
92#include "llvm/Pass.h"
93#include "llvm/Support/Casting.h"
94#include "llvm/Support/CommandLine.h"
95#include "llvm/Support/Compiler.h"
96#include "llvm/Support/Debug.h"
97#include "llvm/Support/ErrorHandling.h"
98#include "llvm/Support/MathExtras.h"
99#include "llvm/Support/raw_ostream.h"
100#include "llvm/Transforms/Scalar.h"
101#include "llvm/Transforms/Scalar/LoopPassManager.h"
102#include "llvm/Transforms/Utils/BasicBlockUtils.h"
103#include "llvm/Transforms/Utils/Local.h"
104#include <algorithm>
105#include <cassert>
106#include <cstddef>
107#include <cstdint>
108#include <cstdlib>
109#include <iterator>
110#include <map>
111#include <tuple>
112#include <utility>
113
114using namespace llvm;
115
116#define DEBUG_TYPE "loop-reduce"
117
118/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for
119/// bail out. This threshold is far beyond the number of users that LSR can
120/// conceivably solve, so it should not affect generated code, but catches the
121/// worst cases before LSR burns too much compile time and stack space.
122static const unsigned MaxIVUsers = 200;
123
124// Temporary flag to clean up congruent phis after LSR phi expansion.
125// It's currently disabled until we can determine whether it's truly useful or
126// not. The flag should be removed after the v3.0 release.
127// This is now needed for ivchains.
128static cl::opt<bool> EnablePhiElim(
129 "enable-lsr-phielim", cl::Hidden, cl::init(true),
130 cl::desc("Enable LSR phi elimination"));
131
132// The flag adds instruction count to the solution cost comparison.
133static cl::opt<bool> InsnsCost(
134 "lsr-insns-cost", cl::Hidden, cl::init(false),
135 cl::desc("Add instruction count to a LSR cost model"));
136
137// Flag to choose how to narrow a complex LSR solution
138static cl::opt<bool> LSRExpNarrow(
139 "lsr-exp-narrow", cl::Hidden, cl::init(false),
140 cl::desc("Narrow LSR complex solution using"
141 " expectation of registers number"));
142
143#ifndef NDEBUG
144// Stress test IV chain generation.
145static cl::opt<bool> StressIVChain(
146 "stress-ivchain", cl::Hidden, cl::init(false),
147 cl::desc("Stress test LSR IV chains"));
148#else
149static bool StressIVChain = false;
150#endif
151
152namespace {
153
154struct MemAccessTy {
155 /// Used in situations where the accessed memory type is unknown.
156 static const unsigned UnknownAddressSpace = ~0u;
157
158 Type *MemTy;
159 unsigned AddrSpace;
160
161 MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}
162
163 MemAccessTy(Type *Ty, unsigned AS) :
164 MemTy(Ty), AddrSpace(AS) {}
165
166 bool operator==(MemAccessTy Other) const {
167 return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
168 }
169
170 bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
171
172 static MemAccessTy getUnknown(LLVMContext &Ctx,
173 unsigned AS = UnknownAddressSpace) {
174 return MemAccessTy(Type::getVoidTy(Ctx), AS);
175 }
176};
177
178/// This class holds data which is used to order reuse candidates.
179class RegSortData {
180public:
181 /// This represents the set of LSRUse indices which reference
182 /// a particular register.
183 SmallBitVector UsedByIndices;
184
185 void print(raw_ostream &OS) const;
186 void dump() const;
187};
188
189} // end anonymous namespace
190
191void RegSortData::print(raw_ostream &OS) const {
192 OS << "[NumUses=" << UsedByIndices.count() << ']';
193}
194
195#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
196LLVM_DUMP_METHOD void RegSortData::dump() const {
197 print(errs()); errs() << '\n';
198}
199#endif
200
201namespace {
202
203/// Map register candidates to information about how they are used.
204class RegUseTracker {
205 typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
206
207 RegUsesTy RegUsesMap;
208 SmallVector<const SCEV *, 16> RegSequence;
209
210public:
211 void countRegister(const SCEV *Reg, size_t LUIdx);
212 void dropRegister(const SCEV *Reg, size_t LUIdx);
213 void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
214
215 bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
216
217 const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
218
219 void clear();
220
221 typedef SmallVectorImpl<const SCEV *>::iterator iterator;
222 typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
223 iterator begin() { return RegSequence.begin(); }
224 iterator end() { return RegSequence.end(); }
225 const_iterator begin() const { return RegSequence.begin(); }
226 const_iterator end() const { return RegSequence.end(); }
227};
228
229} // end anonymous namespace
230
231void
232RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
233 std::pair<RegUsesTy::iterator, bool> Pair =
234 RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
235 RegSortData &RSD = Pair.first->second;
236 if (Pair.second)
237 RegSequence.push_back(Reg);
238 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
239 RSD.UsedByIndices.set(LUIdx);
240}
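
Illustration (not part of the annotated source): a minimal standalone sketch of the grow-then-set pattern countRegister uses, with std::vector<bool> standing in for SmallBitVector; the names here are hypothetical.

  #include <algorithm>
  #include <cstddef>
  #include <vector>

  void markUse(std::vector<bool> &UsedByIndices, std::size_t LUIdx) {
    // Grow the bit vector just enough to hold bit LUIdx, then set it.
    UsedByIndices.resize(std::max(UsedByIndices.size(), LUIdx + 1));
    UsedByIndices[LUIdx] = true;
  }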
241
242void
243RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
244 RegUsesTy::iterator It = RegUsesMap.find(Reg);
245 assert(It != RegUsesMap.end());
246 RegSortData &RSD = It->second;
247 assert(RSD.UsedByIndices.size() > LUIdx);
248 RSD.UsedByIndices.reset(LUIdx);
249}
250
251void
252RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
253 assert(LUIdx <= LastLUIdx);
254
255 // Update RegUses. The data structure is not optimized for this purpose;
256 // we must iterate through it and update each of the bit vectors.
257 for (auto &Pair : RegUsesMap) {
258 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
259 if (LUIdx < UsedByIndices.size())
260 UsedByIndices[LUIdx] =
261 LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
262 UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
263 }
264}
265
266bool
267RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
268 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
269 if (I == RegUsesMap.end())
270 return false;
271 const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
272 int i = UsedByIndices.find_first();
273 if (i == -1) return false;
274 if ((size_t)i != LUIdx) return true;
275 return UsedByIndices.find_next(i) != -1;
276}
277
278const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
279 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
280 assert(I != RegUsesMap.end() && "Unknown register!");
281 return I->second.UsedByIndices;
282}
283
284void RegUseTracker::clear() {
285 RegUsesMap.clear();
286 RegSequence.clear();
287}
288
289namespace {
290
291/// This class holds information that describes a formula for computing a
292/// value satisfying a use. It may include broken-out immediates and scaled registers.
293struct Formula {
294 /// Global base address used for complex addressing.
295 GlobalValue *BaseGV;
296
297 /// Base offset for complex addressing.
298 int64_t BaseOffset;
299
300 /// Whether any complex addressing has a base register.
301 bool HasBaseReg;
302
303 /// The scale of any complex addressing.
304 int64_t Scale;
305
306 /// The list of "base" registers for this use. When this is non-empty, the
307 /// canonical representation of a formula is
308 /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
309 /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
310 /// 3. The reg containing the recurrent expr related to the current loop in
311 /// the formula should be put in the ScaledReg.
312 /// #1 enforces that the scaled register is always used when at least two
313 /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
314 /// #2 enforces that 1 * reg is reg.
315 /// #3 ensures invariant regs with respect to the current loop can be combined
316 /// together in LSR codegen.
317 /// This invariant can be temporarily broken while building a formula.
318 /// However, every formula inserted into the LSRInstance must be in canonical
319 /// form.
320 SmallVector<const SCEV *, 4> BaseRegs;
321
322 /// The 'scaled' register for this use. This should be non-null when Scale is
323 /// not zero.
324 const SCEV *ScaledReg;
325
326 /// An additional constant offset which is added near the use. This requires a
327 /// temporary register, but the offset itself can live in an add immediate
328 /// field rather than a register.
329 int64_t UnfoldedOffset;
330
331 Formula()
332 : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
333 ScaledReg(nullptr), UnfoldedOffset(0) {}
334
335 void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
336
337 bool isCanonical(const Loop &L) const;
338
339 void canonicalize(const Loop &L);
340
341 bool unscale();
342
343 bool hasZeroEnd() const;
344
345 size_t getNumRegs() const;
346 Type *getType() const;
347
348 void deleteBaseReg(const SCEV *&S);
349
350 bool referencesReg(const SCEV *S) const;
351 bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
352 const RegUseTracker &RegUses) const;
353
354 void print(raw_ostream &OS) const;
355 void dump() const;
356};
357
358} // end anonymous namespace
359
360/// Recursion helper for initialMatch.
361static void DoInitialMatch(const SCEV *S, Loop *L,
362 SmallVectorImpl<const SCEV *> &Good,
363 SmallVectorImpl<const SCEV *> &Bad,
364 ScalarEvolution &SE) {
365 // Collect expressions which properly dominate the loop header.
366 if (SE.properlyDominates(S, L->getHeader())) {
367 Good.push_back(S);
368 return;
369 }
370
371 // Look at add operands.
372 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
373 for (const SCEV *S : Add->operands())
374 DoInitialMatch(S, L, Good, Bad, SE);
375 return;
376 }
377
378 // Look at addrec operands.
379 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
380 if (!AR->getStart()->isZero() && AR->isAffine()) {
381 DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
382 DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
383 AR->getStepRecurrence(SE),
384 // FIXME: AR->getNoWrapFlags()
385 AR->getLoop(), SCEV::FlagAnyWrap),
386 L, Good, Bad, SE);
387 return;
388 }
389
390 // Handle a multiplication by -1 (negation) if it didn't fold.
391 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
392 if (Mul->getOperand(0)->isAllOnesValue()) {
393 SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
394 const SCEV *NewMul = SE.getMulExpr(Ops);
395
396 SmallVector<const SCEV *, 4> MyGood;
397 SmallVector<const SCEV *, 4> MyBad;
398 DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
399 const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
400 SE.getEffectiveSCEVType(NewMul->getType())));
401 for (const SCEV *S : MyGood)
402 Good.push_back(SE.getMulExpr(NegOne, S));
403 for (const SCEV *S : MyBad)
404 Bad.push_back(SE.getMulExpr(NegOne, S));
405 return;
406 }
407
408 // Ok, we can't do anything interesting. Just stuff the whole thing into a
409 // register and hope for the best.
410 Bad.push_back(S);
411}
412
413/// Incorporate loop-variant parts of S into this Formula, attempting to keep
414/// all loop-invariant and loop-computable values in a single base register.
415void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
416 SmallVector<const SCEV *, 4> Good;
417 SmallVector<const SCEV *, 4> Bad;
418 DoInitialMatch(S, L, Good, Bad, SE);
419 if (!Good.empty()) {
420 const SCEV *Sum = SE.getAddExpr(Good);
421 if (!Sum->isZero())
422 BaseRegs.push_back(Sum);
423 HasBaseReg = true;
424 }
425 if (!Bad.empty()) {
426 const SCEV *Sum = SE.getAddExpr(Bad);
427 if (!Sum->isZero())
428 BaseRegs.push_back(Sum);
429 HasBaseReg = true;
430 }
431 canonicalize(*L);
432}
433
434/// \brief Check whether or not this formula satisfies the canonical
435/// representation.
436/// \see Formula::BaseRegs.
437bool Formula::isCanonical(const Loop &L) const {
438 if (!ScaledReg)
439 return BaseRegs.size() <= 1;
440
441 if (Scale != 1)
442 return true;
443
444 if (Scale == 1 && BaseRegs.empty())
445 return false;
446
447 const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
448 if (SAR && SAR->getLoop() == &L)
449 return true;
450
451 // If ScaledReg is not a recurrent expr, or it is but its loop is not the
452 // current loop, while BaseRegs contains a recurrent expr reg related to the
453 // current loop, we want to swap the reg in BaseRegs with ScaledReg.
454 auto I =
455 find_if(make_range(BaseRegs.begin(), BaseRegs.end()), [&](const SCEV *S) {
456 return isa<const SCEVAddRecExpr>(S) &&
457 (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
458 });
459 return I == BaseRegs.end();
460}
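
Illustration (not part of the annotated source): hypothetical formulae and how the rules above classify them, with L the current loop.

  // reg(%a)                        canonical: one base reg, no ScaledReg
  // reg(%a) + reg({0,+,1}<L>)      not canonical: two base regs, no ScaledReg;
  //                                canonicalize() moves the addrec for L into
  //                                ScaledReg with Scale = 1
  // reg(%a) + 1*reg({0,+,1}<L>)    canonical: rules #1 and #3 both hold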
461
462/// \brief Helper method to morph a formula into its canonical representation.
463/// \see Formula::BaseRegs.
464/// Every formula having more than one base register, must use the ScaledReg
465/// field. Otherwise, we would have to do special cases everywhere in LSR
466/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
467/// On the other hand, 1*reg should be canonicalized into reg.
468void Formula::canonicalize(const Loop &L) {
469 if (isCanonical(L))
470 return;
471 // So far we did not need this case. This is easy to implement but it is
472 // useless to maintain dead code. Beside it could hurt compile time.
473 assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
474
475 // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg.
476 if (!ScaledReg) {
477 ScaledReg = BaseRegs.back();
478 BaseRegs.pop_back();
479 Scale = 1;
480 }
481
482 // If ScaledReg is an invariant with respect to L, find the reg from
483 // BaseRegs containing the recurrent expr related with Loop L. Swap the
484 // reg with ScaledReg.
485 const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
486 if (!SAR || SAR->getLoop() != &L) {
487 auto I = find_if(make_range(BaseRegs.begin(), BaseRegs.end()),
488 [&](const SCEV *S) {
489 return isa<const SCEVAddRecExpr>(S) &&
490 (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
491 });
492 if (I != BaseRegs.end())
493 std::swap(ScaledReg, *I);
494 }
495}
496
497/// \brief Get rid of the scale in the formula.
498/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
499/// \return true if it was possible to get rid of the scale, false otherwise.
500/// \note After this operation the formula may not be in the canonical form.
501bool Formula::unscale() {
502 if (Scale != 1)
503 return false;
504 Scale = 0;
505 BaseRegs.push_back(ScaledReg);
506 ScaledReg = nullptr;
507 return true;
508}
509
510bool Formula::hasZeroEnd() const {
511 if (UnfoldedOffset || BaseOffset)
512 return false;
513 if (BaseRegs.size() != 1 || ScaledReg)
514 return false;
515 return true;
516}
517
518/// Return the total number of register operands used by this formula. This does
519/// not include register uses implied by non-constant addrec strides.
520size_t Formula::getNumRegs() const {
521 return !!ScaledReg + BaseRegs.size();
522}
523
524/// Return the type of this formula, if it has one, or null otherwise. This type
525/// is meaningless except for the bit size.
526Type *Formula::getType() const {
527 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
528 ScaledReg ? ScaledReg->getType() :
529 BaseGV ? BaseGV->getType() :
530 nullptr;
531}
532
533/// Delete the given base reg from the BaseRegs list.
534void Formula::deleteBaseReg(const SCEV *&S) {
535 if (&S != &BaseRegs.back())
536 std::swap(S, BaseRegs.back());
537 BaseRegs.pop_back();
538}
539
540/// Test if this formula references the given register.
541bool Formula::referencesReg(const SCEV *S) const {
542 return S == ScaledReg || is_contained(BaseRegs, S);
543}
544
545/// Test whether this formula uses registers which are used by uses other than
546/// the use with the given index.
547bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
548 const RegUseTracker &RegUses) const {
549 if (ScaledReg)
550 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
551 return true;
552 for (const SCEV *BaseReg : BaseRegs)
553 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
554 return true;
555 return false;
556}
557
558void Formula::print(raw_ostream &OS) const {
559 bool First = true;
560 if (BaseGV) {
561 if (!First) OS << " + "; else First = false;
562 BaseGV->printAsOperand(OS, /*PrintType=*/false);
563 }
564 if (BaseOffset != 0) {
565 if (!First) OS << " + "; else First = false;
566 OS << BaseOffset;
567 }
568 for (const SCEV *BaseReg : BaseRegs) {
569 if (!First) OS << " + "; else First = false;
570 OS << "reg(" << *BaseReg << ')';
571 }
572 if (HasBaseReg && BaseRegs.empty()) {
573 if (!First) OS << " + "; else First = false;
574 OS << "**error: HasBaseReg**";
575 } else if (!HasBaseReg && !BaseRegs.empty()) {
576 if (!First) OS << " + "; else First = false;
577 OS << "**error: !HasBaseReg**";
578 }
579 if (Scale != 0) {
580 if (!First) OS << " + "; else First = false;
581 OS << Scale << "*reg(";
582 if (ScaledReg)
583 OS << *ScaledReg;
584 else
585 OS << "<unknown>";
586 OS << ')';
587 }
588 if (UnfoldedOffset != 0) {
589 if (!First) OS << " + ";
590 OS << "imm(" << UnfoldedOffset << ')';
591 }
592}
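
Illustration (not part of the annotated source): with hypothetical field values, the printer above emits output along the lines of

  // 4 + reg(%base) + 2*reg({0,+,4}<%loop>) + imm(8)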
593
594#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
595LLVM_DUMP_METHOD void Formula::dump() const {
596 print(errs()); errs() << '\n';
597}
598#endif
599
600/// Return true if the given addrec can be sign-extended without changing its
601/// value.
602static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
603 Type *WideTy =
604 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
605 return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
606}
607
608/// Return true if the given add can be sign-extended without changing its
609/// value.
610static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
611 Type *WideTy =
612 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
613 return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
614}
615
616/// Return true if the given mul can be sign-extended without changing its
617/// value.
618static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
619 Type *WideTy =
620 IntegerType::get(SE.getContext(),
621 SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
622 return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
623}
624
625/// Return an expression for LHS /s RHS, if it can be determined and if the
626/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
627/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that
628/// the multiplication may overflow, which is useful when the result will be
629/// used in a context where the most significant bits are ignored.
630static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
631 ScalarEvolution &SE,
632 bool IgnoreSignificantBits = false) {
633 // Handle the trivial case, which works for any SCEV type.
634 if (LHS == RHS)
635 return SE.getConstant(LHS->getType(), 1);
636
637 // Handle a few RHS special cases.
638 const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
639 if (RC) {
640 const APInt &RA = RC->getAPInt();
641 // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
642 // some folding.
643 if (RA.isAllOnesValue())
644 return SE.getMulExpr(LHS, RC);
645 // Handle x /s 1 as x.
646 if (RA == 1)
647 return LHS;
648 }
649
650 // Check for a division of a constant by a constant.
651 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
652 if (!RC)
653 return nullptr;
654 const APInt &LA = C->getAPInt();
655 const APInt &RA = RC->getAPInt();
656 if (LA.srem(RA) != 0)
657 return nullptr;
658 return SE.getConstant(LA.sdiv(RA));
659 }
660
661 // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
662 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
663 if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
664 const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
665 IgnoreSignificantBits);
666 if (!Step) return nullptr;
667 const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
668 IgnoreSignificantBits);
669 if (!Start) return nullptr;
670 // FlagNW is independent of the start value, step direction, and is
671 // preserved with smaller magnitude steps.
672 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
673 return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
674 }
675 return nullptr;
676 }
677
678 // Distribute the sdiv over add operands, if the add doesn't overflow.
679 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
680 if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
681 SmallVector<const SCEV *, 8> Ops;
682 for (const SCEV *S : Add->operands()) {
683 const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
684 if (!Op) return nullptr;
685 Ops.push_back(Op);
686 }
687 return SE.getAddExpr(Ops);
688 }
689 return nullptr;
690 }
691
692 // Check for a multiply operand that we can pull RHS out of.
693 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
694 if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
695 SmallVector<const SCEV *, 4> Ops;
696 bool Found = false;
697 for (const SCEV *S : Mul->operands()) {
698 if (!Found)
699 if (const SCEV *Q = getExactSDiv(S, RHS, SE,
700 IgnoreSignificantBits)) {
701 S = Q;
702 Found = true;
703 }
704 Ops.push_back(S);
705 }
706 return Found ? SE.getMulExpr(Ops) : nullptr;
707 }
708 return nullptr;
709 }
710
711 // Otherwise we don't know.
712 return nullptr;
713}
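
Illustration (not part of the annotated source): worked examples of the exact signed division above, assuming the sign-extension checks succeed.

  // {8,+,4}<L> /s 4  ==>  {2,+,1}<L>   start and step both divide exactly
  // {7,+,4}<L> /s 4  ==>  nullptr      7 srem 4 != 0, so no exact result
  // (6 * %x)   /s 3  ==>  (2 * %x)     the constant operand absorbs the divisor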
714
715/// If S involves the addition of a constant integer value, return that integer
716/// value, and mutate S to point to a new SCEV with that value excluded.
717static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
718 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
719 if (C->getAPInt().getMinSignedBits() <= 64) {
720 S = SE.getConstant(C->getType(), 0);
721 return C->getValue()->getSExtValue();
722 }
723 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
724 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
725 int64_t Result = ExtractImmediate(NewOps.front(), SE);
726 if (Result != 0)
727 S = SE.getAddExpr(NewOps);
728 return Result;
729 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
730 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
731 int64_t Result = ExtractImmediate(NewOps.front(), SE);
732 if (Result != 0)
733 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
734 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
735 SCEV::FlagAnyWrap);
736 return Result;
737 }
738 return 0;
739}
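
Illustration (not part of the annotated source): hypothetical inputs and results for ExtractImmediate.

  // S = (4 + %x)       ==>  returns 4,   S becomes %x
  // S = {-16,+,8}<L>   ==>  returns -16, S becomes {0,+,8}<L>
  // S = %x             ==>  returns 0,   S is unchanged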
740
741/// If S involves the addition of a GlobalValue address, return that symbol, and
742/// mutate S to point to a new SCEV with that value excluded.
743static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
744 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
745 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
746 S = SE.getConstant(GV->getType(), 0);
747 return GV;
748 }
749 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
750 SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
751 GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
752 if (Result)
753 S = SE.getAddExpr(NewOps);
754 return Result;
755 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
756 SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
757 GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
758 if (Result)
759 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
760 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
761 SCEV::FlagAnyWrap);
762 return Result;
763 }
764 return nullptr;
765}
766
767/// Returns true if the specified instruction is using the specified value as an
768/// address.
769static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
770 bool isAddress = isa<LoadInst>(Inst);
771 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
772 if (SI->getPointerOperand() == OperandVal)
773 isAddress = true;
774 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
775 // Addressing modes can also be folded into prefetches and a variety
776 // of intrinsics.
777 switch (II->getIntrinsicID()) {
778 default: break;
779 case Intrinsic::prefetch:
780 if (II->getArgOperand(0) == OperandVal)
781 isAddress = true;
782 break;
783 }
784 } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
785 if (RMW->getPointerOperand() == OperandVal)
786 isAddress = true;
787 } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
788 if (CmpX->getPointerOperand() == OperandVal)
789 isAddress = true;
790 }
791 return isAddress;
792}
793
794/// Return the type of the memory being accessed.
795static MemAccessTy getAccessType(const Instruction *Inst) {
796 MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
797 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
798 AccessTy.MemTy = SI->getOperand(0)->getType();
799 AccessTy.AddrSpace = SI->getPointerAddressSpace();
800 } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
801 AccessTy.AddrSpace = LI->getPointerAddressSpace();
802 } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
803 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
804 } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
805 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
806 }
807
808 // All pointers have the same requirements, so canonicalize them to an
809 // arbitrary pointer type to minimize variation.
810 if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
811 AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
812 PTy->getAddressSpace());
813
814 return AccessTy;
815}
816
817/// Return true if this AddRec is already a phi in its loop.
818static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
819 for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
820 PHINode *PN = dyn_cast<PHINode>(I); ++I) {
821 if (SE.isSCEVable(PN->getType()) &&
822 (SE.getEffectiveSCEVType(PN->getType()) ==
823 SE.getEffectiveSCEVType(AR->getType())) &&
824 SE.getSCEV(PN) == AR)
825 return true;
826 }
827 return false;
828}
829
830/// Check if expanding this expression is likely to incur significant cost. This
831/// is tricky because SCEV doesn't track which expressions are actually computed
832/// by the current IR.
833///
834/// We currently allow expansion of IV increments that involve adds,
835/// multiplication by constants, and AddRecs from existing phis.
836///
837/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
838/// obvious multiple of the UDivExpr.
839static bool isHighCostExpansion(const SCEV *S,
840 SmallPtrSetImpl<const SCEV*> &Processed,
841 ScalarEvolution &SE) {
842 // Zero/One operand expressions
843 switch (S->getSCEVType()) {
844 case scUnknown:
845 case scConstant:
846 return false;
847 case scTruncate:
848 return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
849 Processed, SE);
850 case scZeroExtend:
851 return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
852 Processed, SE);
853 case scSignExtend:
854 return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
855 Processed, SE);
856 }
857
858 if (!Processed.insert(S).second)
859 return false;
860
861 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
862 for (const SCEV *S : Add->operands()) {
863 if (isHighCostExpansion(S, Processed, SE))
864 return true;
865 }
866 return false;
867 }
868
869 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
870 if (Mul->getNumOperands() == 2) {
871 // Multiplication by a constant is ok
872 if (isa<SCEVConstant>(Mul->getOperand(0)))
873 return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
874
875 // If we have the value of one operand, check if an existing
876 // multiplication already generates this expression.
877 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
878 Value *UVal = U->getValue();
879 for (User *UR : UVal->users()) {
880 // If U is a constant, it may be used by a ConstantExpr.
881 Instruction *UI = dyn_cast<Instruction>(UR);
882 if (UI && UI->getOpcode() == Instruction::Mul &&
883 SE.isSCEVable(UI->getType())) {
884 return SE.getSCEV(UI) == Mul;
885 }
886 }
887 }
888 }
889 }
890
891 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
892 if (isExistingPhi(AR, SE))
893 return false;
894 }
895
896 // For now, consider any other type of expression (div/mul/min/max) high cost.
897 return true;
898}
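
Illustration (not part of the annotated source): hypothetical expressions and how the heuristic above classifies them.

  // (4 * %x)              cheap: multiplication by a constant
  // (sext i32 %x to i64)  cheap if %x itself is cheap to expand
  // {0,+,4}<L>            cheap when an equivalent phi already exists in L
  // (%n /u 8)             high cost: divisions are not considered cheap to expand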
899
900/// If any of the instructions in the specified set are trivially dead, delete
901/// them and see if this makes any of their operands subsequently dead.
902static bool
903DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
904 bool Changed = false;
905
906 while (!DeadInsts.empty()) {
907 Value *V = DeadInsts.pop_back_val();
908 Instruction *I = dyn_cast_or_null<Instruction>(V);
909
910 if (!I || !isInstructionTriviallyDead(I))
911 continue;
912
913 for (Use &O : I->operands())
914 if (Instruction *U = dyn_cast<Instruction>(O)) {
915 O = nullptr;
916 if (U->use_empty())
917 DeadInsts.emplace_back(U);
918 }
919
920 I->eraseFromParent();
921 Changed = true;
922 }
923
924 return Changed;
925}
926
927namespace {
928
929class LSRUse;
930
931} // end anonymous namespace
932
933/// \brief Check if the addressing mode defined by \p F is completely
934/// folded in \p LU at isel time.
935/// This includes address-mode folding and special icmp tricks.
936/// This function returns true if \p LU can accommodate what \p F
937/// defines and up to 1 base + 1 scaled + offset.
938/// In other words, if \p F has several base registers, this function may
939/// still return true. Therefore, users still need to account for
940/// additional base registers and/or unfolded offsets to derive an
941/// accurate cost model.
942static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
943 const LSRUse &LU, const Formula &F);
944// Get the cost of the scaling factor used in F for LU.
945static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
946 const LSRUse &LU, const Formula &F,
947 const Loop &L);
948
949namespace {
950
951/// This class is used to measure and compare candidate formulae.
952class Cost {
953 /// TODO: Some of these could be merged. Also, a lexical ordering
954 /// isn't always optimal.
955 unsigned Insns;
956 unsigned NumRegs;
957 unsigned AddRecCost;
958 unsigned NumIVMuls;
959 unsigned NumBaseAdds;
960 unsigned ImmCost;
961 unsigned SetupCost;
962 unsigned ScaleCost;
963
964public:
965 Cost()
966 : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0),
967 ImmCost(0), SetupCost(0), ScaleCost(0) {}
968
969 bool operator<(const Cost &Other) const;
970
971 void Lose();
972
973#ifndef NDEBUG
974 // Once any of the metrics loses, they must all remain losers.
975 bool isValid() {
976 return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
977 | ImmCost | SetupCost | ScaleCost) != ~0u)
978 || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
979 & ImmCost & SetupCost & ScaleCost) == ~0u);
980 }
981#endif
982
983 bool isLoser() {
984 assert(isValid() && "invalid cost");
985 return NumRegs == ~0u;
986 }
987
988 void RateFormula(const TargetTransformInfo &TTI,
989 const Formula &F,
990 SmallPtrSetImpl<const SCEV *> &Regs,
991 const DenseSet<const SCEV *> &VisitedRegs,
992 const Loop *L,
993 ScalarEvolution &SE, DominatorTree &DT,
994 const LSRUse &LU,
995 SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
996
997 void print(raw_ostream &OS) const;
998 void dump() const;
999
1000private:
1001 void RateRegister(const SCEV *Reg,
1002 SmallPtrSetImpl<const SCEV *> &Regs,
1003 const Loop *L,
1004 ScalarEvolution &SE, DominatorTree &DT);
1005 void RatePrimaryRegister(const SCEV *Reg,
1006 SmallPtrSetImpl<const SCEV *> &Regs,
1007 const Loop *L,
1008 ScalarEvolution &SE, DominatorTree &DT,
1009 SmallPtrSetImpl<const SCEV *> *LoserRegs);
1010};
1011
1012/// An operand value in an instruction which is to be replaced with some
1013/// equivalent, possibly strength-reduced, replacement.
1014struct LSRFixup {
1015 /// The instruction which will be updated.
1016 Instruction *UserInst;
1017
1018 /// The operand of the instruction which will be replaced. The operand may be
1019 /// used more than once; every instance will be replaced.
1020 Value *OperandValToReplace;
1021
1022 /// If this user is to use the post-incremented value of an induction
1023 /// variable, this set is non-empty and holds the loops associated with the
1024 /// induction variable.
1025 PostIncLoopSet PostIncLoops;
1026
1027 /// A constant offset to be added to the LSRUse expression. This allows
1028 /// multiple fixups to share the same LSRUse with different offsets, for
1029 /// example in an unrolled loop.
1030 int64_t Offset;
1031
1032 bool isUseFullyOutsideLoop(const Loop *L) const;
1033
1034 LSRFixup();
1035
1036 void print(raw_ostream &OS) const;
1037 void dump() const;
1038};
1039
1040/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
1041/// SmallVectors of const SCEV*.
1042struct UniquifierDenseMapInfo {
1043 static SmallVector<const SCEV *, 4> getEmptyKey() {
1044 SmallVector<const SCEV *, 4> V;
1045 V.push_back(reinterpret_cast<const SCEV *>(-1));
1046 return V;
1047 }
1048
1049 static SmallVector<const SCEV *, 4> getTombstoneKey() {
1050 SmallVector<const SCEV *, 4> V;
1051 V.push_back(reinterpret_cast<const SCEV *>(-2));
1052 return V;
1053 }
1054
1055 static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
1056 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
1057 }
1058
1059 static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
1060 const SmallVector<const SCEV *, 4> &RHS) {
1061 return LHS == RHS;
1062 }
1063};
1064
1065/// This class holds the state that LSR keeps for each use in IVUsers, as well
1066/// as uses invented by LSR itself. It includes information about what kinds of
1067/// things can be folded into the user, information about the user itself, and
1068/// information about how the use may be satisfied. TODO: Represent multiple
1069/// users of the same expression in common?
1070class LSRUse {
1071 DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
1072
1073public:
1074 /// An enum for a kind of use, indicating what types of scaled and immediate
1075 /// operands it might support.
1076 enum KindType {
1077 Basic, ///< A normal use, with no folding.
1078 Special, ///< A special case of basic, allowing -1 scales.
1079 Address, ///< An address use; folding according to TargetLowering
1080 ICmpZero ///< An equality icmp with both operands folded into one.
1081 // TODO: Add a generic icmp too?
1082 };
1083
1084 typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
1085
1086 KindType Kind;
1087 MemAccessTy AccessTy;
1088
1089 /// The list of operands which are to be replaced.
1090 SmallVector<LSRFixup, 8> Fixups;
1091
1092 /// Keep track of the min and max offsets of the fixups.
1093 int64_t MinOffset;
1094 int64_t MaxOffset;
1095
1096 /// This records whether all of the fixups using this LSRUse are outside of
1097 /// the loop, in which case some special-case heuristics may be used.
1098 bool AllFixupsOutsideLoop;
1099
1100 /// RigidFormula is set to true to guarantee that this use will be associated
1101 /// with a single formula--the one that initially matched. Some SCEV
1102 /// expressions cannot be expanded. This allows LSR to consider the registers
1103 /// used by those expressions without the need to expand them later after
1104 /// changing the formula.
1105 bool RigidFormula;
1106
1107 /// This records the widest use type for any fixup using this
1108 /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
1109 /// fixup widths to be equivalent, because the narrower one may be relying on
1110 /// the implicit truncation to truncate away bogus bits.
1111 Type *WidestFixupType;
1112
1113 /// A list of ways to build a value that can satisfy this user. After the
1114 /// list is populated, one of these is selected heuristically and used to
1115 /// formulate a replacement for OperandValToReplace in UserInst.
1116 SmallVector<Formula, 12> Formulae;
1117
1118 /// The set of register candidates used by all formulae in this LSRUse.
1119 SmallPtrSet<const SCEV *, 4> Regs;
1120
1121 LSRUse(KindType K, MemAccessTy AT)
1122 : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
1123 AllFixupsOutsideLoop(true), RigidFormula(false),
1124 WidestFixupType(nullptr) {}
1125
1126 LSRFixup &getNewFixup() {
1127 Fixups.push_back(LSRFixup());
1128 return Fixups.back();
1129 }
1130
1131 void pushFixup(LSRFixup &f) {
1132 Fixups.push_back(f);
1133 if (f.Offset > MaxOffset)
1134 MaxOffset = f.Offset;
1135 if (f.Offset < MinOffset)
1136 MinOffset = f.Offset;
1137 }
1138
1139 bool HasFormulaWithSameRegs(const Formula &F) const;
1140 float getNotSelectedProbability(const SCEV *Reg) const;
1141 bool InsertFormula(const Formula &F, const Loop &L);
1142 void DeleteFormula(Formula &F);
1143 void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1144
1145 void print(raw_ostream &OS) const;
1146 void dump() const;
1147};
1148
1149} // end anonymous namespace
1150
1151/// Tally up interesting quantities from the given register.
1152void Cost::RateRegister(const SCEV *Reg,
1153 SmallPtrSetImpl<const SCEV *> &Regs,
1154 const Loop *L,
1155 ScalarEvolution &SE, DominatorTree &DT) {
1156 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
1157 // If this is an addrec for another loop, it should be an invariant
1158 // with respect to L since L is the innermost loop (at least
1159 // for now LSR only handles innermost loops).
1160 if (AR->getLoop() != L) {
1161 // If the AddRec exists, consider its register free and leave it alone.
1162 if (isExistingPhi(AR, SE))
1163 return;
1164
1165 // It is bad to allow LSR for current loop to add induction variables
1166 // for its sibling loops.
1167 if (!AR->getLoop()->contains(L)) {
1168 Lose();
1169 return;
1170 }
1171
1172 // Otherwise, it will be an invariant with respect to Loop L.
1173 ++NumRegs;
1174 return;
1175 }
1176 AddRecCost += 1; /// TODO: This should be a function of the stride.
1177
1178 // Add the step value register, if it needs one.
1179 // TODO: The non-affine case isn't precisely modeled here.
1180 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1181 if (!Regs.count(AR->getOperand(1))) {
1182 RateRegister(AR->getOperand(1), Regs, L, SE, DT);
1183 if (isLoser())
1184 return;
1185 }
1186 }
1187 }
1188 ++NumRegs;
1189
1190 // Rough heuristic; favor registers which don't require extra setup
1191 // instructions in the preheader.
1192 if (!isa<SCEVUnknown>(Reg) &&
1193 !isa<SCEVConstant>(Reg) &&
1194 !(isa<SCEVAddRecExpr>(Reg) &&
1195 (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
1196 isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
1197 ++SetupCost;
1198
1199 NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1200 SE.hasComputableLoopEvolution(Reg, L);
1201}
1202
1203/// Record this register in the set. If we haven't seen it before, rate
1204/// it. Optional LoserRegs provides a way to declare any formula that refers to
1205/// one of those regs an instant loser.
1206void Cost::RatePrimaryRegister(const SCEV *Reg,
1207 SmallPtrSetImpl<const SCEV *> &Regs,
1208 const Loop *L,
1209 ScalarEvolution &SE, DominatorTree &DT,
1210 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1211 if (LoserRegs && LoserRegs->count(Reg)) {
1212 Lose();
1213 return;
1214 }
1215 if (Regs.insert(Reg).second) {
1216 RateRegister(Reg, Regs, L, SE, DT);
1217 if (LoserRegs && isLoser())
1218 LoserRegs->insert(Reg);
1219 }
1220}
1221
1222void Cost::RateFormula(const TargetTransformInfo &TTI,
1223 const Formula &F,
1224 SmallPtrSetImpl<const SCEV *> &Regs,
1225 const DenseSet<const SCEV *> &VisitedRegs,
1226 const Loop *L,
1227 ScalarEvolution &SE, DominatorTree &DT,
1228 const LSRUse &LU,
1229 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1230 assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
1231 // Tally up the registers.
1232 unsigned PrevAddRecCost = AddRecCost;
1233 unsigned PrevNumRegs = NumRegs;
1234 unsigned PrevNumBaseAdds = NumBaseAdds;
1235 if (const SCEV *ScaledReg = F.ScaledReg) {
1236 if (VisitedRegs.count(ScaledReg)) {
1237 Lose();
1238 return;
1239 }
1240 RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
1241 if (isLoser())
1242 return;
1243 }
1244 for (const SCEV *BaseReg : F.BaseRegs) {
1245 if (VisitedRegs.count(BaseReg)) {
1246 Lose();
1247 return;
1248 }
1249 RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
1250 if (isLoser())
1251 return;
1252 }
1253
1254 // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
1255 // additional instruction (at least fill).
1256 unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
1257 if (NumRegs > TTIRegNum) {
1258 // The cost already exceeded TTIRegNum, so only the newly added registers can
1259 // add new instructions.
1260 if (PrevNumRegs > TTIRegNum)
1261 Insns += (NumRegs - PrevNumRegs);
1262 else
1263 Insns += (NumRegs - TTIRegNum);
1264 }
1265
1266 // Determine how many (unfolded) adds we'll need inside the loop.
1267 size_t NumBaseParts = F.getNumRegs();
1268 if (NumBaseParts > 1)
1269 // Do not count the base and a possible second register if the target
1270 // allows folding 2 registers.
1271 NumBaseAdds +=
1272 NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
1273 NumBaseAdds += (F.UnfoldedOffset != 0);
1274
1275 // Accumulate non-free scaling amounts.
1276 ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
1277
1278 // Tally up the non-zero immediates.
1279 for (const LSRFixup &Fixup : LU.Fixups) {
1280 int64_t O = Fixup.Offset;
1281 int64_t Offset = (uint64_t)O + F.BaseOffset;
1282 if (F.BaseGV)
1283 ImmCost += 64; // Handle symbolic values conservatively.
1284 // TODO: This should probably be the pointer size.
1285 else if (Offset != 0)
1286 ImmCost += APInt(64, Offset, true).getMinSignedBits();
1287
1288 // Check with target if this offset with this instruction is
1289 // specifically not supported.
1290 if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
1291 !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
1292 NumBaseAdds++;
1293 }
1294
1295 // If the ICmpZero formula does not end with 0, it cannot be replaced by
1296 // just an add or a sub. We'll need to compare the final result of the AddRec.
1297 // That means we'll need an additional instruction.
1298 // For -10 + {0, +, 1}:
1299 // i = i + 1;
1300 // cmp i, 10
1301 //
1302 // For {-10, +, 1}:
1303 // i = i + 1;
1304 if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd())
1305 Insns++;
1306 // Each new AddRec adds 1 instruction to calculation.
1307 Insns += (AddRecCost - PrevAddRecCost);
1308
1309 // BaseAdds adds instructions for unfolded registers.
1310 if (LU.Kind != LSRUse::ICmpZero)
1311 Insns += NumBaseAdds - PrevNumBaseAdds;
1312 assert(isValid() && "invalid cost");
1313}
1314
1315/// Set this cost to a losing value.
1316void Cost::Lose() {
1317 Insns = ~0u;
1318 NumRegs = ~0u;
1319 AddRecCost = ~0u;
1320 NumIVMuls = ~0u;
1321 NumBaseAdds = ~0u;
1322 ImmCost = ~0u;
1323 SetupCost = ~0u;
1324 ScaleCost = ~0u;
1325}
1326
1327/// Choose the lower cost.
1328bool Cost::operator<(const Cost &Other) const {
1329 if (InsnsCost && Insns != Other.Insns)
1330 return Insns < Other.Insns;
1331 return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
1332 ImmCost, SetupCost) <
1333 std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
1334 Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
1335 Other.SetupCost);
1336}
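
Illustration (not part of the annotated source): a standalone sketch of the lexicographic ordering used above. std::tie compares members left to right, so NumRegs dominates AddRecCost, which dominates the remaining metrics; the names are hypothetical.

  #include <tuple>

  struct ToyCost {
    unsigned NumRegs, AddRecCost;
    bool operator<(const ToyCost &Other) const {
      // NumRegs is compared first; AddRecCost only breaks ties.
      return std::tie(NumRegs, AddRecCost) <
             std::tie(Other.NumRegs, Other.AddRecCost);
    }
  };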
1337
1338void Cost::print(raw_ostream &OS) const {
1339 OS << Insns << " instruction" << (Insns == 1 ? " " : "s ");
1340 OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
1341 if (AddRecCost != 0)
1342 OS << ", with addrec cost " << AddRecCost;
1343 if (NumIVMuls != 0)
1344 OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
1345 if (NumBaseAdds != 0)
1346 OS << ", plus " << NumBaseAdds << " base add"
1347 << (NumBaseAdds == 1 ? "" : "s");
1348 if (ScaleCost != 0)
1349 OS << ", plus " << ScaleCost << " scale cost";
1350 if (ImmCost != 0)
1351 OS << ", plus " << ImmCost << " imm cost";
1352 if (SetupCost != 0)
1353 OS << ", plus " << SetupCost << " setup cost";
1354}
1355
1356#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1357LLVM_DUMP_METHOD void Cost::dump() const {
1358 print(errs()); errs() << '\n';
1359}
1360#endif
1361
1362LSRFixup::LSRFixup()
1363 : UserInst(nullptr), OperandValToReplace(nullptr),
1364 Offset(0) {}
1365
1366/// Test whether this fixup always uses its value outside of the given loop.
1367bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
1368 // PHI nodes use their value in their incoming blocks.
1369 if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1370 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1371 if (PN->getIncomingValue(i) == OperandValToReplace &&
1372 L->contains(PN->getIncomingBlock(i)))
1373 return false;
1374 return true;
1375 }
1376
1377 return !L->contains(UserInst);
1378}
1379
1380void LSRFixup::print(raw_ostream &OS) const {
1381 OS << "UserInst=";
1382 // Store is common and interesting enough to be worth special-casing.
1383 if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1384 OS << "store ";
1385 Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
1386 } else if (UserInst->getType()->isVoidTy())
1387 OS << UserInst->getOpcodeName();
1388 else
1389 UserInst->printAsOperand(OS, /*PrintType=*/false);
1390
1391 OS << ", OperandValToReplace=";
1392 OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
1393
1394 for (const Loop *PIL : PostIncLoops) {
1395 OS << ", PostIncLoop=";
1396 PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
1397 }
1398
1399 if (Offset != 0)
1400 OS << ", Offset=" << Offset;
1401}
1402
1403#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1404LLVM_DUMP_METHOD void LSRFixup::dump() const {
1405 print(errs()); errs() << '\n';
1406}
1407#endif
1408
1409/// Test whether this use has a formula which has the same registers as the given
1410/// formula.
1411bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1412 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1413 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1414 // Unstable sort by host order ok, because this is only used for uniquifying.
1415 std::sort(Key.begin(), Key.end());
1416 return Uniquifier.count(Key);
1417}
1418
1419/// The function returns the probability of selecting a formula without Reg.
1420float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
1421 unsigned FNum = 0;
1422 for (const Formula &F : Formulae)
1423 if (F.referencesReg(Reg))
1424 FNum++;
1425 return ((float)(Formulae.size() - FNum)) / Formulae.size();
1426}
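
Illustration (not part of the annotated source): if a use has 4 formulae and exactly 1 of them references Reg, the probability of selecting a formula without Reg is (4 - 1) / 4 = 0.75.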
1427
1428/// If the given formula has not yet been inserted, add it to the list, and
1429/// return true. Return false otherwise. The formula must be in canonical form.
1430bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
1431 assert(F.isCanonical(L) && "Invalid canonical representation");
1432
1433 if (!Formulae.empty() && RigidFormula)
1434 return false;
1435
1436 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1437 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1438 // Unstable sort by host order ok, because this is only used for uniquifying.
1439 std::sort(Key.begin(), Key.end());
1440
1441 if (!Uniquifier.insert(Key).second)
1442 return false;
1443
1444 // Using a register to hold the value of 0 is not profitable.
1445 assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
1446 "Zero allocated in a scaled register!");
1447#ifndef NDEBUG
1448 for (const SCEV *BaseReg : F.BaseRegs)
1449 assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1450#endif
1451
1452 // Add the formula to the list.
1453 Formulae.push_back(F);
1454
1455 // Record registers now being used by this use.
1456 Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1457 if (F.ScaledReg)
1458 Regs.insert(F.ScaledReg);
1459
1460 return true;
1461}
1462
1463/// Remove the given formula from this use's list.
1464void LSRUse::DeleteFormula(Formula &F) {
1465 if (&F != &Formulae.back())
1466 std::swap(F, Formulae.back());
1467 Formulae.pop_back();
1468}
1469
1470/// Recompute the Regs field, and update RegUses.
1471void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1472 // Now that we've filtered out some formulae, recompute the Regs set.
1473 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1474 Regs.clear();
1475 for (const Formula &F : Formulae) {
1476 if (F.ScaledReg) Regs.insert(F.ScaledReg);
1477 Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1478 }
1479
1480 // Update the RegTracker.
1481 for (const SCEV *S : OldRegs)
1482 if (!Regs.count(S))
1483 RegUses.dropRegister(S, LUIdx);
1484}
1485
1486void LSRUse::print(raw_ostream &OS) const {
1487 OS << "LSR Use: Kind=";
1488 switch (Kind) {
1489 case Basic: OS << "Basic"; break;
1490 case Special: OS << "Special"; break;
1491 case ICmpZero: OS << "ICmpZero"; break;
1492 case Address:
1493 OS << "Address of ";
1494 if (AccessTy.MemTy->isPointerTy())
1495 OS << "pointer"; // the full pointer type could be really verbose
1496 else {
1497 OS << *AccessTy.MemTy;
1498 }
1499
1500 OS << " in addrspace(" << AccessTy.AddrSpace << ')';
1501 }
1502
1503 OS << ", Offsets={";
1504 bool NeedComma = false;
1505 for (const LSRFixup &Fixup : Fixups) {
1506 if (NeedComma) OS << ',';
1507 OS << Fixup.Offset;
1508 NeedComma = true;
1509 }
1510 OS << '}';
1511
1512 if (AllFixupsOutsideLoop)
1513 OS << ", all-fixups-outside-loop";
1514
1515 if (WidestFixupType)
1516 OS << ", widest fixup type: " << *WidestFixupType;
1517}
1518
1519#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1520LLVM_DUMP_METHOD void LSRUse::dump() const {
1521 print(errs()); errs() << '\n';
1522}
1523#endif
1524
1525static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1526 LSRUse::KindType Kind, MemAccessTy AccessTy,
1527 GlobalValue *BaseGV, int64_t BaseOffset,
1528 bool HasBaseReg, int64_t Scale) {
1529 switch (Kind) {
1530 case LSRUse::Address:
1531 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
1532 HasBaseReg, Scale, AccessTy.AddrSpace);
1533
1534 case LSRUse::ICmpZero:
1535 // There's not even a target hook for querying whether it would be legal to
1536 // fold a GV into an ICmp.
1537 if (BaseGV)
1538 return false;
1539
1540 // ICmp only has two operands; don't allow more than two non-trivial parts.
1541 if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1542 return false;
1543
1544 // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1545 // putting the scaled register in the other operand of the icmp.
1546 if (Scale != 0 && Scale != -1)
1547 return false;
1548
1549 // If we have low-level target information, ask the target if it can fold an
1550 // integer immediate on an icmp.
1551 if (BaseOffset != 0) {
1552 // We have one of:
1553 // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1554 // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
1555 // Offs is the ICmp immediate.
1556 if (Scale == 0)
1557 // The cast does the right thing with INT64_MIN.
1558 BaseOffset = -(uint64_t)BaseOffset;
1559 return TTI.isLegalICmpImmediate(BaseOffset);
1560 }
1561
1562 // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
1563 return true;
1564
1565 case LSRUse::Basic:
1566 // Only handle single-register values.
1567 return !BaseGV && Scale == 0 && BaseOffset == 0;
1568
1569 case LSRUse::Special:
1570 // Special case Basic to handle -1 scales.
1571 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1572 }
1573
1574 llvm_unreachable("Invalid LSRUse Kind!")::llvm::llvm_unreachable_internal("Invalid LSRUse Kind!", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 1574)
;
1575}
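// For example (values hypothetical), an ICmpZero use whose formula is
// BaseReg + 4 can be folded to "icmp BaseReg, -4" when the target accepts -4
// as an icmp immediate, while a formula with Scale == -1 simply moves the
// scaled register to the other icmp operand, as described above.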
1576
1577static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1578 int64_t MinOffset, int64_t MaxOffset,
1579 LSRUse::KindType Kind, MemAccessTy AccessTy,
1580 GlobalValue *BaseGV, int64_t BaseOffset,
1581 bool HasBaseReg, int64_t Scale) {
1582 // Check for overflow.
1583 if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1584 (MinOffset > 0))
1585 return false;
1586 MinOffset = (uint64_t)BaseOffset + MinOffset;
1587 if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1588 (MaxOffset > 0))
1589 return false;
1590 MaxOffset = (uint64_t)BaseOffset + MaxOffset;
1591
1592 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
1593 HasBaseReg, Scale) &&
1594 isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
1595 HasBaseReg, Scale);
1596}
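// Rough worked example (offsets hypothetical): with BaseOffset = 40 and a use
// whose fixups span [MinOffset, MaxOffset] = [-16, 24], the checks above
// require the addressing mode to fold both 40 + (-16) = 24 and 40 + 24 = 64,
// after first rejecting any combination that overflows int64_t.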
1597
1598static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1599 int64_t MinOffset, int64_t MaxOffset,
1600 LSRUse::KindType Kind, MemAccessTy AccessTy,
1601 const Formula &F, const Loop &L) {
1602 // For the purpose of isAMCompletelyFolded either having a canonical formula
1603 // or a scale not equal to zero is correct.
1604 // Problems may arise from non-canonical formulae having a scale == 0.
1605 // Strictly speaking it would be best to just rely on canonical formulae.
1606 // However, when we generate the scaled formulae, we first check that the
1607 // scaling factor is profitable before computing the actual ScaledReg, for
1608 // compile time's sake.
1609 assert((F.isCanonical(L) || F.Scale != 0));
1610 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1611 F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
1612}
1613
1614/// Test whether we know how to expand the current formula.
1615static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1616 int64_t MaxOffset, LSRUse::KindType Kind,
1617 MemAccessTy AccessTy, GlobalValue *BaseGV,
1618 int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
1619 // We know how to expand completely foldable formulae.
1620 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1621 BaseOffset, HasBaseReg, Scale) ||
1622 // Or formulae that use a base register produced by a sum of base
1623 // registers.
1624 (Scale == 1 &&
1625 isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1626 BaseGV, BaseOffset, true, 0));
1627}
1628
1629static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1630 int64_t MaxOffset, LSRUse::KindType Kind,
1631 MemAccessTy AccessTy, const Formula &F) {
1632 return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
1633 F.BaseOffset, F.HasBaseReg, F.Scale);
1634}
1635
1636static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1637 const LSRUse &LU, const Formula &F) {
1638 return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1639 LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
1640 F.Scale);
1641}
1642
1643static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
1644 const LSRUse &LU, const Formula &F,
1645 const Loop &L) {
1646 if (!F.Scale)
1647 return 0;
1648
1649 // If the use is not completely folded in that instruction, we will have to
1650 // pay an extra cost only for scale != 1.
1651 if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1652 LU.AccessTy, F, L))
1653 return F.Scale != 1;
1654
1655 switch (LU.Kind) {
1656 case LSRUse::Address: {
1657 // Check the scaling factor cost with both the min and max offsets.
1658 int ScaleCostMinOffset = TTI.getScalingFactorCost(
1659 LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
1660 F.Scale, LU.AccessTy.AddrSpace);
1661 int ScaleCostMaxOffset = TTI.getScalingFactorCost(
1662 LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
1663 F.Scale, LU.AccessTy.AddrSpace);
1664
1665 assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
1666 "Legal addressing mode has an illegal cost!");
1667 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1668 }
1669 case LSRUse::ICmpZero:
1670 case LSRUse::Basic:
1671 case LSRUse::Special:
1672 // The use is completely folded, i.e., everything is folded into the
1673 // instruction.
1674 return 0;
1675 }
1676
1677 llvm_unreachable("Invalid LSRUse Kind!")::llvm::llvm_unreachable_internal("Invalid LSRUse Kind!", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 1677)
;
1678}
1679
1680static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1681 LSRUse::KindType Kind, MemAccessTy AccessTy,
1682 GlobalValue *BaseGV, int64_t BaseOffset,
1683 bool HasBaseReg) {
1684 // Fast-path: zero is always foldable.
1685 if (BaseOffset == 0 && !BaseGV) return true;
1686
1687 // Conservatively, create an address with an immediate and a
1688 // base and a scale.
1689 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1690
1691 // Canonicalize a scale of 1 to a base register if the formula doesn't
1692 // already have a base register.
1693 if (!HasBaseReg && Scale == 1) {
1694 Scale = 0;
1695 HasBaseReg = true;
1696 }
1697
1698 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
1699 HasBaseReg, Scale);
1700}
1701
1702static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1703 ScalarEvolution &SE, int64_t MinOffset,
1704 int64_t MaxOffset, LSRUse::KindType Kind,
1705 MemAccessTy AccessTy, const SCEV *S,
1706 bool HasBaseReg) {
1707 // Fast-path: zero is always foldable.
1708 if (S->isZero()) return true;
1709
1710 // Conservatively, create an address with an immediate and a
1711 // base and a scale.
1712 int64_t BaseOffset = ExtractImmediate(S, SE);
1713 GlobalValue *BaseGV = ExtractSymbol(S, SE);
1714
1715 // If there's anything else involved, it's not foldable.
1716 if (!S->isZero()) return false;
1717
1718 // Fast-path: zero is always foldable.
1719 if (BaseOffset == 0 && !BaseGV) return true;
1720
1721 // Conservatively, create an address with an immediate and a
1722 // base and a scale.
1723 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1724
1725 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1726 BaseOffset, HasBaseReg, Scale);
1727}
1728
1729namespace {
1730
1731/// An individual increment in a Chain of IV increments. Relate an IV user to
1732/// an expression that computes the IV it uses from the IV used by the previous
1733/// link in the Chain.
1734///
1735/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
1736/// original IVOperand. The head of the chain's IVOperand is only valid during
1737/// chain collection, before LSR replaces IV users. During chain generation,
1738/// IncExpr can be used to find the new IVOperand that computes the same
1739/// expression.
1740struct IVInc {
1741 Instruction *UserInst;
1742 Value* IVOperand;
1743 const SCEV *IncExpr;
1744
1745 IVInc(Instruction *U, Value *O, const SCEV *E):
1746 UserInst(U), IVOperand(O), IncExpr(E) {}
1747};
1748
1749// The list of IV increments in program order. We typically add the head of a
1750// chain without finding subsequent links.
1751struct IVChain {
1752 SmallVector<IVInc,1> Incs;
1753 const SCEV *ExprBase;
1754
1755 IVChain() : ExprBase(nullptr) {}
1756
1757 IVChain(const IVInc &Head, const SCEV *Base)
1758 : Incs(1, Head), ExprBase(Base) {}
1759
1760 typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
1761
1762 // Return the first increment in the chain.
1763 const_iterator begin() const {
1764 assert(!Incs.empty());
1765 return std::next(Incs.begin());
1766 }
1767 const_iterator end() const {
1768 return Incs.end();
1769 }
1770
1771 // Returns true if this chain contains any increments.
1772 bool hasIncs() const { return Incs.size() >= 2; }
1773
1774 // Add an IVInc to the end of this chain.
1775 void add(const IVInc &X) { Incs.push_back(X); }
1776
1777 // Returns the last UserInst in the chain.
1778 Instruction *tailUserInst() const { return Incs.back().UserInst; }
1779
1780 // Returns true if IncExpr can be profitably added to this chain.
1781 bool isProfitableIncrement(const SCEV *OperExpr,
1782 const SCEV *IncExpr,
1783 ScalarEvolution&);
1784};
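// A small illustrative chain (names hypothetical): for accesses p[i], p[i+1],
// p[i+2] inside the loop, the head IVInc records the absolute expression for
// p[i]'s IV operand, and each following IVInc records only the constant step
// from the previous link, so the whole chain can potentially be driven by a
// single register.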
1785
1786/// Helper for CollectChains to track multiple IV increment uses. Distinguish
1787/// between FarUsers that definitely cross IV increments and NearUsers that may
1788/// be used between IV increments.
1789struct ChainUsers {
1790 SmallPtrSet<Instruction*, 4> FarUsers;
1791 SmallPtrSet<Instruction*, 4> NearUsers;
1792};
1793
1794/// This class holds state for the main loop strength reduction logic.
1795class LSRInstance {
1796 IVUsers &IU;
1797 ScalarEvolution &SE;
1798 DominatorTree &DT;
1799 LoopInfo &LI;
1800 const TargetTransformInfo &TTI;
1801 Loop *const L;
1802 bool Changed;
1803
1804 /// This is the insert position that the current loop's induction variable
1805 /// increment should be placed. In simple loops, this is the latch block's
1806 /// terminator. But in more complicated cases, this is a position which will
1807 /// dominate all the in-loop post-increment users.
1808 Instruction *IVIncInsertPos;
1809
1810 /// Interesting factors between use strides.
1811 ///
1812 /// We explicitly use a SetVector which contains a SmallSet, instead of the
1813 /// default, a SmallDenseSet, because we need to use the full range of
1814 /// int64_ts, and there's currently no good way of doing that with
1815 /// SmallDenseSet.
1816 SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
1817
1818 /// Interesting use types, to facilitate truncation reuse.
1819 SmallSetVector<Type *, 4> Types;
1820
1821 /// The list of interesting uses.
1822 SmallVector<LSRUse, 16> Uses;
1823
1824 /// Track which uses use which register candidates.
1825 RegUseTracker RegUses;
1826
1827 // Limit the number of chains to avoid quadratic behavior. We don't expect to
1828 // have more than a few IV increment chains in a loop. Missing a Chain falls
1829 // back to normal LSR behavior for those uses.
1830 static const unsigned MaxChains = 8;
1831
1832 /// IV users can form a chain of IV increments.
1833 SmallVector<IVChain, MaxChains> IVChainVec;
1834
1835 /// IV users that belong to profitable IVChains.
1836 SmallPtrSet<Use*, MaxChains> IVIncSet;
1837
1838 void OptimizeShadowIV();
1839 bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
1840 ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
1841 void OptimizeLoopTermCond();
1842
1843 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
1844 SmallVectorImpl<ChainUsers> &ChainUsersVec);
1845 void FinalizeChain(IVChain &Chain);
1846 void CollectChains();
1847 void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
1848 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
1849
1850 void CollectInterestingTypesAndFactors();
1851 void CollectFixupsAndInitialFormulae();
1852
1853 // Support for sharing of LSRUses between LSRFixups.
1854 typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
1855 UseMapTy UseMap;
1856
1857 bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
1858 LSRUse::KindType Kind, MemAccessTy AccessTy);
1859
1860 std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
1861 MemAccessTy AccessTy);
1862
1863 void DeleteUse(LSRUse &LU, size_t LUIdx);
1864
1865 LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
1866
1867 void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1868 void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1869 void CountRegisters(const Formula &F, size_t LUIdx);
1870 bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
1871
1872 void CollectLoopInvariantFixupsAndFormulae();
1873
1874 void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
1875 unsigned Depth = 0);
1876
1877 void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
1878 const Formula &Base, unsigned Depth,
1879 size_t Idx, bool IsScaledReg = false);
1880 void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
1881 void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1882 const Formula &Base, size_t Idx,
1883 bool IsScaledReg = false);
1884 void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1885 void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1886 const Formula &Base,
1887 const SmallVectorImpl<int64_t> &Worklist,
1888 size_t Idx, bool IsScaledReg = false);
1889 void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1890 void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1891 void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1892 void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
1893 void GenerateCrossUseConstantOffsets();
1894 void GenerateAllReuseFormulae();
1895
1896 void FilterOutUndesirableDedicatedRegisters();
1897
1898 size_t EstimateSearchSpaceComplexity() const;
1899 void NarrowSearchSpaceByDetectingSupersets();
1900 void NarrowSearchSpaceByCollapsingUnrolledCode();
1901 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
1902 void NarrowSearchSpaceByDeletingCostlyFormulas();
1903 void NarrowSearchSpaceByPickingWinnerRegs();
1904 void NarrowSearchSpaceUsingHeuristics();
1905
1906 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
1907 Cost &SolutionCost,
1908 SmallVectorImpl<const Formula *> &Workspace,
1909 const Cost &CurCost,
1910 const SmallPtrSet<const SCEV *, 16> &CurRegs,
1911 DenseSet<const SCEV *> &VisitedRegs) const;
1912 void Solve(SmallVectorImpl<const Formula *> &Solution) const;
1913
1914 BasicBlock::iterator
1915 HoistInsertPosition(BasicBlock::iterator IP,
1916 const SmallVectorImpl<Instruction *> &Inputs) const;
1917 BasicBlock::iterator
1918 AdjustInsertPositionForExpand(BasicBlock::iterator IP,
1919 const LSRFixup &LF,
1920 const LSRUse &LU,
1921 SCEVExpander &Rewriter) const;
1922
1923 Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
1924 BasicBlock::iterator IP, SCEVExpander &Rewriter,
1925 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
1926 void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
1927 const Formula &F, SCEVExpander &Rewriter,
1928 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
1929 void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
1930 SCEVExpander &Rewriter,
1931 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
1932 void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
1933
1934public:
1935 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
1936 LoopInfo &LI, const TargetTransformInfo &TTI);
1937
1938 bool getChanged() const { return Changed; }
1939
1940 void print_factors_and_types(raw_ostream &OS) const;
1941 void print_fixups(raw_ostream &OS) const;
1942 void print_uses(raw_ostream &OS) const;
1943 void print(raw_ostream &OS) const;
1944 void dump() const;
1945};
1946
1947} // end anonymous namespace
1948
1949/// If IV is used in an int-to-float cast inside the loop then try to eliminate
1950/// the cast operation.
1951void LSRInstance::OptimizeShadowIV() {
1952 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1953 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1954 return;
1955
1956 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
1957 UI != E; /* empty */) {
1958 IVUsers::const_iterator CandidateUI = UI;
1959 ++UI;
1960 Instruction *ShadowUse = CandidateUI->getUser();
1961 Type *DestTy = nullptr;
1962 bool IsSigned = false;
1963
1964 /* If shadow use is an int->float cast then insert a second IV
1965 to eliminate this cast.
1966
1967 for (unsigned i = 0; i < n; ++i)
1968 foo((double)i);
1969
1970 is transformed into
1971
1972 double d = 0.0;
1973 for (unsigned i = 0; i < n; ++i, ++d)
1974 foo(d);
1975 */
1976 if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
1977 IsSigned = false;
1978 DestTy = UCast->getDestTy();
1979 }
1980 else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
1981 IsSigned = true;
1982 DestTy = SCast->getDestTy();
1983 }
1984 if (!DestTy) continue;
1985
1986 // If target does not support DestTy natively then do not apply
1987 // this transformation.
1988 if (!TTI.isTypeLegal(DestTy)) continue;
1989
1990 PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
1991 if (!PH) continue;
1992 if (PH->getNumIncomingValues() != 2) continue;
1993
1994 Type *SrcTy = PH->getType();
1995 int Mantissa = DestTy->getFPMantissaWidth();
1996 if (Mantissa == -1) continue;
1997 if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
1998 continue;
1999
2000 unsigned Entry, Latch;
2001 if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
2002 Entry = 0;
2003 Latch = 1;
2004 } else {
2005 Entry = 1;
2006 Latch = 0;
2007 }
2008
2009 ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
2010 if (!Init) continue;
2011 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2012 (double)Init->getSExtValue() :
2013 (double)Init->getZExtValue());
2014
2015 BinaryOperator *Incr =
2016 dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
2017 if (!Incr) continue;
2018 if (Incr->getOpcode() != Instruction::Add
2019 && Incr->getOpcode() != Instruction::Sub)
2020 continue;
2021
2022 /* Initialize new IV, double d = 0.0 in above example. */
2023 ConstantInt *C = nullptr;
2024 if (Incr->getOperand(0) == PH)
2025 C = dyn_cast<ConstantInt>(Incr->getOperand(1));
2026 else if (Incr->getOperand(1) == PH)
2027 C = dyn_cast<ConstantInt>(Incr->getOperand(0));
2028 else
2029 continue;
2030
2031 if (!C) continue;
2032
2033 // Ignore negative constants, as the code below doesn't handle them
2034 // correctly. TODO: Remove this restriction.
2035 if (!C->getValue().isStrictlyPositive()) continue;
2036
2037 /* Add new PHINode. */
2038 PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
2039
2040 /* create new increment. '++d' in above example. */
2041 Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
2042 BinaryOperator *NewIncr =
2043 BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
2044 Instruction::FAdd : Instruction::FSub,
2045 NewPH, CFP, "IV.S.next.", Incr);
2046
2047 NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
2048 NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
2049
2050 /* Remove cast operation */
2051 ShadowUse->replaceAllUsesWith(NewPH);
2052 ShadowUse->eraseFromParent();
2053 Changed = true;
2054 break;
2055 }
2056}
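// Illustrative result at the IR level (block and value names hypothetical):
// next to the integer phi, the code above creates
//   %IV.S. = phi double [ 0.0, %entry ], [ %IV.S.next., %latch ]
//   %IV.S.next. = fadd double %IV.S., 1.0
// and the uitofp/sitofp user is replaced by %IV.S. and erased.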
2057
2058/// If Cond has an operand that is an expression of an IV, set the IV user and
2059/// stride information and return true, otherwise return false.
2060bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
2061 for (IVStrideUse &U : IU)
2062 if (U.getUser() == Cond) {
2063 // NOTE: we could handle setcc instructions with multiple uses here, but
2064 // InstCombine does it as well for simple uses, it's not clear that it
2065 // occurs enough in real life to handle.
2066 CondUse = &U;
2067 return true;
2068 }
2069 return false;
2070}
2071
2072/// Rewrite the loop's terminating condition if it uses a max computation.
2073///
2074/// This is a narrow solution to a specific, but acute, problem. For loops
2075/// like this:
2076///
2077/// i = 0;
2078/// do {
2079/// p[i] = 0.0;
2080/// } while (++i < n);
2081///
2082/// the trip count isn't just 'n', because 'n' might not be positive. And
2083/// unfortunately this can come up even for loops where the user didn't use
2084/// a C do-while loop. For example, seemingly well-behaved top-test loops
2085/// will commonly be lowered like this:
2086///
2087/// if (n > 0) {
2088/// i = 0;
2089/// do {
2090/// p[i] = 0.0;
2091/// } while (++i < n);
2092/// }
2093///
2094/// and then it's possible for subsequent optimization to obscure the if
2095/// test in such a way that indvars can't find it.
2096///
2097/// When indvars can't find the if test in loops like this, it creates a
2098/// max expression, which allows it to give the loop a canonical
2099/// induction variable:
2100///
2101/// i = 0;
2102/// max = n < 1 ? 1 : n;
2103/// do {
2104/// p[i] = 0.0;
2105/// } while (++i != max);
2106///
2107/// Canonical induction variables are necessary because the loop passes
2108/// are designed around them. The most obvious example of this is the
2109/// LoopInfo analysis, which doesn't remember trip count values. It
2110/// expects to be able to rediscover the trip count each time it is
2111/// needed, and it does this using a simple analysis that only succeeds if
2112/// the loop has a canonical induction variable.
2113///
2114/// However, when it comes time to generate code, the maximum operation
2115/// can be quite costly, especially if it's inside of an outer loop.
2116///
2117/// This function solves this problem by detecting this type of loop and
2118/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
2119/// the instructions for the maximum computation.
2120///
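/// An illustrative IR-level sketch of the rewrite (names hypothetical):
///
///   %max  = select i1 %guard, i32 1, i32 %n   ; max = n < 1 ? 1 : n
///   %exit = icmp ne i32 %i.next, %max
///
/// becomes
///
///   %exit = icmp slt i32 %i.next, %n
///
/// after which the select (and its compare, if otherwise unused) is deleted.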
2121ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
2122 // Check that the loop matches the pattern we're looking for.
2123 if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
2124 Cond->getPredicate() != CmpInst::ICMP_NE)
2125 return Cond;
2126
2127 SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
2128 if (!Sel || !Sel->hasOneUse()) return Cond;
2129
2130 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2131 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2132 return Cond;
2133 const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
2134
2135 // Add one to the backedge-taken count to get the trip count.
2136 const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
2137 if (IterationCount != SE.getSCEV(Sel)) return Cond;
2138
2139 // Check for a max calculation that matches the pattern. There's no check
2140 // for ICMP_ULE here because the comparison would be with zero, which
2141 // isn't interesting.
2142 CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
2143 const SCEVNAryExpr *Max = nullptr;
2144 if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2145 Pred = ICmpInst::ICMP_SLE;
2146 Max = S;
2147 } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2148 Pred = ICmpInst::ICMP_SLT;
2149 Max = S;
2150 } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2151 Pred = ICmpInst::ICMP_ULT;
2152 Max = U;
2153 } else {
2154 // No match; bail.
2155 return Cond;
2156 }
2157
2158 // To handle a max with more than two operands, this optimization would
2159 // require additional checking and setup.
2160 if (Max->getNumOperands() != 2)
2161 return Cond;
2162
2163 const SCEV *MaxLHS = Max->getOperand(0);
2164 const SCEV *MaxRHS = Max->getOperand(1);
2165
2166 // ScalarEvolution canonicalizes constants to the left. For < and >, look
2167 // for a comparison with 1. For <= and >=, a comparison with zero.
2168 if (!MaxLHS ||
2169 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2170 return Cond;
2171
2172 // Check the relevant induction variable for conformance to
2173 // the pattern.
2174 const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
2175 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
2176 if (!AR || !AR->isAffine() ||
2177 AR->getStart() != One ||
2178 AR->getStepRecurrence(SE) != One)
2179 return Cond;
2180
2181 assert(AR->getLoop() == L &&
2182 "Loop condition operand is an addrec in a different loop!");
2183
2184 // Check the right operand of the select, and remember it, as it will
2185 // be used in the new comparison instruction.
2186 Value *NewRHS = nullptr;
2187 if (ICmpInst::isTrueWhenEqual(Pred)) {
2188 // Look for n+1, and grab n.
2189 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
2190 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2191 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2192 NewRHS = BO->getOperand(0);
2193 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
2194 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2195 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2196 NewRHS = BO->getOperand(0);
2197 if (!NewRHS)
2198 return Cond;
2199 } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
2200 NewRHS = Sel->getOperand(1);
2201 else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
2202 NewRHS = Sel->getOperand(2);
2203 else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2204 NewRHS = SU->getValue();
2205 else
2206 // Max doesn't match expected pattern.
2207 return Cond;
2208
2209 // Determine the new comparison opcode. It may be signed or unsigned,
2210 // and the original comparison may be either equality or inequality.
2211 if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2212 Pred = CmpInst::getInversePredicate(Pred);
2213
2214 // Ok, everything looks ok to change the condition into an SLT or SGE and
2215 // delete the max calculation.
2216 ICmpInst *NewCond =
2217 new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
2218
2219 // Delete the max calculation instructions.
2220 Cond->replaceAllUsesWith(NewCond);
2221 CondUse->setUser(NewCond);
2222 Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
2223 Cond->eraseFromParent();
2224 Sel->eraseFromParent();
2225 if (Cmp->use_empty())
2226 Cmp->eraseFromParent();
2227 return NewCond;
2228}
2229
2230/// Change loop terminating condition to use the postinc iv when possible.
2231void
2232LSRInstance::OptimizeLoopTermCond() {
2233 SmallPtrSet<Instruction *, 4> PostIncs;
2234
2235 // We need a different set of heuristics for rotated and non-rotated loops.
2236 // If a loop is rotated then the latch is also the backedge, so inserting
2237 // post-inc expressions just before the latch is ideal. To reduce live ranges
2238 // it also makes sense to rewrite terminating conditions to use post-inc
2239 // expressions.
2240 //
2241 // If the loop is not rotated then the latch is not a backedge; the latch
2242 // check is done in the loop head. Adding post-inc expressions before the
2243 // latch will cause overlapping live-ranges of pre-inc and post-inc expressions
2244 // in the loop body. In this case we do *not* want to use post-inc expressions
2245 // in the latch check, and we want to insert post-inc expressions before
2246 // the backedge.
2247 BasicBlock *LatchBlock = L->getLoopLatch();
2248 SmallVector<BasicBlock*, 8> ExitingBlocks;
2249 L->getExitingBlocks(ExitingBlocks);
2250 if (llvm::all_of(ExitingBlocks, [&LatchBlock](const BasicBlock *BB) {
2251 return LatchBlock != BB;
2252 })) {
2253 // The backedge doesn't exit the loop; treat this as a head-tested loop.
2254 IVIncInsertPos = LatchBlock->getTerminator();
2255 return;
2256 }
2257
2258 // Otherwise treat this as a rotated loop.
2259 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2260
2261 // Get the terminating condition for the loop if possible. If we
2262 // can, we want to change it to use a post-incremented version of its
2263 // induction variable, to allow coalescing the live ranges for the IV into
2264 // one register value.
2265
2266 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2267 if (!TermBr)
2268 continue;
2269 // FIXME: Overly conservative, termination condition could be an 'or' etc..
2270 if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
2271 continue;
2272
2273 // Search IVUsesByStride to find Cond's IVUse if there is one.
2274 IVStrideUse *CondUse = nullptr;
2275 ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
2276 if (!FindIVUserForCond(Cond, CondUse))
2277 continue;
2278
2279 // If the trip count is computed in terms of a max (due to ScalarEvolution
2280 // being unable to find a sufficient guard, for example), change the loop
2281 // comparison to use SLT or ULT instead of NE.
2282 // One consequence of doing this now is that it disrupts the count-down
2283 // optimization. That's not always a bad thing though, because in such
2284 // cases it may still be worthwhile to avoid a max.
2285 Cond = OptimizeMax(Cond, CondUse);
2286
2287 // If this exiting block dominates the latch block, it may also use
2288 // the post-inc value if it won't be shared with other uses.
2289 // Check for dominance.
2290 if (!DT.dominates(ExitingBlock, LatchBlock))
2291 continue;
2292
2293 // Conservatively avoid trying to use the post-inc value in non-latch
2294 // exits if there may be pre-inc users in intervening blocks.
2295 if (LatchBlock != ExitingBlock)
2296 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
2297 // Test if the use is reachable from the exiting block. This dominator
2298 // query is a conservative approximation of reachability.
2299 if (&*UI != CondUse &&
2300 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2301 // Conservatively assume there may be reuse if the quotient of their
2302 // strides could be a legal scale.
2303 const SCEV *A = IU.getStride(*CondUse, L);
2304 const SCEV *B = IU.getStride(*UI, L);
2305 if (!A || !B) continue;
2306 if (SE.getTypeSizeInBits(A->getType()) !=
2307 SE.getTypeSizeInBits(B->getType())) {
2308 if (SE.getTypeSizeInBits(A->getType()) >
2309 SE.getTypeSizeInBits(B->getType()))
2310 B = SE.getSignExtendExpr(B, A->getType());
2311 else
2312 A = SE.getSignExtendExpr(A, B->getType());
2313 }
2314 if (const SCEVConstant *D =
2315 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
2316 const ConstantInt *C = D->getValue();
2317 // Stride of one or negative one can have reuse with non-addresses.
2318 if (C->isOne() || C->isAllOnesValue())
2319 goto decline_post_inc;
2320 // Avoid weird situations.
2321 if (C->getValue().getMinSignedBits() >= 64 ||
2322 C->getValue().isMinSignedValue())
2323 goto decline_post_inc;
2324 // Check for possible scaled-address reuse.
2325 MemAccessTy AccessTy = getAccessType(UI->getUser());
2326 int64_t Scale = C->getSExtValue();
2327 if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2328 /*BaseOffset=*/0,
2329 /*HasBaseReg=*/false, Scale,
2330 AccessTy.AddrSpace))
2331 goto decline_post_inc;
2332 Scale = -Scale;
2333 if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2334 /*BaseOffset=*/0,
2335 /*HasBaseReg=*/false, Scale,
2336 AccessTy.AddrSpace))
2337 goto decline_post_inc;
2338 }
2339 }
2340
2341 DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Change loop exiting icmp to use postinc iv: "
<< *Cond << '\n'; } } while (false)
2342 << *Cond << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Change loop exiting icmp to use postinc iv: "
<< *Cond << '\n'; } } while (false)
;
2343
2344 // It's possible for the setcc instruction to be anywhere in the loop, and
2345 // possible for it to have multiple users. If it is not immediately before
2346 // the exiting block branch, move it.
2347 if (&*++BasicBlock::iterator(Cond) != TermBr) {
2348 if (Cond->hasOneUse()) {
2349 Cond->moveBefore(TermBr);
2350 } else {
2351 // Clone the terminating condition and insert into the loopend.
2352 ICmpInst *OldCond = Cond;
2353 Cond = cast<ICmpInst>(Cond->clone());
2354 Cond->setName(L->getHeader()->getName() + ".termcond");
2355 ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);
2356
2357 // Clone the IVUse, as the old use still exists!
2358 CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
2359 TermBr->replaceUsesOfWith(OldCond, Cond);
2360 }
2361 }
2362
2363 // If we get to here, we know that we can transform the setcc instruction to
2364 // use the post-incremented version of the IV, allowing us to coalesce the
2365 // live ranges for the IV correctly.
2366 CondUse->transformToPostInc(L);
2367 Changed = true;
2368
2369 PostIncs.insert(Cond);
2370 decline_post_inc:;
2371 }
2372
2373 // Determine an insertion point for the loop induction variable increment. It
2374 // must dominate all the post-inc comparisons we just set up, and it must
2375 // dominate the loop latch edge.
2376 IVIncInsertPos = L->getLoopLatch()->getTerminator();
2377 for (Instruction *Inst : PostIncs) {
2378 BasicBlock *BB =
2379 DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
2380 Inst->getParent());
2381 if (BB == Inst->getParent())
2382 IVIncInsertPos = Inst;
2383 else if (BB != IVIncInsertPos->getParent())
2384 IVIncInsertPos = BB->getTerminator();
2385 }
2386}
2387
2388/// Determine if the given use can accommodate a fixup at the given offset and
2389/// other details. If so, update the use and return true.
2390bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
2391 bool HasBaseReg, LSRUse::KindType Kind,
2392 MemAccessTy AccessTy) {
2393 int64_t NewMinOffset = LU.MinOffset;
2394 int64_t NewMaxOffset = LU.MaxOffset;
2395 MemAccessTy NewAccessTy = AccessTy;
2396
2397 // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2398 // something conservative; however, this can pessimize in the case that one of
2399 // the uses will have all its uses outside the loop, for example.
2400 if (LU.Kind != Kind)
2401 return false;
2402
2403 // Check for a mismatched access type, and fall back conservatively as needed.
2404 // TODO: Be less conservative when the type is similar and can use the same
2405 // addressing modes.
2406 if (Kind == LSRUse::Address) {
2407 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2408 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2409 AccessTy.AddrSpace);
2410 }
2411 }
2412
2413 // Conservatively assume HasBaseReg is true for now.
2414 if (NewOffset < LU.MinOffset) {
2415 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2416 LU.MaxOffset - NewOffset, HasBaseReg))
2417 return false;
2418 NewMinOffset = NewOffset;
2419 } else if (NewOffset > LU.MaxOffset) {
2420 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2421 NewOffset - LU.MinOffset, HasBaseReg))
2422 return false;
2423 NewMaxOffset = NewOffset;
2424 }
2425
2426 // Update the use.
2427 LU.MinOffset = NewMinOffset;
2428 LU.MaxOffset = NewMaxOffset;
2429 LU.AccessTy = NewAccessTy;
2430 return true;
2431}
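// Worked example (offsets hypothetical): if this use currently covers
// [MinOffset, MaxOffset] = [0, 8] and a new fixup arrives at offset 24, the
// use is only widened to [0, 24] when an immediate of 24 - 0 = 24 is always
// foldable for this kind and access type; otherwise the caller falls back to
// creating a separate use.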
2432
2433/// Return an LSRUse index and an offset value for a fixup which needs the given
2434/// expression, with the given kind and optional access type. Either reuse an
2435/// existing use or create a new one, as needed.
2436std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
2437 LSRUse::KindType Kind,
2438 MemAccessTy AccessTy) {
2439 const SCEV *Copy = Expr;
2440 int64_t Offset = ExtractImmediate(Expr, SE);
2441
2442 // Basic uses can't accept any offset, for example.
2443 if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2444 Offset, /*HasBaseReg=*/ true)) {
2445 Expr = Copy;
2446 Offset = 0;
2447 }
2448
2449 std::pair<UseMapTy::iterator, bool> P =
2450 UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
2451 if (!P.second) {
2452 // A use already existed with this base.
2453 size_t LUIdx = P.first->second;
2454 LSRUse &LU = Uses[LUIdx];
2455 if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
2456 // Reuse this use.
2457 return std::make_pair(LUIdx, Offset);
2458 }
2459
2460 // Create a new use.
2461 size_t LUIdx = Uses.size();
2462 P.first->second = LUIdx;
2463 Uses.push_back(LSRUse(Kind, AccessTy));
2464 LSRUse &LU = Uses[LUIdx];
2465
2466 LU.MinOffset = Offset;
2467 LU.MaxOffset = Offset;
2468 return std::make_pair(LUIdx, Offset);
2469}
2470
2471/// Delete the given use from the Uses list.
2472void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2473 if (&LU != &Uses.back())
2474 std::swap(LU, Uses.back());
2475 Uses.pop_back();
2476
2477 // Update RegUses.
2478 RegUses.swapAndDropUse(LUIdx, Uses.size());
2479}
2480
2481/// Look for a use distinct from OrigLU which has a formula with the same
2482/// registers as the given formula.
2483LSRUse *
2484LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2485 const LSRUse &OrigLU) {
2486 // Search all uses for the formula. This could be more clever.
2487 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
2488 LSRUse &LU = Uses[LUIdx];
2489 // Check whether this use is close enough to OrigLU, to see whether it's
2490 // worthwhile looking through its formulae.
2491 // Ignore ICmpZero uses because they may contain formulae generated by
2492 // GenerateICmpZeroScales, in which case adding fixup offsets may
2493 // be invalid.
2494 if (&LU != &OrigLU &&
2495 LU.Kind != LSRUse::ICmpZero &&
2496 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2497 LU.WidestFixupType == OrigLU.WidestFixupType &&
2498 LU.HasFormulaWithSameRegs(OrigF)) {
2499 // Scan through this use's formulae.
2500 for (const Formula &F : LU.Formulae) {
2501 // Check to see if this formula has the same registers and symbols
2502 // as OrigF.
2503 if (F.BaseRegs == OrigF.BaseRegs &&
2504 F.ScaledReg == OrigF.ScaledReg &&
2505 F.BaseGV == OrigF.BaseGV &&
2506 F.Scale == OrigF.Scale &&
2507 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2508 if (F.BaseOffset == 0)
2509 return &LU;
2510 // This is the formula where all the registers and symbols matched;
2511 // there aren't going to be any others. Since we declined it, we
2512 // can skip the rest of the formulae and proceed to the next LSRUse.
2513 break;
2514 }
2515 }
2516 }
2517 }
2518
2519 // Nothing looked good.
2520 return nullptr;
2521}
2522
2523void LSRInstance::CollectInterestingTypesAndFactors() {
2524 SmallSetVector<const SCEV *, 4> Strides;
2525
2526 // Collect interesting types and strides.
2527 SmallVector<const SCEV *, 4> Worklist;
2528 for (const IVStrideUse &U : IU) {
2529 const SCEV *Expr = IU.getExpr(U);
2530
2531 // Collect interesting types.
2532 Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
2533
2534 // Add strides for mentioned loops.
2535 Worklist.push_back(Expr);
2536 do {
2537 const SCEV *S = Worklist.pop_back_val();
2538 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2539 if (AR->getLoop() == L)
2540 Strides.insert(AR->getStepRecurrence(SE));
2541 Worklist.push_back(AR->getStart());
2542 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2543 Worklist.append(Add->op_begin(), Add->op_end());
2544 }
2545 } while (!Worklist.empty());
2546 }
2547
2548 // Compute interesting factors from the set of interesting strides.
2549 for (SmallSetVector<const SCEV *, 4>::const_iterator
2550 I = Strides.begin(), E = Strides.end(); I != E; ++I)
2551 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2552 std::next(I); NewStrideIter != E; ++NewStrideIter) {
2553 const SCEV *OldStride = *I;
2554 const SCEV *NewStride = *NewStrideIter;
2555
2556 if (SE.getTypeSizeInBits(OldStride->getType()) !=
2557 SE.getTypeSizeInBits(NewStride->getType())) {
2558 if (SE.getTypeSizeInBits(OldStride->getType()) >
2559 SE.getTypeSizeInBits(NewStride->getType()))
2560 NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
2561 else
2562 OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
2563 }
2564 if (const SCEVConstant *Factor =
2565 dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
2566 SE, true))) {
2567 if (Factor->getAPInt().getMinSignedBits() <= 64)
2568 Factors.insert(Factor->getAPInt().getSExtValue());
2569 } else if (const SCEVConstant *Factor =
2570 dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
2571 NewStride,
2572 SE, true))) {
2573 if (Factor->getAPInt().getMinSignedBits() <= 64)
2574 Factors.insert(Factor->getAPInt().getSExtValue());
2575 }
2576 }
2577
2578 // If all uses use the same type, don't bother looking for truncation-based
2579 // reuse.
2580 if (Types.size() == 1)
2581 Types.clear();
2582
2583 DEBUG(print_factors_and_types(dbgs()));
2584}
2585
2586/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
2587/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
2588/// IVStrideUses, we could partially skip this.
2589static User::op_iterator
2590findIVOperand(User::op_iterator OI, User::op_iterator OE,
2591 Loop *L, ScalarEvolution &SE) {
2592 for(; OI != OE; ++OI) {
2593 if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2594 if (!SE.isSCEVable(Oper->getType()))
2595 continue;
2596
2597 if (const SCEVAddRecExpr *AR =
2598 dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
2599 if (AR->getLoop() == L)
2600 break;
2601 }
2602 }
2603 }
2604 return OI;
2605}
2606
2607/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
2608/// a convenient helper.
2609static Value *getWideOperand(Value *Oper) {
2610 if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2611 return Trunc->getOperand(0);
2612 return Oper;
2613}
2614
2615/// Return true if we allow an IV chain to include both types.
2616static bool isCompatibleIVType(Value *LVal, Value *RVal) {
2617 Type *LType = LVal->getType();
2618 Type *RType = RVal->getType();
2619 return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() &&
2620 // Different address spaces means (possibly)
2621 // different types of the pointer implementation,
2622 // e.g. i16 vs i32 so disallow that.
2623 (LType->getPointerAddressSpace() ==
2624 RType->getPointerAddressSpace()));
2625}
2626
2627/// Return an approximation of this SCEV expression's "base", or NULL for any
2628/// constant. Returning the expression itself is conservative. Returning a
2629/// deeper subexpression is more precise and valid as long as it isn't less
2630/// complex than another subexpression. For expressions involving multiple
2631/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
2632/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
2633/// IVInc==b-a.
2634///
2635/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2636/// SCEVUnknown, we simply return the rightmost SCEV operand.
2637static const SCEV *getExprBase(const SCEV *S) {
2638 switch (S->getSCEVType()) {
2639 default: // including scUnknown.
2640 return S;
2641 case scConstant:
2642 return nullptr;
2643 case scTruncate:
2644 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2645 case scZeroExtend:
2646 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2647 case scSignExtend:
2648 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2649 case scAddExpr: {
2650 // Skip over scaled operands (scMulExpr) to follow add operands as long as
2651 // there's nothing more complex.
2652 // FIXME: not sure if we want to recognize negation.
2653 const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
2654 for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
2655 E(Add->op_begin()); I != E; ++I) {
2656 const SCEV *SubExpr = *I;
2657 if (SubExpr->getSCEVType() == scAddExpr)
2658 return getExprBase(SubExpr);
2659
2660 if (SubExpr->getSCEVType() != scMulExpr)
2661 return SubExpr;
2662 }
2663 return S; // all operands are scaled, be conservative.
2664 }
2665 case scAddRecExpr:
2666 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2667 }
2668}
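// For instance (names hypothetical), for an address such as
// {(%p + (4 * %n)),+,8}, the addrec's start is an add whose rightmost
// non-multiplied operand is the pointer %p, so %p is returned as the base;
// two accesses off different pointers therefore never share a chain base.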
2669
2670/// Return true if the chain increment is profitable to expand into a loop
2671/// invariant value, which may require its own register. A profitable chain
2672/// increment will be an offset relative to the same base. We allow such offsets
2673/// to be used as chain increments as long as they are not obviously
2674/// expensive to expand using real instructions.
2675bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
2676 const SCEV *IncExpr,
2677 ScalarEvolution &SE) {
2678 // Aggressively form chains when -stress-ivchain.
2679 if (StressIVChain)
2680 return true;
2681
2682 // Do not replace a constant offset from IV head with a nonconstant IV
2683 // increment.
2684 if (!isa<SCEVConstant>(IncExpr)) {
2685 const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
2686 if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
2687 return false;
2688 }
2689
2690 SmallPtrSet<const SCEV*, 8> Processed;
2691 return !isHighCostExpansion(IncExpr, Processed, SE);
2692}
2693
2694/// Return true if the number of registers needed for the chain is estimated to
2695/// be less than the number required for the individual IV users. First prohibit
2696/// any IV users that keep the IV live across increments (the Users set should
2697/// be empty). Next count the number and type of increments in the chain.
2698///
2699/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
2700/// effectively use postinc addressing modes. Only consider it profitable if the
2701/// increments can be computed in fewer registers when chained.
2702///
2703/// TODO: Consider IVInc free if it's already used in other chains.
2704static bool
2705isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
2706 ScalarEvolution &SE, const TargetTransformInfo &TTI) {
2707 if (StressIVChain)
2708 return true;
2709
2710 if (!Chain.hasIncs())
2711 return false;
2712
2713 if (!Users.empty()) {
2714 DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Chain: " << *Chain.
Incs[0].UserInst << " users:\n"; for (Instruction *Inst
: Users) { dbgs() << " " << *Inst << "\n"
; }; } } while (false)
2715 for (Instruction *Inst : Users) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Chain: " << *Chain.
Incs[0].UserInst << " users:\n"; for (Instruction *Inst
: Users) { dbgs() << " " << *Inst << "\n"
; }; } } while (false)
2716 dbgs() << " " << *Inst << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Chain: " << *Chain.
Incs[0].UserInst << " users:\n"; for (Instruction *Inst
: Users) { dbgs() << " " << *Inst << "\n"
; }; } } while (false)
2717 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Chain: " << *Chain.
Incs[0].UserInst << " users:\n"; for (Instruction *Inst
: Users) { dbgs() << " " << *Inst << "\n"
; }; } } while (false)
;
2718 return false;
2719 }
2720 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2721
2722 // The chain itself may require a register, so initialize cost to 1.
2723 int cost = 1;
2724
2725 // A complete chain likely eliminates the need for keeping the original IV in
2726 // a register. LSR does not currently know how to form a complete chain unless
2727 // the header phi already exists.
2728 if (isa<PHINode>(Chain.tailUserInst())
2729 && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2730 --cost;
2731 }
2732 const SCEV *LastIncExpr = nullptr;
2733 unsigned NumConstIncrements = 0;
2734 unsigned NumVarIncrements = 0;
2735 unsigned NumReusedIncrements = 0;
2736 for (const IVInc &Inc : Chain) {
2737 if (Inc.IncExpr->isZero())
2738 continue;
2739
2740 // Incrementing by zero or some constant is neutral. We assume constants can
2741 // be folded into an addressing mode or an add's immediate operand.
2742 if (isa<SCEVConstant>(Inc.IncExpr)) {
2743 ++NumConstIncrements;
2744 continue;
2745 }
2746
2747 if (Inc.IncExpr == LastIncExpr)
2748 ++NumReusedIncrements;
2749 else
2750 ++NumVarIncrements;
2751
2752 LastIncExpr = Inc.IncExpr;
2753 }
2754 // An IV chain with a single increment is handled by LSR's postinc
2755 // uses. However, a chain with multiple increments requires keeping the IV's
2756 // value live longer than it needs to be if chained.
2757 if (NumConstIncrements > 1)
2758 --cost;
2759
2760 // Materializing increment expressions in the preheader that didn't exist in
2761 // the original code may cost a register. For example, sign-extended array
2762 // indices can produce ridiculous increments like this:
2763 // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
2764 cost += NumVarIncrements;
2765
2766 // Reusing variable increments likely saves a register to hold the multiple of
2767 // the stride.
2768 cost -= NumReusedIncrements;
2769
2770 DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << costdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Chain: " << *Chain.
Incs[0].UserInst << " Cost: " << cost << "\n"
; } } while (false)
2771 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Chain: " << *Chain.
Incs[0].UserInst << " Cost: " << cost << "\n"
; } } while (false)
;
2772
2773 return cost < 0;
2774}
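// In summary, the estimate above is roughly
//   cost = 1 (the chain's own register)
//        - 1 if the tail phi closes the chain
//        - 1 if more than one constant increment is folded away
//        + NumVarIncrements - NumReusedIncrements
// and the chain is only kept when this total is negative.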
2775
2776/// Add this IV user to an existing chain or make it the head of a new chain.
2777void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2778 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
2779 // When IVs are used as types of varying widths, they are generally converted
2780 // to a wider type with some uses remaining narrow under a (free) trunc.
2781 Value *const NextIV = getWideOperand(IVOper);
2782 const SCEV *const OperExpr = SE.getSCEV(NextIV);
2783 const SCEV *const OperExprBase = getExprBase(OperExpr);
2784
2785 // Visit all existing chains. Check if its IVOper can be computed as a
2786 // profitable loop invariant increment from the last link in the Chain.
2787 unsigned ChainIdx = 0, NChains = IVChainVec.size();
2788 const SCEV *LastIncExpr = nullptr;
2789 for (; ChainIdx < NChains; ++ChainIdx) {
2790 IVChain &Chain = IVChainVec[ChainIdx];
2791
2792 // Prune the solution space aggressively by checking that both IV operands
2793 // are expressions that operate on the same unscaled SCEVUnknown. This
2794 // "base" will be canceled by the subsequent getMinusSCEV call. Checking
2795 // first avoids creating extra SCEV expressions.
2796 if (!StressIVChain && Chain.ExprBase != OperExprBase)
2797 continue;
2798
2799 Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
2800 if (!isCompatibleIVType(PrevIV, NextIV))
2801 continue;
2802
2803 // A phi node terminates a chain.
2804 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2805 continue;
2806
2807 // The increment must be loop-invariant so it can be kept in a register.
2808 const SCEV *PrevExpr = SE.getSCEV(PrevIV);
2809 const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
2810 if (!SE.isLoopInvariant(IncExpr, L))
2811 continue;
2812
2813 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2814 LastIncExpr = IncExpr;
2815 break;
2816 }
2817 }
2818 // If we haven't found a chain, create a new one, unless we hit the max. Don't
2819 // bother for phi nodes, because they must be last in the chain.
2820 if (ChainIdx == NChains) {
2821 if (isa<PHINode>(UserInst))
2822 return;
2823 if (NChains >= MaxChains && !StressIVChain) {
2824 DEBUG(dbgs() << "IV Chain Limit\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "IV Chain Limit\n"; } } while
(false)
;
2825 return;
2826 }
2827 LastIncExpr = OperExpr;
2828 // IVUsers may have skipped over sign/zero extensions. We don't currently
2829 // attempt to form chains involving extensions unless they can be hoisted
2830 // into this loop's AddRec.
2831 if (!isa<SCEVAddRecExpr>(LastIncExpr))
2832 return;
2833 ++NChains;
2834 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
2835 OperExprBase));
2836 ChainUsersVec.resize(NChains);
2837 DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInstdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "IV Chain#" << ChainIdx
<< " Head: (" << *UserInst << ") IV=" <<
*LastIncExpr << "\n"; } } while (false)
2838 << ") IV=" << *LastIncExpr << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "IV Chain#" << ChainIdx
<< " Head: (" << *UserInst << ") IV=" <<
*LastIncExpr << "\n"; } } while (false)
;
2839 } else {
2840 DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
2841 << ") IV+" << *LastIncExpr << "\n");
2842 // Add this IV user to the end of the chain.
2843 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
2844 }
2845 IVChain &Chain = IVChainVec[ChainIdx];
2846
2847 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
2848 // This chain's NearUsers become FarUsers.
2849 if (!LastIncExpr->isZero()) {
2850 ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
2851 NearUsers.end());
2852 NearUsers.clear();
2853 }
2854
2855 // All other uses of IVOperand become near uses of the chain.
2856 // We currently ignore intermediate values within SCEV expressions, assuming
2857 // they will eventually be used by the current chain, or can be computed
2858 // from one of the chain increments. To be more precise we could
2859 // transitively follow its user and only add leaf IV users to the set.
2860 for (User *U : IVOper->users()) {
2861 Instruction *OtherUse = dyn_cast<Instruction>(U);
2862 if (!OtherUse)
2863 continue;
2864 // Uses in the chain will no longer be uses if the chain is formed.
2865 // Include the head of the chain in this iteration (not Chain.begin()).
2866 IVChain::const_iterator IncIter = Chain.Incs.begin();
2867 IVChain::const_iterator IncEnd = Chain.Incs.end();
2868 for( ; IncIter != IncEnd; ++IncIter) {
2869 if (IncIter->UserInst == OtherUse)
2870 break;
2871 }
2872 if (IncIter != IncEnd)
2873 continue;
2874
2875 if (SE.isSCEVable(OtherUse->getType())
2876 && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
2877 && IU.isIVUserOrOperand(OtherUse)) {
2878 continue;
2879 }
2880 NearUsers.insert(OtherUse);
2881 }
2882
2883 // Since this user is part of the chain, it's no longer considered a use
2884 // of the chain.
2885 ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
2886}
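// Illustrative example (not in the original source): two address computations
// whose SCEVs differ by a loop-invariant amount can share a chain. For a loop
// that touches A[i] and A[i+8] each iteration:
//
//   %p0 = getelementptr i8, i8* %A, i64 %i      ; SCEV: {%A,+,1}<%L>
//   %p1 = getelementptr i8, i8* %A, i64 %i8     ; %i8 = %i + 8, SCEV: {(%A + 8),+,1}<%L>
//
// Here IncExpr = OperExpr - PrevExpr = 8 is loop-invariant, so %p1 chains onto
// %p0 with a constant increment of 8 rather than keeping a second IV live.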
2887
2888/// Populate the vector of Chains.
2889///
2890/// This decreases ILP at the architecture level. Targets with ample registers,
2891/// multiple memory ports, and no register renaming probably don't want
2892/// this. However, such targets should probably disable LSR altogether.
2893///
2894/// The job of LSR is to make a reasonable choice of induction variables across
2895/// the loop. Subsequent passes can easily "unchain" computation exposing more
2896/// ILP *within the loop* if the target wants it.
2897///
2898/// Finding the best IV chain is potentially a scheduling problem. Since LSR
2899/// will not reorder memory operations, it will recognize this as a chain, but
2900/// will generate redundant IV increments. Ideally this would be corrected later
2901/// by a smart scheduler:
2902/// = A[i]
2903/// = A[i+x]
2904/// A[i] =
2905/// A[i+x] =
2906///
2907/// TODO: Walk the entire domtree within this loop, not just the path to the
2908/// loop latch. This will discover chains on side paths, but requires
2909/// maintaining multiple copies of the Chains state.
2910void LSRInstance::CollectChains() {
2911 DEBUG(dbgs() << "Collecting IV Chains.\n");
2912 SmallVector<ChainUsers, 8> ChainUsersVec;
2913
2914 SmallVector<BasicBlock *,8> LatchPath;
2915 BasicBlock *LoopHeader = L->getHeader();
2916 for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
2917 Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
2918 LatchPath.push_back(Rung->getBlock());
2919 }
2920 LatchPath.push_back(LoopHeader);
2921
2922 // Walk the instruction stream from the loop header to the loop latch.
2923 for (BasicBlock *BB : reverse(LatchPath)) {
2924 for (Instruction &I : *BB) {
2925 // Skip instructions that weren't seen by IVUsers analysis.
2926 if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
2927 continue;
2928
2929 // Ignore users that are part of a SCEV expression. This way we only
2930 // consider leaf IV Users. This effectively rediscovers a portion of
2931 // IVUsers analysis but in program order this time.
2932 if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
2933 continue;
2934
2935 // Remove this instruction from any NearUsers set it may be in.
2936 for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
2937 ChainIdx < NChains; ++ChainIdx) {
2938 ChainUsersVec[ChainIdx].NearUsers.erase(&I);
2939 }
2940 // Search for operands that can be chained.
2941 SmallPtrSet<Instruction*, 4> UniqueOperands;
2942 User::op_iterator IVOpEnd = I.op_end();
2943 User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
2944 while (IVOpIter != IVOpEnd) {
2945 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
2946 if (UniqueOperands.insert(IVOpInst).second)
2947 ChainInstruction(&I, IVOpInst, ChainUsersVec);
2948 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
2949 }
2950 } // Continue walking down the instructions.
2951 } // Continue walking down the domtree.
2952 // Visit phi backedges to determine if the chain can generate the IV postinc.
2953 for (BasicBlock::iterator I = L->getHeader()->begin();
2954 PHINode *PN = dyn_cast<PHINode>(I); ++I) {
2955 if (!SE.isSCEVable(PN->getType()))
2956 continue;
2957
2958 Instruction *IncV =
2959 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
2960 if (IncV)
2961 ChainInstruction(PN, IncV, ChainUsersVec);
2962 }
2963 // Remove any unprofitable chains.
2964 unsigned ChainIdx = 0;
2965 for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
2966 UsersIdx < NChains; ++UsersIdx) {
2967 if (!isProfitableChain(IVChainVec[UsersIdx],
2968 ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
2969 continue;
2970 // Preserve the chain at UsersIdx.
2971 if (ChainIdx != UsersIdx)
2972 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
2973 FinalizeChain(IVChainVec[ChainIdx]);
2974 ++ChainIdx;
2975 }
2976 IVChainVec.resize(ChainIdx);
2977}
2978
2979void LSRInstance::FinalizeChain(IVChain &Chain) {
2980 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2981 DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
2982
2983 for (const IVInc &Inc : Chain) {
2984 DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
2985 auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
2986 assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
2987 IVIncSet.insert(UseI);
2988 }
2989}
2990
2991/// Return true if the IVInc can be folded into an addressing mode.
2992static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
2993 Value *Operand, const TargetTransformInfo &TTI) {
2994 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
2995 if (!IncConst || !isAddressUse(UserInst, Operand))
2996 return false;
2997
2998 if (IncConst->getAPInt().getMinSignedBits() > 64)
2999 return false;
3000
3001 MemAccessTy AccessTy = getAccessType(UserInst);
3002 int64_t IncOffset = IncConst->getValue()->getSExtValue();
3003 if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
3004 IncOffset, /*HasBaseReg=*/false))
3005 return false;
3006
3007 return true;
3008}
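// For instance (target-dependent, illustrative only): a chained access whose
// increment is the constant 8 can usually be folded into a reg+imm addressing
// mode such as [%base + 8], so no explicit add is needed; a loop-variant or
// over-wide increment cannot be folded and is rejected above.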
3009
3010/// Generate an add or subtract for each IVInc in a chain to materialize the IV
3011/// user's operand from the previous IV user's operand.
3012void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
3013 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3014 // Find the new IVOperand for the head of the chain. It may have been replaced
3015 // by LSR.
3016 const IVInc &Head = Chain.Incs[0];
3017 User::op_iterator IVOpEnd = Head.UserInst->op_end();
3018 // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
3019 User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
3020 IVOpEnd, L, SE);
3021 Value *IVSrc = nullptr;
3022 while (IVOpIter != IVOpEnd) {
3023 IVSrc = getWideOperand(*IVOpIter);
3024
3025 // If this operand computes the expression that the chain needs, we may use
3026 // it. (Check this after setting IVSrc which is used below.)
3027 //
3028 // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
3029 // narrow for the chain, so we can no longer use it. We do allow using a
3030 // wider phi, assuming the LSR checked for free truncation. In that case we
3031 // should already have a truncate on this operand such that
3032 // getSCEV(IVSrc) == IncExpr.
3033 if (SE.getSCEV(*IVOpIter) == Head.IncExpr
3034 || SE.getSCEV(IVSrc) == Head.IncExpr) {
3035 break;
3036 }
3037 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3038 }
3039 if (IVOpIter == IVOpEnd) {
3040 // Gracefully give up on this chain.
3041 DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
3042 return;
3043 }
3044
3045 DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
3046 Type *IVTy = IVSrc->getType();
3047 Type *IntTy = SE.getEffectiveSCEVType(IVTy);
3048 const SCEV *LeftOverExpr = nullptr;
3049 for (const IVInc &Inc : Chain) {
3050 Instruction *InsertPt = Inc.UserInst;
3051 if (isa<PHINode>(InsertPt))
3052 InsertPt = L->getLoopLatch()->getTerminator();
3053
3054 // IVOper will replace the current IV User's operand. IVSrc is the IV
3055 // value currently held in a register.
3056 Value *IVOper = IVSrc;
3057 if (!Inc.IncExpr->isZero()) {
3058 // IncExpr was the result of subtraction of two narrow values, so must
3059 // be signed.
3060 const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
3061 LeftOverExpr = LeftOverExpr ?
3062 SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3063 }
3064 if (LeftOverExpr && !LeftOverExpr->isZero()) {
3065 // Expand the IV increment.
3066 Rewriter.clearPostInc();
3067 Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3068 const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
3069 SE.getUnknown(IncV));
3070 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3071
3072 // If an IV increment can't be folded, use it as the next IV value.
3073 if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
3074 assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
3075 IVSrc = IVOper;
3076 LeftOverExpr = nullptr;
3077 }
3078 }
3079 Type *OperTy = Inc.IVOperand->getType();
3080 if (IVTy != OperTy) {
3081 assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
3082 "cannot extend a chained IV");
3083 IRBuilder<> Builder(InsertPt);
3084 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
3085 }
3086 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3087 DeadInsts.emplace_back(Inc.IVOperand);
3088 }
3089 // If LSR created a new, wider phi, we may also replace its postinc. We only
3090 // do this if we also found a wide value for the head of the chain.
3091 if (isa<PHINode>(Chain.tailUserInst())) {
3092 for (BasicBlock::iterator I = L->getHeader()->begin();
3093 PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
3094 if (!isCompatibleIVType(Phi, IVSrc))
3095 continue;
3096 Instruction *PostIncV = dyn_cast<Instruction>(
3097 Phi->getIncomingValueForBlock(L->getLoopLatch()));
3098 if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
3099 continue;
3100 Value *IVOper = IVSrc;
3101 Type *PostIncTy = PostIncV->getType();
3102 if (IVTy != PostIncTy) {
3103 assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
3104 IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
3105 Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
3106 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
3107 }
3108 Phi->replaceUsesOfWith(PostIncV, IVOper);
3109 DeadInsts.emplace_back(PostIncV);
3110 }
3111 }
3112}
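// Rough sketch (illustrative only): for each link with a nonzero increment,
// the accumulated increment expression is expanded just before the user and
// added onto the current chain value, e.g.
//
//   %iv.chain = add i64 %iv, %stride   ; expanded increment
//   ... user rewritten to take %iv.chain in place of its old IV operand ...
//
// When the increment cannot be folded into the user's addressing mode, the
// chain value itself advances to %iv.chain; otherwise the running increment
// keeps accumulating in LeftOverExpr.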
3113
3114void LSRInstance::CollectFixupsAndInitialFormulae() {
3115 for (const IVStrideUse &U : IU) {
3116 Instruction *UserInst = U.getUser();
3117 // Skip IV users that are part of profitable IV Chains.
3118 User::op_iterator UseI =
3119 find(UserInst->operands(), U.getOperandValToReplace());
3120 assert(UseI != UserInst->op_end() && "cannot find IV operand");
3121 if (IVIncSet.count(UseI)) {
3122 DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
3123 continue;
3124 }
3125
3126 LSRUse::KindType Kind = LSRUse::Basic;
3127 MemAccessTy AccessTy;
3128 if (isAddressUse(UserInst, U.getOperandValToReplace())) {
3129 Kind = LSRUse::Address;
3130 AccessTy = getAccessType(UserInst);
3131 }
3132
3133 const SCEV *S = IU.getExpr(U);
3134 PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
3135
3136 // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
3137 // (N - i == 0), and this allows (N - i) to be the expression that we work
3138 // with rather than just N or i, so we can consider the register
3139 // requirements for both N and i at the same time. Limiting this code to
3140 // equality icmps is not a problem because all interesting loops use
3141 // equality icmps, thanks to IndVarSimplify.
3142 if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
3143 if (CI->isEquality()) {
3144 // Swap the operands if needed to put the OperandValToReplace on the
3145 // left, for consistency.
3146 Value *NV = CI->getOperand(1);
3147 if (NV == U.getOperandValToReplace()) {
3148 CI->setOperand(1, CI->getOperand(0));
3149 CI->setOperand(0, NV);
3150 NV = CI->getOperand(1);
3151 Changed = true;
3152 }
3153
3154 // x == y --> x - y == 0
3155 const SCEV *N = SE.getSCEV(NV);
3156 if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
3157 // S is normalized, so normalize N before folding it into S
3158 // to keep the result normalized.
3159 N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
3160 Kind = LSRUse::ICmpZero;
3161 S = SE.getMinusSCEV(N, S);
3162 }
3163
3164 // -1 and the negations of all interesting strides (except the negation
3165 // of -1) are now also interesting.
3166 for (size_t i = 0, e = Factors.size(); i != e; ++i)
3167 if (Factors[i] != -1)
3168 Factors.insert(-(uint64_t)Factors[i]);
3169 Factors.insert(-1);
3170 }
3171
3172 // Get or create an LSRUse.
3173 std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
3174 size_t LUIdx = P.first;
3175 int64_t Offset = P.second;
3176 LSRUse &LU = Uses[LUIdx];
3177
3178 // Record the fixup.
3179 LSRFixup &LF = LU.getNewFixup();
3180 LF.UserInst = UserInst;
3181 LF.OperandValToReplace = U.getOperandValToReplace();
3182 LF.PostIncLoops = TmpPostIncLoops;
3183 LF.Offset = Offset;
3184 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3185
3186 if (!LU.WidestFixupType ||
3187 SE.getTypeSizeInBits(LU.WidestFixupType) <
3188 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3189 LU.WidestFixupType = LF.OperandValToReplace->getType();
3190
3191 // If this is the first use of this LSRUse, give it a formula.
3192 if (LU.Formulae.empty()) {
3193 InsertInitialFormula(S, LU, LUIdx);
3194 CountRegisters(LU.Formulae.back(), LUIdx);
3195 }
3196 }
3197
3198 DEBUG(print_fixups(dbgs()));
3199}
3200
3201/// Insert a formula for the given expression into the given use, separating out
3202/// loop-variant portions from loop-invariant and loop-computable portions.
3203void
3204LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
3205 // Mark uses whose expressions cannot be expanded.
3206 if (!isSafeToExpand(S, SE))
3207 LU.RigidFormula = true;
3208
3209 Formula F;
3210 F.initialMatch(S, L, SE);
3211 bool Inserted = InsertFormula(LU, LUIdx, F);
3212 assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3213}
3214
3215/// Insert a simple single-register formula for the given expression into the
3216/// given use.
3217void
3218LSRInstance::InsertSupplementalFormula(const SCEV *S,
3219 LSRUse &LU, size_t LUIdx) {
3220 Formula F;
3221 F.BaseRegs.push_back(S);
3222 F.HasBaseReg = true;
3223 bool Inserted = InsertFormula(LU, LUIdx, F);
3224 assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3225}
3226
3227/// Note which registers are used by the given formula, updating RegUses.
3228void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3229 if (F.ScaledReg)
3230 RegUses.countRegister(F.ScaledReg, LUIdx);
3231 for (const SCEV *BaseReg : F.BaseRegs)
3232 RegUses.countRegister(BaseReg, LUIdx);
3233}
3234
3235/// If the given formula has not yet been inserted, add it to the list, and
3236/// return true. Return false otherwise.
3237bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3238 // Do not insert formula that we will not be able to expand.
3239 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3240 "Formula is illegal");
3241
3242 if (!LU.InsertFormula(F, *L))
3243 return false;
3244
3245 CountRegisters(F, LUIdx);
3246 return true;
3247}
3248
3249/// Check for other uses of loop-invariant values which we're tracking. These
3250/// other uses will pin these values in registers, making them less profitable
3251/// for elimination.
3252/// TODO: This currently misses non-constant addrec step registers.
3253/// TODO: Should this give more weight to users inside the loop?
3254void
3255LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3256 SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
3257 SmallPtrSet<const SCEV *, 32> Visited;
3258
3259 while (!Worklist.empty()) {
3260 const SCEV *S = Worklist.pop_back_val();
3261
3262 // Don't process the same SCEV twice
3263 if (!Visited.insert(S).second)
3264 continue;
3265
3266 if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
3267 Worklist.append(N->op_begin(), N->op_end());
3268 else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
3269 Worklist.push_back(C->getOperand());
3270 else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
3271 Worklist.push_back(D->getLHS());
3272 Worklist.push_back(D->getRHS());
3273 } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3274 const Value *V = US->getValue();
3275 if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
3276 // Look for instructions defined outside the loop.
3277 if (L->contains(Inst)) continue;
3278 } else if (isa<UndefValue>(V))
3279 // Undef doesn't have a live range, so it doesn't matter.
3280 continue;
3281 for (const Use &U : V->uses()) {
3282 const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
3283 // Ignore non-instructions.
3284 if (!UserInst)
3285 continue;
3286 // Ignore instructions in other functions (as can happen with
3287 // Constants).
3288 if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3289 continue;
3290 // Ignore instructions not dominated by the loop.
3291 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3292 UserInst->getParent() :
3293 cast<PHINode>(UserInst)->getIncomingBlock(
3294 PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
3295 if (!DT.dominates(L->getHeader(), UseBB))
3296 continue;
3297 // Don't bother if the instruction is in a BB which ends in an EHPad.
3298 if (UseBB->getTerminator()->isEHPad())
3299 continue;
3300 // Don't bother rewriting PHIs in catchswitch blocks.
3301 if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
3302 continue;
3303 // Ignore uses which are part of other SCEV expressions, to avoid
3304 // analyzing them multiple times.
3305 if (SE.isSCEVable(UserInst->getType())) {
3306 const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
3307 // If the user is a no-op, look through to its uses.
3308 if (!isa<SCEVUnknown>(UserS))
3309 continue;
3310 if (UserS == US) {
3311 Worklist.push_back(
3312 SE.getUnknown(const_cast<Instruction *>(UserInst)));
3313 continue;
3314 }
3315 }
3316 // Ignore icmp instructions which are already being analyzed.
3317 if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3318 unsigned OtherIdx = !U.getOperandNo();
3319 Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
3320 if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
3321 continue;
3322 }
3323
3324 std::pair<size_t, int64_t> P = getUse(
3325 S, LSRUse::Basic, MemAccessTy());
3326 size_t LUIdx = P.first;
3327 int64_t Offset = P.second;
3328 LSRUse &LU = Uses[LUIdx];
3329 LSRFixup &LF = LU.getNewFixup();
3330 LF.UserInst = const_cast<Instruction *>(UserInst);
3331 LF.OperandValToReplace = U;
3332 LF.Offset = Offset;
3333 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3334 if (!LU.WidestFixupType ||
3335 SE.getTypeSizeInBits(LU.WidestFixupType) <
3336 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3337 LU.WidestFixupType = LF.OperandValToReplace->getType();
3338 InsertSupplementalFormula(US, LU, LUIdx);
3339 CountRegisters(LU.Formulae.back(), Uses.size() - 1);
3340 break;
3341 }
3342 }
3343 }
3344}
3345
3346/// Split S into subexpressions which can be pulled out into separate
3347/// registers. If C is non-null, multiply each subexpression by C.
3348///
3349/// Return remainder expression after factoring the subexpressions captured by
3350/// Ops. If Ops is complete, return NULL.
3351static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
3352 SmallVectorImpl<const SCEV *> &Ops,
3353 const Loop *L,
3354 ScalarEvolution &SE,
3355 unsigned Depth = 0) {
3356 // Arbitrarily cap recursion to protect compile time.
3357 if (Depth >= 3)
3358 return S;
3359
3360 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
3361 // Break out add operands.
3362 for (const SCEV *S : Add->operands()) {
3363 const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
3364 if (Remainder)
3365 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3366 }
3367 return nullptr;
3368 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3369 // Split a non-zero base out of an addrec.
3370 if (AR->getStart()->isZero() || !AR->isAffine())
3371 return S;
3372
3373 const SCEV *Remainder = CollectSubexprs(AR->getStart(),
3374 C, Ops, L, SE, Depth+1);
3375 // Split the non-zero AddRec unless it is part of a nested recurrence that
3376 // does not pertain to this loop.
3377 if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3378 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3379 Remainder = nullptr;
3380 }
3381 if (Remainder != AR->getStart()) {
3382 if (!Remainder)
3383 Remainder = SE.getConstant(AR->getType(), 0);
3384 return SE.getAddRecExpr(Remainder,
3385 AR->getStepRecurrence(SE),
3386 AR->getLoop(),
3387 //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
3388 SCEV::FlagAnyWrap);
3389 }
3390 } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
3391 // Break (C * (a + b + c)) into C*a + C*b + C*c.
3392 if (Mul->getNumOperands() != 2)
3393 return S;
3394 if (const SCEVConstant *Op0 =
3395 dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
3396 C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
3397 const SCEV *Remainder =
3398 CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
3399 if (Remainder)
3400 Ops.push_back(SE.getMulExpr(C, Remainder));
3401 return nullptr;
3402 }
3403 }
3404 return S;
3405}
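// Example (illustrative): for S = {(%a + %b + 4),+,8}<%L>, CollectSubexprs
// pushes %a, %b and 4 onto Ops and returns the remainder {0,+,8}<%L>, letting
// the loop-invariant pieces be considered as separate base registers while
// the recurrence itself stays in a single register.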
3406
3407/// \brief Helper function for LSRInstance::GenerateReassociations.
3408void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3409 const Formula &Base,
3410 unsigned Depth, size_t Idx,
3411 bool IsScaledReg) {
3412 const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3413 SmallVector<const SCEV *, 8> AddOps;
3414 const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
3415 if (Remainder)
3416 AddOps.push_back(Remainder);
3417
3418 if (AddOps.size() == 1)
3419 return;
3420
3421 for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
3422 JE = AddOps.end();
3423 J != JE; ++J) {
3424
3425 // Loop-variant "unknown" values are uninteresting; we won't be able to
3426 // do anything meaningful with them.
3427 if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
3428 continue;
3429
3430 // Don't pull a constant into a register if the constant could be folded
3431 // into an immediate field.
3432 if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3433 LU.AccessTy, *J, Base.getNumRegs() > 1))
3434 continue;
3435
3436 // Collect all operands except *J.
3437 SmallVector<const SCEV *, 8> InnerAddOps(
3438 ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
3439 InnerAddOps.append(std::next(J),
3440 ((const SmallVector<const SCEV *, 8> &)AddOps).end());
3441
3442 // Don't leave just a constant behind in a register if the constant could
3443 // be folded into an immediate field.
3444 if (InnerAddOps.size() == 1 &&
3445 isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3446 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3447 continue;
3448
3449 const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3450 if (InnerSum->isZero())
3451 continue;
3452 Formula F = Base;
3453
3454 // Add the remaining pieces of the add back into the new formula.
3455 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3456 if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3457 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3458 InnerSumSC->getValue()->getZExtValue())) {
3459 F.UnfoldedOffset =
3460 (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
3461 if (IsScaledReg)
3462 F.ScaledReg = nullptr;
3463 else
3464 F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
3465 } else if (IsScaledReg)
3466 F.ScaledReg = InnerSum;
3467 else
3468 F.BaseRegs[Idx] = InnerSum;
3469
3470 // Add J as its own register, or an unfolded immediate.
3471 const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
3472 if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3473 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3474 SC->getValue()->getZExtValue()))
3475 F.UnfoldedOffset =
3476 (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
3477 else
3478 F.BaseRegs.push_back(*J);
3479 // We may have changed the number of registers in base regs; adjust the
3480 // formula accordingly.
3481 F.canonicalize(*L);
3482
3483 if (InsertFormula(LU, LUIdx, F))
3484 // If that formula hadn't been seen before, recurse to find more like
3485 // it.
3486 GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
3487 }
3488}
3489
3490/// Split out subexpressions from adds and the bases of addrecs.
3491void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3492 Formula Base, unsigned Depth) {
3493 assert(Base.isCanonical(*L) && "Input must be in the canonical form");
3494 // Arbitrarily cap recursion to protect compile time.
3495 if (Depth >= 3)
3496 return;
3497
3498 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3499 GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
3500
3501 if (Base.Scale == 1)
3502 GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
3503 /* Idx */ -1, /* IsScaledReg */ true);
3504}
3505
3506/// Generate a formula consisting of all of the loop-dominating registers added
3507/// into a single register.
3508void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
3509 Formula Base) {
3510 // This method is only interesting on a plurality of registers.
3511 if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
3512 return;
3513
3514 // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
3515 // processing the formula.
3516 Base.unscale();
3517 Formula F = Base;
3518 F.BaseRegs.clear();
3519 SmallVector<const SCEV *, 4> Ops;
3520 for (const SCEV *BaseReg : Base.BaseRegs) {
3521 if (SE.properlyDominates(BaseReg, L->getHeader()) &&
3522 !SE.hasComputableLoopEvolution(BaseReg, L))
3523 Ops.push_back(BaseReg);
3524 else
3525 F.BaseRegs.push_back(BaseReg);
3526 }
3527 if (Ops.size() > 1) {
3528 const SCEV *Sum = SE.getAddExpr(Ops);
3529 // TODO: If Sum is zero, it probably means ScalarEvolution missed an
3530 // opportunity to fold something. For now, just ignore such cases
3531 // rather than proceed with zero in a register.
3532 if (!Sum->isZero()) {
3533 F.BaseRegs.push_back(Sum);
3534 F.canonicalize(*L);
3535 (void)InsertFormula(LU, LUIdx, F);
3536 }
3537 }
3538}
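// Example (illustrative): a formula with two base registers %a and %b that are
// both loop-invariant is rewritten to use the single combined register
// (%a + %b), which can be computed once outside the loop.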
3539
3540/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
3541void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
3542 const Formula &Base, size_t Idx,
3543 bool IsScaledReg) {
3544 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3545 GlobalValue *GV = ExtractSymbol(G, SE);
3546 if (G->isZero() || !GV)
3547 return;
3548 Formula F = Base;
3549 F.BaseGV = GV;
3550 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3551 return;
3552 if (IsScaledReg)
3553 F.ScaledReg = G;
3554 else
3555 F.BaseRegs[Idx] = G;
3556 (void)InsertFormula(LU, LUIdx, F);
3557}
3558
3559/// Generate reuse formulae using symbolic offsets.
3560void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
3561 Formula Base) {
3562 // We can't add a symbolic offset if the address already contains one.
3563 if (Base.BaseGV) return;
3564
3565 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3566 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
3567 if (Base.Scale == 1)
3568 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
3569 /* IsScaledReg */ true);
3570}
3571
3572/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
3573void LSRInstance::GenerateConstantOffsetsImpl(
3574 LSRUse &LU, unsigned LUIdx, const Formula &Base,
3575 const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
3576 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3577 for (int64_t Offset : Worklist) {
3578 Formula F = Base;
3579 F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
3580 if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
3581 LU.AccessTy, F)) {
3582 // Add the offset to the base register.
3583 const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
3584 // If it cancelled out, drop the base register, otherwise update it.
3585 if (NewG->isZero()) {
3586 if (IsScaledReg) {
3587 F.Scale = 0;
3588 F.ScaledReg = nullptr;
3589 } else
3590 F.deleteBaseReg(F.BaseRegs[Idx]);
3591 F.canonicalize(*L);
3592 } else if (IsScaledReg)
3593 F.ScaledReg = NewG;
3594 else
3595 F.BaseRegs[Idx] = NewG;
3596
3597 (void)InsertFormula(LU, LUIdx, F);
3598 }
3599 }
3600
3601 int64_t Imm = ExtractImmediate(G, SE);
3602 if (G->isZero() || Imm == 0)
3603 return;
3604 Formula F = Base;
3605 F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
3606 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3607 return;
3608 if (IsScaledReg)
3609 F.ScaledReg = G;
3610 else
3611 F.BaseRegs[Idx] = G;
3612 (void)InsertFormula(LU, LUIdx, F);
3613}
3614
3615 /// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
3616void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
3617 Formula Base) {
3618 // TODO: For now, just add the min and max offset, because it usually isn't
3619 // worthwhile looking at everything in between.
3620 SmallVector<int64_t, 2> Worklist;
3621 Worklist.push_back(LU.MinOffset);
3622 if (LU.MaxOffset != LU.MinOffset)
3623 Worklist.push_back(LU.MaxOffset);
3624
3625 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3626 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
3627 if (Base.Scale == 1)
3628 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
3629 /* IsScaledReg */ true);
3630}
3631
3632/// For ICmpZero, check to see if we can scale up the comparison. For example, x
3633/// == y -> x*c == y*c.
3634void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
3635 Formula Base) {
3636 if (LU.Kind != LSRUse::ICmpZero) return;
3637
3638 // Determine the integer type for the base formula.
3639 Type *IntTy = Base.getType();
3640 if (!IntTy) return;
3641 if (SE.getTypeSizeInBits(IntTy) > 64) return;
3642
3643 // Don't do this if there is more than one offset.
3644 if (LU.MinOffset != LU.MaxOffset) return;
3645
3646 assert(!Base.BaseGV && "ICmpZero use is not legal!");
3647
3648 // Check each interesting stride.
3649 for (int64_t Factor : Factors) {
3650 // Check that the multiplication doesn't overflow.
3651 if (Base.BaseOffset == INT64_MIN && Factor == -1)
3652 continue;
3653 int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
3654 if (NewBaseOffset / Factor != Base.BaseOffset)
3655 continue;
3656 // If the offset will be truncated at this use, check that it is in bounds.
3657 if (!IntTy->isPointerTy() &&
3658 !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
3659 continue;
3660
3661 // Check that multiplying with the use offset doesn't overflow.
3662 int64_t Offset = LU.MinOffset;
3663 if (Offset == INT64_MIN && Factor == -1)
3664 continue;
3665 Offset = (uint64_t)Offset * Factor;
3666 if (Offset / Factor != LU.MinOffset)
3667 continue;
3668 // If the offset will be truncated at this use, check that it is in bounds.
3669 if (!IntTy->isPointerTy() &&
3670 !ConstantInt::isValueValidForType(IntTy, Offset))
3671 continue;
3672
3673 Formula F = Base;
3674 F.BaseOffset = NewBaseOffset;
3675
3676 // Check that this scale is legal.
3677 if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
3678 continue;
3679
3680 // Compensate for the use having MinOffset built into it.
3681 F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
3682
3683 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3684
3685 // Check that multiplying with each base register doesn't overflow.
3686 for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
3687 F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
3688 if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
3689 goto next;
3690 }
3691
3692 // Check that multiplying with the scaled register doesn't overflow.
3693 if (F.ScaledReg) {
3694 F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
3695 if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
3696 continue;
3697 }
3698
3699 // Check that multiplying with the unfolded offset doesn't overflow.
3700 if (F.UnfoldedOffset != 0) {
3701 if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
3702 continue;
3703 F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
3704 if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
3705 continue;
3706 // If the offset will be truncated, check that it is in bounds.
3707 if (!IntTy->isPointerTy() &&
3708 !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
3709 continue;
3710 }
3711
3712 // If we make it here and it's legal, add it.
3713 (void)InsertFormula(LU, LUIdx, F);
3714 next:;
3715 }
3716}
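// Example (illustrative): for an ICmpZero use whose expression is (%n - %i)
// and an interesting factor of 4, the exit test (%i == %n) can be restated as
// (4*%i == 4*%n). If the loop already materializes 4*%i for addressing, the
// rewritten test reuses that scaled register instead of also keeping %i live.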
3717
3718/// Generate stride factor reuse formulae by making use of scaled-offset address
3719/// modes, for example.
3720void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
3721 // Determine the integer type for the base formula.
3722 Type *IntTy = Base.getType();
3723 if (!IntTy) return;
3724
3725 // If this Formula already has a scaled register, we can't add another one.
3726 // Try to unscale the formula to generate a better scale.
3727 if (Base.Scale != 0 && !Base.unscale())
3728 return;
3729
3730 assert(Base.Scale == 0 && "unscale did not do its job!");
3731
3732 // Check each interesting stride.
3733 for (int64_t Factor : Factors) {
3734 Base.Scale = Factor;
3735 Base.HasBaseReg = Base.BaseRegs.size() > 1;
3736 // Check whether this scale is going to be legal.
3737 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3738 Base)) {
3739 // As a special-case, handle special out-of-loop Basic users specially.
3740 // TODO: Reconsider this special case.
3741 if (LU.Kind == LSRUse::Basic &&
3742 isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
3743 LU.AccessTy, Base) &&
3744 LU.AllFixupsOutsideLoop)
3745 LU.Kind = LSRUse::Special;
3746 else
3747 continue;
3748 }
3749 // For an ICmpZero, negating a solitary base register won't lead to
3750 // new solutions.
3751 if (LU.Kind == LSRUse::ICmpZero &&
3752 !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
3753 continue;
3754 // For each addrec base reg, if its loop is current loop, apply the scale.
3755 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
3756 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
3757 if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
3758 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3759 if (FactorS->isZero())
3760 continue;
3761 // Divide out the factor, ignoring high bits, since we'll be
3762 // scaling the value back up in the end.
3763 if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
3764 // TODO: This could be optimized to avoid all the copying.
3765 Formula F = Base;
3766 F.ScaledReg = Quotient;
3767 F.deleteBaseReg(F.BaseRegs[i]);
3768 // The canonical representation of 1*reg is reg, which is already in
3769 // Base. In that case, do not try to insert the formula, it will be
3770 // rejected anyway.
3771 if (F.Scale == 1 && (F.BaseRegs.empty() ||
3772 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
3773 continue;
3774 // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
3775 // non canonical Formula with ScaledReg's loop not being L.
3776 if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
3777 F.canonicalize(*L);
3778 (void)InsertFormula(LU, LUIdx, F);
3779 }
3780 }
3781 }
3782 }
3783}
3784
3785/// Generate reuse formulae from different IV types.
3786void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
3787 // Don't bother truncating symbolic values.
3788 if (Base.BaseGV) return;
3789
3790 // Determine the integer type for the base formula.
3791 Type *DstTy = Base.getType();
3792 if (!DstTy) return;
3793 DstTy = SE.getEffectiveSCEVType(DstTy);
3794
3795 for (Type *SrcTy : Types) {
3796 if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
3797 Formula F = Base;
3798
3799 if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
3800 for (const SCEV *&BaseReg : F.BaseRegs)
3801 BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
3802
3803 // TODO: This assumes we've done basic processing on all uses and
3804 // have an idea what the register usage is.
3805 if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
3806 continue;
3807
3808 F.canonicalize(*L);
3809 (void)InsertFormula(LU, LUIdx, F);
3810 }
3811 }
3812}
3813
3814namespace {
3815
3816/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
3817/// modifications so that the search phase doesn't have to worry about the data
3818/// structures moving underneath it.
3819struct WorkItem {
3820 size_t LUIdx;
3821 int64_t Imm;
3822 const SCEV *OrigReg;
3823
3824 WorkItem(size_t LI, int64_t I, const SCEV *R)
3825 : LUIdx(LI), Imm(I), OrigReg(R) {}
3826
3827 void print(raw_ostream &OS) const;
3828 void dump() const;
3829};
3830
3831} // end anonymous namespace
3832
3833void WorkItem::print(raw_ostream &OS) const {
3834 OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
3835 << " , add offset " << Imm;
3836}
3837
3838#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3839 LLVM_DUMP_METHOD void WorkItem::dump() const {
3840 print(errs()); errs() << '\n';
3841}
3842#endif
3843
3844/// Look for registers which are a constant distance apart and try to form reuse
3845/// opportunities between them.
3846void LSRInstance::GenerateCrossUseConstantOffsets() {
3847 // Group the registers by their value without any added constant offset.
3848 typedef std::map<int64_t, const SCEV *> ImmMapTy;
3849 DenseMap<const SCEV *, ImmMapTy> Map;
3850 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
3851 SmallVector<const SCEV *, 8> Sequence;
3852 for (const SCEV *Use : RegUses) {
3853 const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
3854 int64_t Imm = ExtractImmediate(Reg, SE);
3855 auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
3856 if (Pair.second)
3857 Sequence.push_back(Reg);
3858 Pair.first->second.insert(std::make_pair(Imm, Use));
3859 UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
3860 }
3861
3862 // Now examine each set of registers with the same base value. Build up
3863 // a list of work to do and do the work in a separate step so that we're
3864 // not adding formulae and register counts while we're searching.
3865 SmallVector<WorkItem, 32> WorkItems;
3866 SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
3867 for (const SCEV *Reg : Sequence) {
3868 const ImmMapTy &Imms = Map.find(Reg)->second;
3869
3870 // It's not worthwhile looking for reuse if there's only one offset.
3871 if (Imms.size() == 1)
3872 continue;
3873
3874 DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
3875 for (const auto &Entry : Imms)
3876 dbgs() << ' ' << Entry.first;
3877 dbgs() << '\n');
3878
3879 // Examine each offset.
3880 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
3881 J != JE; ++J) {
3882 const SCEV *OrigReg = J->second;
3883
3884 int64_t JImm = J->first;
3885 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
3886
3887 if (!isa<SCEVConstant>(OrigReg) &&
3888 UsedByIndicesMap[Reg].count() == 1) {
3889 DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
3890 continue;
3891 }
3892
3893 // Conservatively examine offsets between this orig reg and a few selected
3894 // other orig regs.
3895 ImmMapTy::const_iterator OtherImms[] = {
3896 Imms.begin(), std::prev(Imms.end()),
3897 Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
3898 2)
3899 };
3900 for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
3901 ImmMapTy::const_iterator M = OtherImms[i];
3902 if (M == J || M == JE) continue;
3903
3904 // Compute the difference between the two.
3905 int64_t Imm = (uint64_t)JImm - M->first;
3906 for (unsigned LUIdx : UsedByIndices.set_bits())
3907 // Make a memo of this use, offset, and register tuple.
3908 if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
3909 WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
3910 }
3911 }
3912 }
3913
3914 Map.clear();
3915 Sequence.clear();
3916 UsedByIndicesMap.clear();
3917 UniqueItems.clear();
3918
3919 // Now iterate through the worklist and add new formulae.
3920 for (const WorkItem &WI : WorkItems) {
3921 size_t LUIdx = WI.LUIdx;
3922 LSRUse &LU = Uses[LUIdx];
3923 int64_t Imm = WI.Imm;
3924 const SCEV *OrigReg = WI.OrigReg;
3925
3926 Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
3927 const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
3928 unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
3929
3930 // TODO: Use a more targeted data structure.
3931 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
3932 Formula F = LU.Formulae[L];
3933 // FIXME: The code for the scaled and unscaled registers looks
3934 // very similar but slightly different. Investigate if they
3935 // could be merged. That way, we would not have to unscale the
3936 // Formula.
3937 F.unscale();
3938 // Use the immediate in the scaled register.
3939 if (F.ScaledReg == OrigReg) {
3940 int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
3941 // Don't create 50 + reg(-50).
3942 if (F.referencesReg(SE.getSCEV(
3943 ConstantInt::get(IntTy, -(uint64_t)Offset))))
3944 continue;
3945 Formula NewF = F;
3946 NewF.BaseOffset = Offset;
3947 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3948 NewF))
3949 continue;
3950 NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
3951
3952 // If the new scale is a constant in a register, and adding the constant
3953 // value to the immediate would produce a value closer to zero than the
3954 // immediate itself, then the formula isn't worthwhile.
3955 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
3956 if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
3957 (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
3958 .ule(std::abs(NewF.BaseOffset)))
3959 continue;
3960
3961 // OK, looks good.
3962 NewF.canonicalize(*this->L);
3963 (void)InsertFormula(LU, LUIdx, NewF);
3964 } else {
3965 // Use the immediate in a base register.
3966 for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
3967 const SCEV *BaseReg = F.BaseRegs[N];
3968 if (BaseReg != OrigReg)
3969 continue;
3970 Formula NewF = F;
3971 NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
3972 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
3973 LU.Kind, LU.AccessTy, NewF)) {
3974 if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
3975 continue;
3976 NewF = F;
3977 NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
3978 }
3979 NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
3980
3981 // If the new formula has a constant in a register, and adding the
3982 // constant value to the immediate would produce a value closer to
3983 // zero than the immediate itself, then the formula isn't worthwhile.
3984 for (const SCEV *NewReg : NewF.BaseRegs)
3985 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
3986 if ((C->getAPInt() + NewF.BaseOffset)
3987 .abs()
3988 .slt(std::abs(NewF.BaseOffset)) &&
3989 (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
3990 countTrailingZeros<uint64_t>(NewF.BaseOffset))
3991 goto skip_formula;
3992
3993 // Ok, looks good.
3994 NewF.canonicalize(*this->L);
3995 (void)InsertFormula(LU, LUIdx, NewF);
3996 break;
3997 skip_formula:;
3998 }
3999 }
4000 }
4001 }
4002}
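// Example (illustrative): if one use references the register {%a,+,4}<%L> and
// another references {(%a + 16),+,4}<%L>, the two differ only by the constant
// 16. A WorkItem records that difference, and the second use gains a formula
// expressed as the first register plus an immediate of 16, so both uses can
// share one register when the target can fold the offset.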
4003
4004/// Generate formulae for each use.
4005void
4006LSRInstance::GenerateAllReuseFormulae() {
4007 // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
4008 // queries are more precise.
4009 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4010 LSRUse &LU = Uses[LUIdx];
4011 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4012 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4013 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4014 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4015 }
4016 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4017 LSRUse &LU = Uses[LUIdx];
4018 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4019 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4020 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4021 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4022 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4023 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4024 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4025 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4026 }
4027 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4028 LSRUse &LU = Uses[LUIdx];
4029 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4030 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4031 }
4032
4033 GenerateCrossUseConstantOffsets();
4034
4035 DEBUG(dbgs() << "\n"
4036 "After generating reuse formulae:\n";
4037 print_uses(dbgs()));
4038}
4039
4040/// If there are multiple formulae with the same set of registers used
4041/// by other uses, pick the best one and delete the others.
4042void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4043 DenseSet<const SCEV *> VisitedRegs;
4044 SmallPtrSet<const SCEV *, 16> Regs;
4045 SmallPtrSet<const SCEV *, 16> LoserRegs;
4046#ifndef NDEBUG
4047 bool ChangedFormulae = false;
4048#endif
4049
4050 // Collect the best formula for each unique set of shared registers. This
4051 // is reset for each use.
4052 typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
4053 BestFormulaeTy;
4054 BestFormulaeTy BestFormulae;
4055
4056 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4057 LSRUse &LU = Uses[LUIdx];
4058 DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
4059
4060 bool Any = false;
4061 for (size_t FIdx = 0, NumForms = LU.Formulae.size();
4062 FIdx != NumForms; ++FIdx) {
4063 Formula &F = LU.Formulae[FIdx];
4064
4065 // Some formulas are instant losers. For example, they may depend on
4066 // nonexistent AddRecs from other loops. These need to be filtered
4067 // immediately, otherwise heuristics could choose them over others leading
4068 // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
4069 // avoids the need to recompute this information across formulae using the
4070 // same bad AddRec. Passing LoserRegs is also essential unless we remove
4071 // the corresponding bad register from the Regs set.
4072 Cost CostF;
4073 Regs.clear();
4074 CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
4075 if (CostF.isLoser()) {
4076 // During initial formula generation, undesirable formulae are generated
4077 // by uses within other loops that have some non-trivial address mode or
4078 // use the postinc form of the IV. LSR needs to provide these formulae
4079 // as the basis of rediscovering the desired formula that uses an AddRec
4080 // corresponding to the existing phi. Once all formulae have been
4081 // generated, these initial losers may be pruned.
4082 DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
4083 dbgs() << "\n");
4084 }
4085 else {
4086 SmallVector<const SCEV *, 4> Key;
4087 for (const SCEV *Reg : F.BaseRegs) {
4088 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4089 Key.push_back(Reg);
4090 }
4091 if (F.ScaledReg &&
4092 RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
4093 Key.push_back(F.ScaledReg);
4094 // An unstable sort by host order is OK, because this is only used for
4095 // uniquifying.
4096 std::sort(Key.begin(), Key.end());
4097
4098 std::pair<BestFormulaeTy::const_iterator, bool> P =
4099 BestFormulae.insert(std::make_pair(Key, FIdx));
4100 if (P.second)
4101 continue;
4102
4103 Formula &Best = LU.Formulae[P.first->second];
4104
4105 Cost CostBest;
4106 Regs.clear();
4107 CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
4108 if (CostF < CostBest)
4109 std::swap(F, Best);
4110 DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Filtering out formula "
; F.print(dbgs()); dbgs() << "\n" " in favor of formula "
; Best.print(dbgs()); dbgs() << '\n'; } } while (false)
4111 dbgs() << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Filtering out formula "
; F.print(dbgs()); dbgs() << "\n" " in favor of formula "
; Best.print(dbgs()); dbgs() << '\n'; } } while (false)
4112 " in favor of formula "; Best.print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Filtering out formula "
; F.print(dbgs()); dbgs() << "\n" " in favor of formula "
; Best.print(dbgs()); dbgs() << '\n'; } } while (false)
4113 dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Filtering out formula "
; F.print(dbgs()); dbgs() << "\n" " in favor of formula "
; Best.print(dbgs()); dbgs() << '\n'; } } while (false)
;
4114 }
4115#ifndef NDEBUG
4116 ChangedFormulae = true;
4117#endif
4118 LU.DeleteFormula(F);
4119 --FIdx;
4120 --NumForms;
4121 Any = true;
4122 }
4123
4124 // Now that we've filtered out some formulae, recompute the Regs set.
4125 if (Any)
4126 LU.RecomputeRegs(LUIdx, RegUses);
4127
4128 // Reset this to prepare for the next use.
4129 BestFormulae.clear();
4130 }
4131
4132 DEBUG(if (ChangedFormulae) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { if (ChangedFormulae) { dbgs() << "\n"
"After filtering out undesirable candidates:\n"; print_uses(
dbgs()); }; } } while (false)
4133 dbgs() << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { if (ChangedFormulae) { dbgs() << "\n"
"After filtering out undesirable candidates:\n"; print_uses(
dbgs()); }; } } while (false)
4134 "After filtering out undesirable candidates:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { if (ChangedFormulae) { dbgs() << "\n"
"After filtering out undesirable candidates:\n"; print_uses(
dbgs()); }; } } while (false)
4135 print_uses(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { if (ChangedFormulae) { dbgs() << "\n"
"After filtering out undesirable candidates:\n"; print_uses(
dbgs()); }; } } while (false)
4136 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { if (ChangedFormulae) { dbgs() << "\n"
"After filtering out undesirable candidates:\n"; print_uses(
dbgs()); }; } } while (false)
;
4137}
4138
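To make the uniquifying key above concrete (the registers and offsets are invented for illustration): suppose a use has two formulae, reg(a) + reg(b) and reg(a) + 8, and only reg(a) is also referenced by other uses. Both formulae reduce to the key {reg(a)}, so BestFormulae keeps whichever of the two RateFormula rates as cheaper and the other is removed by DeleteFormula.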
4139// This is a rough guess that seems to work fairly well.
4140static const size_t ComplexityLimit = UINT16_MAX(65535);
4141
4142/// Estimate the worst-case number of solutions the solver might have to
4143 /// consider. It almost never considers this many solutions because it prunes
4144 /// the search space, but the pruning isn't always sufficient.
4145size_t LSRInstance::EstimateSearchSpaceComplexity() const {
4146 size_t Power = 1;
4147 for (const LSRUse &LU : Uses) {
4148 size_t FSize = LU.Formulae.size();
4149 if (FSize >= ComplexityLimit) {
4150 Power = ComplexityLimit;
4151 break;
4152 }
4153 Power *= FSize;
4154 if (Power >= ComplexityLimit)
4155 break;
4156 }
4157 return Power;
4158}
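As a quick illustration of the estimate above (the counts are invented): three uses holding 4, 12, and 9 formulae give a worst case of 4 * 12 * 9 = 432 candidate solutions. Once the running product reaches ComplexityLimit (65535) the estimate saturates there, which is what triggers the narrowing heuristics below.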
4159
4160/// When one formula uses a superset of the registers of another formula, it
4161/// won't help reduce register pressure (though it may not necessarily hurt
4162/// register pressure); remove it to simplify the system.
4163void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4164 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4165 DEBUG(dbgs() << "The search space is too complex.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "The search space is too complex.\n"
; } } while (false)
;
4166
4167 DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Narrowing the search space by eliminating formulae "
"which use a superset of registers used by other " "formulae.\n"
; } } while (false)
4168 "which use a superset of registers used by other "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Narrowing the search space by eliminating formulae "
"which use a superset of registers used by other " "formulae.\n"
; } } while (false)
4169 "formulae.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Narrowing the search space by eliminating formulae "
"which use a superset of registers used by other " "formulae.\n"
; } } while (false)
;
4170
4171 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4172 LSRUse &LU = Uses[LUIdx];
4173 bool Any = false;
4174 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4175 Formula &F = LU.Formulae[i];
4176 // Look for a formula with a constant or GV in a register. If the use
4177 // also has a formula with that same value in an immediate field,
4178 // delete the one that uses a register.
4179 for (SmallVectorImpl<const SCEV *>::const_iterator
4180 I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4181 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
4182 Formula NewF = F;
4183 NewF.BaseOffset += C->getValue()->getSExtValue();
4184 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4185 (I - F.BaseRegs.begin()));
4186 if (LU.HasFormulaWithSameRegs(NewF)) {
4187 DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Deleting "; F.print(dbgs
()); dbgs() << '\n'; } } while (false)
;
4188 LU.DeleteFormula(F);
4189 --i;
4190 --e;
4191 Any = true;
4192 break;
4193 }
4194 } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
4195 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
4196 if (!F.BaseGV) {
4197 Formula NewF = F;
4198 NewF.BaseGV = GV;
4199 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4200 (I - F.BaseRegs.begin()));
4201 if (LU.HasFormulaWithSameRegs(NewF)) {
4202 DEBUG(dbgs() << " Deleting "; F.print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Deleting "; F.print(dbgs
()); dbgs() << '\n'; } } while (false)
4203 dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Deleting "; F.print(dbgs
()); dbgs() << '\n'; } } while (false)
;
4204 LU.DeleteFormula(F);
4205 --i;
4206 --e;
4207 Any = true;
4208 break;
4209 }
4210 }
4211 }
4212 }
4213 }
4214 if (Any)
4215 LU.RecomputeRegs(LUIdx, RegUses);
4216 }
4217
4218 DEBUG(dbgs() << "After pre-selection:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "After pre-selection:\n"; print_uses
(dbgs()); } } while (false)
4219 print_uses(dbgs()))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "After pre-selection:\n"; print_uses
(dbgs()); } } while (false)
;
4220 }
4221}
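A small illustration of the pruning above (the values are invented): if a use carries both reg(a) + reg(5) and a formula with base register reg(a) and BaseOffset 5, the first formula's register set is a strict superset of the second's, so once HasFormulaWithSameRegs confirms the folded variant exists, the formula holding the constant in a register is deleted. The same applies when the extra register is a global value that can be folded into an empty BaseGV instead.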
4222
4223/// When there are many registers for expressions like A, A+1, A+2, etc.,
4224/// allocate a single register for them.
4225void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4226 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4227 return;
4228
4229 DEBUG(dbgs() << "The search space is too complex.\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "The search space is too complex.\n"
"Narrowing the search space by assuming that uses separated "
"by a constant offset will use the same registers.\n"; } } while
(false)
4230 "Narrowing the search space by assuming that uses separated "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "The search space is too complex.\n"
"Narrowing the search space by assuming that uses separated "
"by a constant offset will use the same registers.\n"; } } while
(false)
4231 "by a constant offset will use the same registers.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "The search space is too complex.\n"
"Narrowing the search space by assuming that uses separated "
"by a constant offset will use the same registers.\n"; } } while
(false)
;
4232
4233 // This is especially useful for unrolled loops.
4234
4235 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4236 LSRUse &LU = Uses[LUIdx];
4237 for (const Formula &F : LU.Formulae) {
4238 if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
4239 continue;
4240
4241 LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
4242 if (!LUThatHas)
4243 continue;
4244
4245 if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
4246 LU.Kind, LU.AccessTy))
4247 continue;
4248
4249 DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Deleting use "; LU.print
(dbgs()); dbgs() << '\n'; } } while (false)
;
4250
4251 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4252
4253 // Transfer the fixups of LU to LUThatHas.
4254 for (LSRFixup &Fixup : LU.Fixups) {
4255 Fixup.Offset += F.BaseOffset;
4256 LUThatHas->pushFixup(Fixup);
4257 DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "New fixup has offset " <<
Fixup.Offset << '\n'; } } while (false)
;
4258 }
4259
4260 // Delete formulae from the new use which are no longer legal.
4261 bool Any = false;
4262 for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4263 Formula &F = LUThatHas->Formulae[i];
4264 if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4265 LUThatHas->Kind, LUThatHas->AccessTy, F)) {
4266 DEBUG(dbgs() << " Deleting "; F.print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Deleting "; F.print(dbgs
()); dbgs() << '\n'; } } while (false)
4267 dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Deleting "; F.print(dbgs
()); dbgs() << '\n'; } } while (false)
;
4268 LUThatHas->DeleteFormula(F);
4269 --i;
4270 --e;
4271 Any = true;
4272 }
4273 }
4274
4275 if (Any)
4276 LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
4277
4278 // Delete the old use.
4279 DeleteUse(LU, LUIdx);
4280 --LUIdx;
4281 --NumUses;
4282 break;
4283 }
4284 }
4285
4286 DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "After pre-selection:\n"; print_uses
(dbgs()); } } while (false)
;
4287}
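An illustrative example for the collapsing above (the addresses are invented): after 4x unrolling, separate uses access A, A+8, A+16 and A+24. When the A+8 use has a formula that differs from a formula of the A use only by BaseOffset == 8, FindUseWithSimilarFormula locates the A use, the A+8 use's fixups are transferred to it with their offsets bumped by 8, any formulae that become illegal for the widened offset range are dropped, and the A+8 use itself is deleted, so the accesses can share a single register for A.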
4288
4289/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4290/// we've done more filtering, as it may be able to find more formulae to
4291/// eliminate.
4292void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4293 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4294 DEBUG(dbgs() << "The search space is too complex.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "The search space is too complex.\n"
; } } while (false)
;
4295
4296 DEBUG(dbgs() << "Narrowing the search space by re-filtering out "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Narrowing the search space by re-filtering out "
"undesirable dedicated registers.\n"; } } while (false)
4297 "undesirable dedicated registers.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Narrowing the search space by re-filtering out "
"undesirable dedicated registers.\n"; } } while (false)
;
4298
4299 FilterOutUndesirableDedicatedRegisters();
4300
4301 DEBUG(dbgs() << "After pre-selection:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "After pre-selection:\n"; print_uses
(dbgs()); } } while (false)
4302 print_uses(dbgs()))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "After pre-selection:\n"; print_uses
(dbgs()); } } while (false)
;
4303 }
4304}
4305
4306 /// This function deletes formulas with a high expected register count.
4307 /// Assuming we don't know the value of each formula (all inefficient ones
4308 /// have already been deleted), compute the probability of each register
4309 /// not being selected.
4310/// For example,
4311/// Use1:
4312/// reg(a) + reg({0,+,1})
4313/// reg(a) + reg({-1,+,1}) + 1
4314/// reg({a,+,1})
4315/// Use2:
4316/// reg(b) + reg({0,+,1})
4317/// reg(b) + reg({-1,+,1}) + 1
4318/// reg({b,+,1})
4319/// Use3:
4320/// reg(c) + reg(b) + reg({0,+,1})
4321/// reg(c) + reg({b,+,1})
4322///
4323/// Probability of not selecting
4324/// Use1 Use2 Use3
4325/// reg(a) (1/3) * 1 * 1
4326/// reg(b) 1 * (1/3) * (1/2)
4327/// reg({0,+,1}) (2/3) * (2/3) * (1/2)
4328/// reg({-1,+,1}) (2/3) * (2/3) * 1
4329/// reg({a,+,1}) (2/3) * 1 * 1
4330/// reg({b,+,1}) 1 * (2/3) * (2/3)
4331/// reg(c) 1 * 1 * 0
4332///
4333 /// Now compute the expected number of registers for each formula.
4334 /// Note that for each use we exclude the probability of not selecting for
4335 /// that use itself. For example, for Use1 the contribution of reg(a) is just
4336 /// 1 * 1 (excluding the probability 1/3 of not selecting it for Use1).
4337/// Use1:
4338/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted
4339/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted
4340/// reg({a,+,1}) 1
4341/// Use2:
4342/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted
4343/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted
4344/// reg({b,+,1}) 2/3
4345/// Use3:
4346/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
4347/// reg(c) + reg({b,+,1}) 1 + 2/3
4348
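Checking one row of the table above: for Use3's formula reg(c) + reg(b) + reg({0,+,1}), reg(b) contributes (1 * 1/3 * 1/2) / (1/2) = 1/3 and reg({0,+,1}) contributes (2/3 * 2/3 * 1/2) / (1/2) = 4/9, where the division removes Use3's own not-selected factor. reg(c) appears in every Use3 formula and is counted as 1 here; the implementation below instead skips such guaranteed registers via UniqRegs, which does not change the comparison. Together this gives the 1 + 1/3 + 4/9 shown for that formula.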
4349void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
4350 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4351 return;
4352 // Ok, we have too many formulae on our hands to conveniently handle.
4353 // Use a rough heuristic to thin out the list.
4354
4355 // Set of Regs which will certainly be used in the final solution, i.e.
4356 // used in every formula of a solution (in the example above this is reg(c)).
4357 // We can skip them in the calculations.
4358 SmallPtrSet<const SCEV *, 4> UniqRegs;
4359 DEBUG(dbgs() << "The search space is too complex.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "The search space is too complex.\n"
; } } while (false)
;
4360
4361 // Map each register to the probability of it not being selected.
4362 DenseMap <const SCEV *, float> RegNumMap;
4363 for (const SCEV *Reg : RegUses) {
4364 if (UniqRegs.count(Reg))
4365 continue;
4366 float PNotSel = 1;
4367 for (const LSRUse &LU : Uses) {
4368 if (!LU.Regs.count(Reg))
4369 continue;
4370 float P = LU.getNotSelectedProbability(Reg);
4371 if (P != 0.0)
4372 PNotSel *= P;
4373 else
4374 UniqRegs.insert(Reg);
4375 }
4376 RegNumMap.insert(std::make_pair(Reg, PNotSel));
4377 }
4378
4379 DEBUG(dbgs() << "Narrowing the search space by deleting costly formulas\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Narrowing the search space by deleting costly formulas\n"
; } } while (false)
;
4380
4381 // Delete formulas whose expected number of registers is high.
4382 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4383 LSRUse &LU = Uses[LUIdx];
4384 // If there is nothing to delete, continue.
4385 if (LU.Formulae.size() < 2)
4386 continue;
4387 // This is a temporary solution to test performance. Float should be
4388 // replaced with a rounding-independent type (based on integers) to avoid
4389 // different results for different target builds.
4390 float FMinRegNum = LU.Formulae[0].getNumRegs();
4391 float FMinARegNum = LU.Formulae[0].getNumRegs();
4392 size_t MinIdx = 0;
4393 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4394 Formula &F = LU.Formulae[i];
4395 float FRegNum = 0;
4396 float FARegNum = 0;
4397 for (const SCEV *BaseReg : F.BaseRegs) {
4398 if (UniqRegs.count(BaseReg))
4399 continue;
4400 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4401 if (isa<SCEVAddRecExpr>(BaseReg))
4402 FARegNum +=
4403 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
4404 }
4405 if (const SCEV *ScaledReg = F.ScaledReg) {
4406 if (!UniqRegs.count(ScaledReg)) {
4407 FRegNum +=
4408 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
4409 if (isa<SCEVAddRecExpr>(ScaledReg))
4410 FARegNum +=
4411 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
4412 }
4413 }
4414 if (FMinRegNum > FRegNum ||
4415 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
4416 FMinRegNum = FRegNum;
4417 FMinARegNum = FARegNum;
4418 MinIdx = i;
4419 }
4420 }
4421 DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " The formula "; LU.Formulae
[MinIdx].print(dbgs()); dbgs() << " with min reg num " <<
FMinRegNum << '\n'; } } while (false)
4422 dbgs() << " with min reg num " << FMinRegNum << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " The formula "; LU.Formulae
[MinIdx].print(dbgs()); dbgs() << " with min reg num " <<
FMinRegNum << '\n'; } } while (false)
;
4423 if (MinIdx != 0)
4424 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
4425 while (LU.Formulae.size() != 1) {
4426 DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Deleting "; LU.Formulae
.back().print(dbgs()); dbgs() << '\n'; } } while (false
)
4427 dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Deleting "; LU.Formulae
.back().print(dbgs()); dbgs() << '\n'; } } while (false
)
;
4428 LU.Formulae.pop_back();
4429 }
4430 LU.RecomputeRegs(LUIdx, RegUses);
4431 assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula")((LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula"
) ? static_cast<void> (0) : __assert_fail ("LU.Formulae.size() == 1 && \"Should be exactly 1 min regs formula\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4431, __PRETTY_FUNCTION__))
;
4432 Formula &F = LU.Formulae[0];
4433 DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Leaving only "; F.print
(dbgs()); dbgs() << '\n'; } } while (false)
;
4434 // When we choose the formula, the regs become unique.
4435 UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
4436 if (F.ScaledReg)
4437 UniqRegs.insert(F.ScaledReg);
4438 }
4439 DEBUG(dbgs() << "After pre-selection:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "After pre-selection:\n"; print_uses
(dbgs()); } } while (false)
4440 print_uses(dbgs()))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "After pre-selection:\n"; print_uses
(dbgs()); } } while (false)
;
4441}
4442
4443
4444/// Pick a register which seems likely to be profitable, and then in any use
4445/// which has any reference to that register, delete all formulae which do not
4446/// reference that register.
4447void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
4448 // With all other options exhausted, loop until the system is simple
4449 // enough to handle.
4450 SmallPtrSet<const SCEV *, 4> Taken;
4451 while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
1
Assuming the condition is true
2
Loop condition is true. Entering loop body
6
Assuming the condition is true
7
Loop condition is true. Entering loop body
11
Assuming the condition is true
12
Loop condition is true. Entering loop body
16
Assuming the condition is true
17
Loop condition is true. Entering loop body
4452 // Ok, we have too many formulae on our hands to conveniently handle.
4453 // Use a rough heuristic to thin out the list.
4454 DEBUG(dbgs() << "The search space is too complex.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "The search space is too complex.\n"
; } } while (false)
;
4455
4456 // Pick the register which is used by the most LSRUses, which is likely
4457 // to be a good reuse register candidate.
4458 const SCEV *Best = nullptr;
18
'Best' initialized to a null pointer value
4459 unsigned BestNum = 0;
4460 for (const SCEV *Reg : RegUses) {
3
Assuming '__begin' is equal to '__end'
8
Assuming '__begin' is equal to '__end'
13
Assuming '__begin' is equal to '__end'
19
Assuming '__begin' is equal to '__end'
4461 if (Taken.count(Reg))
4462 continue;
4463 if (!Best) {
4464 Best = Reg;
4465 BestNum = RegUses.getUsedByIndices(Reg).count();
4466 } else {
4467 unsigned Count = RegUses.getUsedByIndices(Reg).count();
4468 if (Count > BestNum) {
4469 Best = Reg;
4470 BestNum = Count;
4471 }
4472 }
4473 }
4474
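On the path the analyzer reports here, the loop over RegUses yields no candidate register, so Best is still null when the DEBUG statement below streams *Best; that is the "Forming reference to null pointer" at line 4475. A minimal sketch of one possible guard, assuming that an empty candidate set should simply stop the narrowing (an illustration only, not necessarily the actual fix adopted upstream):

    // No candidate register left to pick; further narrowing is impossible.
    if (!Best)
      break;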
4475 DEBUG(dbgs() << "Narrowing the search space by assuming " << *Bestdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Narrowing the search space by assuming "
<< *Best << " will yield profitable reuse.\n"; }
} while (false)
20
Within the expansion of the macro 'DEBUG':
a
Assuming 'DebugFlag' is not equal to 0
b
Assuming the condition is true
c
Forming reference to null pointer
4476 << " will yield profitable reuse.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "Narrowing the search space by assuming "
<< *Best << " will yield profitable reuse.\n"; }
} while (false)
;
4477 Taken.insert(Best);
4478
4479 // In any use with formulae which reference this register, delete formulae
4480 // which don't reference it.
4481 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4
Assuming 'LUIdx' is equal to 'NumUses'
5
Loop condition is false. Execution continues on line 4503
9
Assuming 'LUIdx' is equal to 'NumUses'
10
Loop condition is false. Execution continues on line 4503
14
Assuming 'LUIdx' is equal to 'NumUses'
15
Loop condition is false. Execution continues on line 4503
4482 LSRUse &LU = Uses[LUIdx];
4483 if (!LU.Regs.count(Best)) continue;
4484
4485 bool Any = false;
4486 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4487 Formula &F = LU.Formulae[i];
4488 if (!F.referencesReg(Best)) {
4489 DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << " Deleting "; F.print(dbgs
()); dbgs() << '\n'; } } while (false)
;
4490 LU.DeleteFormula(F);
4491 --e;
4492 --i;
4493 Any = true;
4494 assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?")((e != 0 && "Use has no formulae left! Is Regs inconsistent?"
) ? static_cast<void> (0) : __assert_fail ("e != 0 && \"Use has no formulae left! Is Regs inconsistent?\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4494, __PRETTY_FUNCTION__))
;
4495 continue;
4496 }
4497 }
4498
4499 if (Any)
4500 LU.RecomputeRegs(LUIdx, RegUses);
4501 }
4502
4503 DEBUG(dbgs() << "After pre-selection:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "After pre-selection:\n"; print_uses
(dbgs()); } } while (false)
4504 print_uses(dbgs()))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "After pre-selection:\n"; print_uses
(dbgs()); } } while (false)
;
4505 }
4506}
4507
4508/// If there are an extraordinary number of formulae to choose from, use some
4509/// rough heuristics to prune down the number of formulae. This keeps the main
4510/// solver from taking an extraordinary amount of time in some worst-case
4511/// scenarios.
4512void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
4513 NarrowSearchSpaceByDetectingSupersets();
4514 NarrowSearchSpaceByCollapsingUnrolledCode();
4515 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
4516 if (LSRExpNarrow)
4517 NarrowSearchSpaceByDeletingCostlyFormulas();
4518 else
4519 NarrowSearchSpaceByPickingWinnerRegs();
4520}
4521
4522/// This is the recursive solver.
4523void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
4524 Cost &SolutionCost,
4525 SmallVectorImpl<const Formula *> &Workspace,
4526 const Cost &CurCost,
4527 const SmallPtrSet<const SCEV *, 16> &CurRegs,
4528 DenseSet<const SCEV *> &VisitedRegs) const {
4529 // Some ideas:
4530 // - prune more:
4531 // - use more aggressive filtering
4532 // - sort the formula so that the most profitable solutions are found first
4533 // - sort the uses too
4534 // - search faster:
4535 // - don't compute a cost, and then compare. compare while computing a cost
4536 // and bail early.
4537 // - track register sets with SmallBitVector
4538
4539 const LSRUse &LU = Uses[Workspace.size()];
4540
4541 // If this use references any register that's already a part of the
4542 // in-progress solution, consider it a requirement that a formula must
4543 // reference that register in order to be considered. This prunes out
4544 // unprofitable searching.
4545 SmallSetVector<const SCEV *, 4> ReqRegs;
4546 for (const SCEV *S : CurRegs)
4547 if (LU.Regs.count(S))
4548 ReqRegs.insert(S);
4549
4550 SmallPtrSet<const SCEV *, 16> NewRegs;
4551 Cost NewCost;
4552 for (const Formula &F : LU.Formulae) {
4553 // Ignore formulae which may not be ideal in terms of register reuse of
4554 // ReqRegs. The formula should use all required registers before
4555 // introducing new ones.
4556 int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
4557 for (const SCEV *Reg : ReqRegs) {
4558 if ((F.ScaledReg && F.ScaledReg == Reg) ||
4559 is_contained(F.BaseRegs, Reg)) {
4560 --NumReqRegsToFind;
4561 if (NumReqRegsToFind == 0)
4562 break;
4563 }
4564 }
4565 if (NumReqRegsToFind != 0) {
4566 // If none of the formulae satisfied the required registers, then we could
4567 // clear ReqRegs and try again. Currently, we simply give up in this case.
4568 continue;
4569 }
4570
4571 // Evaluate the cost of the current formula. If it's already worse than
4572 // the current best, prune the search at that point.
4573 NewCost = CurCost;
4574 NewRegs = CurRegs;
4575 NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
4576 if (NewCost < SolutionCost) {
4577 Workspace.push_back(&F);
4578 if (Workspace.size() != Uses.size()) {
4579 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
4580 NewRegs, VisitedRegs);
4581 if (F.getNumRegs() == 1 && Workspace.size() == 1)
4582 VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
4583 } else {
4584 DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "New best at "; NewCost.print
(dbgs()); dbgs() << ".\n Regs:"; for (const SCEV *S : NewRegs
) dbgs() << ' ' << *S; dbgs() << '\n'; } } while
(false)
4585 dbgs() << ".\n Regs:";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "New best at "; NewCost.print
(dbgs()); dbgs() << ".\n Regs:"; for (const SCEV *S : NewRegs
) dbgs() << ' ' << *S; dbgs() << '\n'; } } while
(false)
4586 for (const SCEV *S : NewRegs)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "New best at "; NewCost.print
(dbgs()); dbgs() << ".\n Regs:"; for (const SCEV *S : NewRegs
) dbgs() << ' ' << *S; dbgs() << '\n'; } } while
(false)
4587 dbgs() << ' ' << *S;do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "New best at "; NewCost.print
(dbgs()); dbgs() << ".\n Regs:"; for (const SCEV *S : NewRegs
) dbgs() << ' ' << *S; dbgs() << '\n'; } } while
(false)
4588 dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "New best at "; NewCost.print
(dbgs()); dbgs() << ".\n Regs:"; for (const SCEV *S : NewRegs
) dbgs() << ' ' << *S; dbgs() << '\n'; } } while
(false)
;
4589
4590 SolutionCost = NewCost;
4591 Solution = Workspace;
4592 }
4593 Workspace.pop_back();
4594 }
4595 }
4596}
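A short trace of the recursion above (the uses and formulae are hypothetical): with two uses, the solver tries each formula of Uses[0]; for any choice whose running cost stays below SolutionCost it descends into Uses[1], and the first complete assignment found becomes the new SolutionCost and Solution, after which any branch whose partial cost already reaches it is pruned at the NewCost < SolutionCost check.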
4597
4598/// Choose one formula from each use. Return the results in the given Solution
4599/// vector.
4600void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
4601 SmallVector<const Formula *, 8> Workspace;
4602 Cost SolutionCost;
4603 SolutionCost.Lose();
4604 Cost CurCost;
4605 SmallPtrSet<const SCEV *, 16> CurRegs;
4606 DenseSet<const SCEV *> VisitedRegs;
4607 Workspace.reserve(Uses.size());
4608
4609 // SolveRecurse does all the work.
4610 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
4611 CurRegs, VisitedRegs);
4612 if (Solution.empty()) {
4613 DEBUG(dbgs() << "\nNo Satisfactory Solution\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\nNo Satisfactory Solution\n"
; } } while (false)
;
4614 return;
4615 }
4616
4617 // Ok, we've now made all our decisions.
4618 DEBUG(dbgs() << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4619 "The chosen solution requires "; SolutionCost.print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4620 dbgs() << ":\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4621 for (size_t i = 0, e = Uses.size(); i != e; ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4622 dbgs() << " ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4623 Uses[i].print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4624 dbgs() << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4625 " ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4626 Solution[i]->print(dbgs());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4627 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
4628 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-reduce")) { dbgs() << "\n" "The chosen solution requires "
; SolutionCost.print(dbgs()); dbgs() << ":\n"; for (size_t
i = 0, e = Uses.size(); i != e; ++i) { dbgs() << " ";
Uses[i].print(dbgs()); dbgs() << "\n" " "; Solution
[i]->print(dbgs()); dbgs() << '\n'; }; } } while (false
)
;
4629
4630 assert(Solution.size() == Uses.size() && "Malformed solution!")((Solution.size() == Uses.size() && "Malformed solution!"
) ? static_cast<void> (0) : __assert_fail ("Solution.size() == Uses.size() && \"Malformed solution!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4630, __PRETTY_FUNCTION__))
;
4631}
4632
4633 /// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far
4634 /// as we can go while still being dominated by the input positions. This helps
4635/// canonicalize the insert position, which encourages sharing.
4636BasicBlock::iterator
4637LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
4638 const SmallVectorImpl<Instruction *> &Inputs)
4639 const {
4640 Instruction *Tentative = &*IP;
4641 while (true) {
4642 bool AllDominate = true;
4643 Instruction *BetterPos = nullptr;
4644 // Don't bother attempting to insert before a catchswitch; its basic block
4645 // cannot have other non-PHI instructions.
4646 if (isa<CatchSwitchInst>(Tentative))
4647 return IP;
4648
4649 for (Instruction *Inst : Inputs) {
4650 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
4651 AllDominate = false;
4652 break;
4653 }
4654 // Attempt to find an insert position in the middle of the block,
4655 // instead of at the end, so that it can be used for other expansions.
4656 if (Tentative->getParent() == Inst->getParent() &&
4657 (!BetterPos || !DT.dominates(Inst, BetterPos)))
4658 BetterPos = &*std::next(BasicBlock::iterator(Inst));
4659 }
4660 if (!AllDominate)
4661 break;
4662 if (BetterPos)
4663 IP = BetterPos->getIterator();
4664 else
4665 IP = Tentative->getIterator();
4666
4667 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
4668 unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
4669
4670 BasicBlock *IDom;
4671 for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
4672 if (!Rung) return IP;
4673 Rung = Rung->getIDom();
4674 if (!Rung) return IP;
4675 IDom = Rung->getBlock();
4676
4677 // Don't climb into a loop though.
4678 const Loop *IDomLoop = LI.getLoopFor(IDom);
4679 unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
4680 if (IDomDepth <= IPLoopDepth &&
4681 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
4682 break;
4683 }
4684
4685 Tentative = IDom->getTerminator();
4686 }
4687
4688 return IP;
4689}
4690
4691/// Determine an input position which will be dominated by the operands and
4692/// which will dominate the result.
4693BasicBlock::iterator
4694LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
4695 const LSRFixup &LF,
4696 const LSRUse &LU,
4697 SCEVExpander &Rewriter) const {
4698 // Collect some instructions which must be dominated by the
4699 // expanding replacement. These must be dominated by any operands that
4700 // will be required in the expansion.
4701 SmallVector<Instruction *, 4> Inputs;
4702 if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
4703 Inputs.push_back(I);
4704 if (LU.Kind == LSRUse::ICmpZero)
4705 if (Instruction *I =
4706 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
4707 Inputs.push_back(I);
4708 if (LF.PostIncLoops.count(L)) {
4709 if (LF.isUseFullyOutsideLoop(L))
4710 Inputs.push_back(L->getLoopLatch()->getTerminator());
4711 else
4712 Inputs.push_back(IVIncInsertPos);
4713 }
4714 // The expansion must also be dominated by the increment positions of any
4715 // loops for which it is using post-inc mode.
4716 for (const Loop *PIL : LF.PostIncLoops) {
4717 if (PIL == L) continue;
4718
4719 // Be dominated by the loop exit.
4720 SmallVector<BasicBlock *, 4> ExitingBlocks;
4721 PIL->getExitingBlocks(ExitingBlocks);
4722 if (!ExitingBlocks.empty()) {
4723 BasicBlock *BB = ExitingBlocks[0];
4724 for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
4725 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
4726 Inputs.push_back(BB->getTerminator());
4727 }
4728 }
4729
4730 assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()((!isa<PHINode>(LowestIP) && !LowestIP->isEHPad
() && !isa<DbgInfoIntrinsic>(LowestIP) &&
"Insertion point must be a normal instruction") ? static_cast
<void> (0) : __assert_fail ("!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() && !isa<DbgInfoIntrinsic>(LowestIP) && \"Insertion point must be a normal instruction\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4732, __PRETTY_FUNCTION__))
4731 && !isa<DbgInfoIntrinsic>(LowestIP) &&((!isa<PHINode>(LowestIP) && !LowestIP->isEHPad
() && !isa<DbgInfoIntrinsic>(LowestIP) &&
"Insertion point must be a normal instruction") ? static_cast
<void> (0) : __assert_fail ("!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() && !isa<DbgInfoIntrinsic>(LowestIP) && \"Insertion point must be a normal instruction\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4732, __PRETTY_FUNCTION__))
4732 "Insertion point must be a normal instruction")((!isa<PHINode>(LowestIP) && !LowestIP->isEHPad
() && !isa<DbgInfoIntrinsic>(LowestIP) &&
"Insertion point must be a normal instruction") ? static_cast
<void> (0) : __assert_fail ("!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() && !isa<DbgInfoIntrinsic>(LowestIP) && \"Insertion point must be a normal instruction\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4732, __PRETTY_FUNCTION__))
;
4733
4734 // Then, climb up the immediate dominator tree as far as we can go while
4735 // still being dominated by the input positions.
4736 BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
4737
4738 // Don't insert instructions before PHI nodes.
4739 while (isa<PHINode>(IP)) ++IP;
4740
4741 // Ignore landingpad instructions.
4742 while (IP->isEHPad()) ++IP;
4743
4744 // Ignore debug intrinsics.
4745 while (isa<DbgInfoIntrinsic>(IP)) ++IP;
4746
4747 // Set IP below instructions recently inserted by SCEVExpander. This keeps the
4748 // IP consistent across expansions and allows the previously inserted
4749 // instructions to be reused by subsequent expansion.
4750 while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
4751 ++IP;
4752
4753 return IP;
4754}
4755
4756/// Emit instructions for the leading candidate expression for this LSRUse (this
4757/// is called "expanding").
4758Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
4759 const Formula &F, BasicBlock::iterator IP,
4760 SCEVExpander &Rewriter,
4761 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
4762 if (LU.RigidFormula)
4763 return LF.OperandValToReplace;
4764
4765 // Determine an input position which will be dominated by the operands and
4766 // which will dominate the result.
4767 IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
4768 Rewriter.setInsertPoint(&*IP);
4769
4770 // Inform the Rewriter if we have a post-increment use, so that it can
4771 // perform an advantageous expansion.
4772 Rewriter.setPostInc(LF.PostIncLoops);
4773
4774 // This is the type that the user actually needs.
4775 Type *OpTy = LF.OperandValToReplace->getType();
4776 // This will be the type that we'll initially expand to.
4777 Type *Ty = F.getType();
4778 if (!Ty)
4779 // No type known; just expand directly to the ultimate type.
4780 Ty = OpTy;
4781 else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
4782 // Expand directly to the ultimate type if it's the right size.
4783 Ty = OpTy;
4784 // This is the type to do integer arithmetic in.
4785 Type *IntTy = SE.getEffectiveSCEVType(Ty);
4786
4787 // Build up a list of operands to add together to form the full base.
4788 SmallVector<const SCEV *, 8> Ops;
4789
4790 // Expand the BaseRegs portion.
4791 for (const SCEV *Reg : F.BaseRegs) {
4792 assert(!Reg->isZero() && "Zero allocated in a base register!")((!Reg->isZero() && "Zero allocated in a base register!"
) ? static_cast<void> (0) : __assert_fail ("!Reg->isZero() && \"Zero allocated in a base register!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4792, __PRETTY_FUNCTION__))
;
4793
4794 // If we're expanding for a post-inc user, make the post-inc adjustment.
4795 Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
4796 Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
4797 }
4798
4799 // Expand the ScaledReg portion.
4800 Value *ICmpScaledV = nullptr;
4801 if (F.Scale != 0) {
4802 const SCEV *ScaledS = F.ScaledReg;
4803
4804 // If we're expanding for a post-inc user, make the post-inc adjustment.
4805 PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4806 ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);
4807
4808 if (LU.Kind == LSRUse::ICmpZero) {
4809 // Expand ScaleReg as if it was part of the base regs.
4810 if (F.Scale == 1)
4811 Ops.push_back(
4812 SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
4813 else {
4814 // An interesting way of "folding" with an icmp is to use a negated
4815 // scale, which we'll implement by inserting it into the other operand
4816 // of the icmp.
4817 assert(F.Scale == -1 &&((F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"
) ? static_cast<void> (0) : __assert_fail ("F.Scale == -1 && \"The only scale supported by ICmpZero uses is -1!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4818, __PRETTY_FUNCTION__))
4818 "The only scale supported by ICmpZero uses is -1!")((F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"
) ? static_cast<void> (0) : __assert_fail ("F.Scale == -1 && \"The only scale supported by ICmpZero uses is -1!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4818, __PRETTY_FUNCTION__))
;
4819 ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
4820 }
4821 } else {
4822 // Otherwise just expand the scaled register and an explicit scale,
4823 // which is expected to be matched as part of the address.
4824
4825 // Flush the operand list to suppress SCEVExpander hoisting address modes.
4826 // Unless the addressing mode will not be folded.
4827 if (!Ops.empty() && LU.Kind == LSRUse::Address &&
4828 isAMCompletelyFolded(TTI, LU, F)) {
4829 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
4830 Ops.clear();
4831 Ops.push_back(SE.getUnknown(FullV));
4832 }
4833 ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
4834 if (F.Scale != 1)
4835 ScaledS =
4836 SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
4837 Ops.push_back(ScaledS);
4838 }
4839 }
4840
4841 // Expand the GV portion.
4842 if (F.BaseGV) {
4843 // Flush the operand list to suppress SCEVExpander hoisting.
4844 if (!Ops.empty()) {
4845 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
4846 Ops.clear();
4847 Ops.push_back(SE.getUnknown(FullV));
4848 }
4849 Ops.push_back(SE.getUnknown(F.BaseGV));
4850 }
4851
4852 // Flush the operand list to suppress SCEVExpander hoisting of both folded and
4853 // unfolded offsets. LSR assumes they both live next to their uses.
4854 if (!Ops.empty()) {
4855 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
4856 Ops.clear();
4857 Ops.push_back(SE.getUnknown(FullV));
4858 }
4859
4860 // Expand the immediate portion.
4861 int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
4862 if (Offset != 0) {
4863 if (LU.Kind == LSRUse::ICmpZero) {
4864 // The other interesting way of "folding" with an ICmpZero is to use a
4865 // negated immediate.
4866 if (!ICmpScaledV)
4867 ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
4868 else {
4869 Ops.push_back(SE.getUnknown(ICmpScaledV));
4870 ICmpScaledV = ConstantInt::get(IntTy, Offset);
4871 }
4872 } else {
4873 // Just add the immediate values. These again are expected to be matched
4874 // as part of the address.
4875 Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
4876 }
4877 }
4878
4879 // Expand the unfolded offset portion.
4880 int64_t UnfoldedOffset = F.UnfoldedOffset;
4881 if (UnfoldedOffset != 0) {
4882 // Just add the immediate values.
4883 Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
4884 UnfoldedOffset)));
4885 }
4886
4887 // Emit instructions summing all the operands.
4888 const SCEV *FullS = Ops.empty() ?
4889 SE.getConstant(IntTy, 0) :
4890 SE.getAddExpr(Ops);
4891 Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
4892
4893 // We're done expanding now, so reset the rewriter.
4894 Rewriter.clearPostInc();
4895
4896 // An ICmpZero Formula represents an ICmp which we're handling as a
4897 // comparison against zero. Now that we've expanded an expression for that
4898 // form, update the ICmp's other operand.
4899 if (LU.Kind == LSRUse::ICmpZero) {
4900 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
4901 DeadInsts.emplace_back(CI->getOperand(1));
4902 assert(!F.BaseGV && "ICmp does not support folding a global value and "((!F.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!") ? static_cast<void> (0) :
__assert_fail ("!F.BaseGV && \"ICmp does not support folding a global value and \" \"a scale at the same time!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4903, __PRETTY_FUNCTION__))
4903 "a scale at the same time!")((!F.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!") ? static_cast<void> (0) :
__assert_fail ("!F.BaseGV && \"ICmp does not support folding a global value and \" \"a scale at the same time!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4903, __PRETTY_FUNCTION__))
;
4904 if (F.Scale == -1) {
4905 if (ICmpScaledV->getType() != OpTy) {
4906 Instruction *Cast =
4907 CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
4908 OpTy, false),
4909 ICmpScaledV, OpTy, "tmp", CI);
4910 ICmpScaledV = Cast;
4911 }
4912 CI->setOperand(1, ICmpScaledV);
4913 } else {
4914 // A scale of 1 means that the scale has been expanded as part of the
4915 // base regs.
4916 assert((F.Scale == 0 || F.Scale == 1) &&(((F.Scale == 0 || F.Scale == 1) && "ICmp does not support folding a global value and "
"a scale at the same time!") ? static_cast<void> (0) :
__assert_fail ("(F.Scale == 0 || F.Scale == 1) && \"ICmp does not support folding a global value and \" \"a scale at the same time!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4918, __PRETTY_FUNCTION__))
4917 "ICmp does not support folding a global value and "(((F.Scale == 0 || F.Scale == 1) && "ICmp does not support folding a global value and "
"a scale at the same time!") ? static_cast<void> (0) :
__assert_fail ("(F.Scale == 0 || F.Scale == 1) && \"ICmp does not support folding a global value and \" \"a scale at the same time!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4918, __PRETTY_FUNCTION__))
4918 "a scale at the same time!")(((F.Scale == 0 || F.Scale == 1) && "ICmp does not support folding a global value and "
"a scale at the same time!") ? static_cast<void> (0) :
__assert_fail ("(F.Scale == 0 || F.Scale == 1) && \"ICmp does not support folding a global value and \" \"a scale at the same time!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Transforms/Scalar/LoopStrengthReduce.cpp"
, 4918, __PRETTY_FUNCTION__))
;
4919 Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
4920 -(uint64_t)Offset);
4921 if (C->getType() != OpTy)
4922 C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
4923 OpTy, false),
4924 C, OpTy);
4925
4926 CI->setOperand(1, C);
4927 }
4928 }
4929
4930 return FullV;
4931}
4932
4933/// Helper for Rewrite. PHI nodes are special because the use of their operands
4934/// effectively happens in their predecessor blocks, so the expression may need
4935/// to be expanded in multiple places.
4936void LSRInstance::RewriteForPHI(
4937 PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
4938 SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
4939 DenseMap<BasicBlock *, Value *> Inserted;
4940 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
4941 if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
4942 BasicBlock *BB = PN->getIncomingBlock(i);
4943
4944 // If this is a critical edge, split the edge so that we do not insert
4945 // the code on all predecessor/successor paths. We do this unless this
4946 // is the canonical backedge for this loop, which complicates post-inc
4947 // users.
4948 if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
4949 !isa<IndirectBrInst>(BB->getTerminator()) &&
4950 !isa<CatchSwitchInst>(BB->getTerminator())) {
4951 BasicBlock *Parent = PN->getParent();
4952 Loop *PNLoop = LI.getLoopFor(Parent);
4953 if (!PNLoop || Parent != PNLoop->getHeader()) {
4954 // Split the critical edge.
4955 BasicBlock *NewBB = nullptr;
4956 if (!Parent->isLandingPad()) {
4957 NewBB = SplitCriticalEdge(BB, Parent,
4958 CriticalEdgeSplittingOptions(&DT, &LI)
4959 .setMergeIdenticalEdges()
4960 .setDontDeleteUselessPHIs());
4961 } else {
4962 SmallVector<BasicBlock*, 2> NewBBs;
4963 SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
4964 NewBB = NewBBs[0];
4965 }
4966 // If NewBB==NULL, then SplitCriticalEdge refused to split because all
4967 // phi predecessors are identical. The simple thing to do is skip
4968 // splitting in this case rather than complicate the API.
4969 if (NewBB) {
4970 // If PN is outside of the loop and BB is in the loop, we want to
4971 // move the block to be immediately before the PHI block, not
4972 // immediately after BB.
4973 if (L->contains(BB) && !L->contains(PN))
4974 NewBB->moveBefore(PN->getParent());
4975
4976 // Splitting the edge can reduce the number of PHI entries we have.
4977 e = PN->getNumIncomingValues();
4978 BB = NewBB;
4979 i = PN->getBasicBlockIndex(BB);
4980 }
4981 }
4982 }
4983
4984 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
4985 Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
4986 if (!Pair.second)
4987 PN->setIncomingValue(i, Pair.first->second);
4988 else {
4989 Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(),
4990 Rewriter, DeadInsts);
4991
4992 // If this is reuse-by-noop-cast, insert the noop cast.
4993 Type *OpTy = LF.OperandValToReplace->getType();
4994 if (FullV->getType() != OpTy)
4995 FullV =
4996 CastInst::Create(CastInst::getCastOpcode(FullV, false,
4997 OpTy, false),
4998 FullV, LF.OperandValToReplace->getType(),
4999 "tmp", BB->getTerminator());
5000
5001 PN->setIncomingValue(i, FullV);
5002 Pair.first->second = FullV;
5003 }
5004 }
5005}
5006
5007/// Emit instructions for the leading candidate expression for this LSRUse (this
5008/// is called "expanding"), and update the UserInst to reference the newly
5009/// expanded value.
5010void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
5011 const Formula &F, SCEVExpander &Rewriter,
5012 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5013 // First, find an insertion point that dominates UserInst. For PHI nodes,
5014 // find the nearest block which dominates all the relevant uses.
5015 if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
5016 RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts);
5017 } else {
5018 Value *FullV =
5019 Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
5020
5021 // If this is reuse-by-noop-cast, insert the noop cast.
5022 Type *OpTy = LF.OperandValToReplace->getType();
5023 if (FullV->getType() != OpTy) {
5024 Instruction *Cast =
5025 CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
5026 FullV, OpTy, "tmp", LF.UserInst);
5027 FullV = Cast;
5028 }
5029
5030 // Update the user. ICmpZero is handled specially here (for now) because
5031 // Expand may have updated one of the operands of the icmp already, and
5032 // its new value may happen to be equal to LF.OperandValToReplace, in
5033 // which case doing replaceUsesOfWith leads to replacing both operands
5034 // with the same value. TODO: Reorganize this.
5035 if (LU.Kind == LSRUse::ICmpZero)
5036 LF.UserInst->setOperand(0, FullV);
5037 else
5038 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
5039 }
5040
5041 DeadInsts.emplace_back(LF.OperandValToReplace);
5042}
5043
5044/// Rewrite all the fixup locations with new values, following the chosen
5045/// solution.
5046void LSRInstance::ImplementSolution(
5047 const SmallVectorImpl<const Formula *> &Solution) {
5048 // Keep track of instructions we may have made dead, so that
5049 // we can remove them after we are done working.
5050 SmallVector<WeakTrackingVH, 16> DeadInsts;
5051
5052 SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
5053 "lsr");
5054#ifndef NDEBUG
5055 Rewriter.setDebugType(DEBUG_TYPE"loop-reduce");
5056#endif
5057 Rewriter.disableCanonicalMode();
5058 Rewriter.enableLSRMode();
5059 Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
5060
5061 // Mark phi nodes that terminate chains so the expander tries to reuse them.
5062 for (const IVChain &Chain : IVChainVec) {
5063 if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
5064 Rewriter.setChainedPhi(PN);
5065 }
5066
5067 // Expand the new value definitions and update the users.
5068 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
5069 for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
5070 Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts);
5071 Changed = true;
5072 }
5073
5074 for (const IVChain &Chain : IVChainVec) {
5075 GenerateIVChain(Chain, Rewriter, DeadInsts);
5076 Changed = true;
5077 }
5078 // Clean up after ourselves. This must be done before deleting any
5079 // instructions.
5080 Rewriter.clear();
5081
5082 Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
5083}
5084
5085LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
5086 DominatorTree &DT, LoopInfo &LI,
5087 const TargetTransformInfo &TTI)
5088 : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
5089 IVIncInsertPos(nullptr) {
5090 // If LoopSimplify form is not available, stay out of trouble.
5091 if (!L->isLoopSimplifyForm())
5092 return;
5093
5094 // If there's no interesting work to be done, bail early.
5095 if (IU.empty()) return;
5096
5097 // If there's too much analysis to be done, bail early. We won't be able to
5098 // model the problem anyway.
5099 unsigned NumUsers = 0;
5100 for (const IVStrideUse &U : IU) {
5101 if (++NumUsers > MaxIVUsers) {
5102 (void)U;
5103 DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
5104 return;
5105 }
5106 // Bail out if we have a PHI on an EHPad that gets a value from a
5107 // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
5108 // no good place to stick any instructions.
5109 if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
5110 auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
5111 if (isa<FuncletPadInst>(FirstNonPHI) ||
5112 isa<CatchSwitchInst>(FirstNonPHI))
5113 for (BasicBlock *PredBB : PN->blocks())
5114 if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
5115 return;
5116 }
5117 }

5118
5119#ifndef NDEBUG
5120 // All dominating loops must have preheaders, or SCEVExpander may not be able
5121 // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
5122 //
5123 // IVUsers analysis should only create users that are dominated by simple loop
5124 // headers. Since this loop should dominate all of its users, its user list
5125 // should be empty if this loop itself is not within a simple loop nest.
5126 for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
5127 Rung; Rung = Rung->getIDom()) {
5128 BasicBlock *BB = Rung->getBlock();
5129 const Loop *DomLoop = LI.getLoopFor(BB);
5130 if (DomLoop && DomLoop->getHeader() == BB) {
5131 assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
5132 }
5133 }
5134#endif // DEBUG
5135
5136 DEBUG(dbgs() << "\nLSR on loop ";
5137       L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
5138       dbgs() << ":\n");
5139
5140 // First, perform some low-level loop optimizations.
5141 OptimizeShadowIV();
5142 OptimizeLoopTermCond();
5143
5144 // If loop preparation eliminates all interesting IV users, bail.
5145 if (IU.empty()) return;
5146
5147 // Skip nested loops until we can model them better with formulae.
5148 if (!L->empty()) {
5149 DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
5150 return;
5151 }
5152
5153 // Start collecting data and preparing for the solver.
5154 CollectChains();
5155 CollectInterestingTypesAndFactors();
5156 CollectFixupsAndInitialFormulae();
5157 CollectLoopInvariantFixupsAndFormulae();
5158
5159 assert(!Uses.empty() && "IVUsers reported at least one use");
5160 DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
5161       print_uses(dbgs()));
5162
5163 // Now use the reuse data to generate a bunch of interesting ways
5164 // to formulate the values needed for the uses.
5165 GenerateAllReuseFormulae();
5166
5167 FilterOutUndesirableDedicatedRegisters();
5168 NarrowSearchSpaceUsingHeuristics();
5169
5170 SmallVector<const Formula *, 8> Solution;
5171 Solve(Solution);
5172
5173 // Release memory that is no longer needed.
5174 Factors.clear();
5175 Types.clear();
5176 RegUses.clear();
5177
5178 if (Solution.empty())
5179 return;
5180
5181#ifndef NDEBUG
5182 // Formulae should be legal.
5183 for (const LSRUse &LU : Uses) {
5184 for (const Formula &F : LU.Formulae)
5185 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
5186        F) && "Illegal formula generated!");
5187 };
5188#endif
5189
5190 // Now that we've decided what we want, make it so.
5191 ImplementSolution(Solution);
5192}
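
The EH-pad bail-out inside the constructor above amounts to a small predicate over a PHI's incoming blocks. A minimal sketch, assuming only the PHINode and CatchSwitchInst APIs already used in this file; hasCatchSwitchIncoming is a hypothetical name, not part of the pass.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

// Hypothetical predicate: does any incoming block of this PHI end in a
// catchswitch? Such a block cannot be split and offers no insertion point
// before its terminator, so rewritten IV arithmetic has nowhere to go.
static bool hasCatchSwitchIncoming(llvm::PHINode *PN) {
  for (llvm::BasicBlock *Pred : PN->blocks())
    if (llvm::isa<llvm::CatchSwitchInst>(Pred->getFirstNonPHI()))
      return true;
  return false;
}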
5193
5194void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
5195 if (Factors.empty() && Types.empty()) return;
5196
5197 OS << "LSR has identified the following interesting factors and types: ";
5198 bool First = true;
5199
5200 for (int64_t Factor : Factors) {
5201 if (!First) OS << ", ";
5202 First = false;
5203 OS << '*' << Factor;
5204 }
5205
5206 for (Type *Ty : Types) {
5207 if (!First) OS << ", ";
5208 First = false;
5209 OS << '(' << *Ty << ')';
5210 }
5211 OS << '\n';
5212}
5213
5214void LSRInstance::print_fixups(raw_ostream &OS) const {
5215 OS << "LSR is examining the following fixup sites:\n";
5216 for (const LSRUse &LU : Uses)
5217 for (const LSRFixup &LF : LU.Fixups) {
5218 OS << " ";
5219 LF.print(OS);
5220 OS << '\n';
5221 }
5222}
5223
5224void LSRInstance::print_uses(raw_ostream &OS) const {
5225 OS << "LSR is examining the following uses:\n";
5226 for (const LSRUse &LU : Uses) {
5227 OS << " ";
5228 LU.print(OS);
5229 OS << '\n';
5230 for (const Formula &F : LU.Formulae) {
5231 OS << " ";
5232 F.print(OS);
5233 OS << '\n';
5234 }
5235 }
5236}
5237
5238void LSRInstance::print(raw_ostream &OS) const {
5239 print_factors_and_types(OS);
5240 print_fixups(OS);
5241 print_uses(OS);
5242}
5243
5244#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5245LLVM_DUMP_METHOD void LSRInstance::dump() const {
5246 print(errs()); errs() << '\n';
5247}
5248#endif
5249
5250namespace {
5251
5252class LoopStrengthReduce : public LoopPass {
5253public:
5254 static char ID; // Pass ID, replacement for typeid
5255
5256 LoopStrengthReduce();
5257
5258private:
5259 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
5260 void getAnalysisUsage(AnalysisUsage &AU) const override;
5261};
5262
5263} // end anonymous namespace
5264
5265LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
5266 initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
5267}
5268
5269void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
5270 // We split critical edges, so we change the CFG. However, we do update
5271 // many analyses if they are around.
5272 AU.addPreservedID(LoopSimplifyID);
5273
5274 AU.addRequired<LoopInfoWrapperPass>();
5275 AU.addPreserved<LoopInfoWrapperPass>();
5276 AU.addRequiredID(LoopSimplifyID);
5277 AU.addRequired<DominatorTreeWrapperPass>();
5278 AU.addPreserved<DominatorTreeWrapperPass>();
5279 AU.addRequired<ScalarEvolutionWrapperPass>();
5280 AU.addPreserved<ScalarEvolutionWrapperPass>();
5281 // Requiring LoopSimplify a second time here prevents IVUsers from running
5282 // twice, since LoopSimplify was invalidated by running ScalarEvolution.
5283 AU.addRequiredID(LoopSimplifyID);
5284 AU.addRequired<IVUsersWrapperPass>();
5285 AU.addPreserved<IVUsersWrapperPass>();
5286 AU.addRequired<TargetTransformInfoWrapperPass>();
5287}
5288
5289static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
5290 DominatorTree &DT, LoopInfo &LI,
5291 const TargetTransformInfo &TTI) {
5292 bool Changed = false;
5293
5294 // Run the main LSR transformation.
5295 Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
5296
5297 // Remove any extra phis created by processing inner loops.
5298 Changed |= DeleteDeadPHIs(L->getHeader());
5299 if (EnablePhiElim && L->isLoopSimplifyForm()) {
5300 SmallVector<WeakTrackingVH, 16> DeadInsts;
5301 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
5302 SCEVExpander Rewriter(SE, DL, "lsr");
5303#ifndef NDEBUG
5304 Rewriter.setDebugType(DEBUG_TYPE);
5305#endif
5306 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
5307 if (numFolded) {
5308 Changed = true;
5309 DeleteTriviallyDeadInstructions(DeadInsts);
5310 DeleteDeadPHIs(L->getHeader());
5311 }
5312 }
5313 return Changed;
5314}
5315
5316bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
5317 if (skipLoop(L))
5318 return false;
5319
5320 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
5321 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
5322 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5323 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
5324 const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
5325 *L->getHeader()->getParent());
5326 return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
5327}
5328
5329PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
5330 LoopStandardAnalysisResults &AR,
5331 LPMUpdater &) {
5332 if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
5333 AR.DT, AR.LI, AR.TTI))
5334 return PreservedAnalyses::all();
5335
5336 return getLoopPassPreservedAnalyses();
5337}
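
For context, the new-pass-manager entry point above is normally reached through the function-to-loop adaptor. A hedged usage sketch follows, assuming the adaptor and LoopPassManager spellings from this snapshot's headers; addLSRNewPM is a hypothetical pipeline-building helper.

#include <utility>
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"

// Hypothetical helper: nest loop-reduce inside a function pipeline.
static void addLSRNewPM(llvm::FunctionPassManager &FPM) {
  llvm::LoopPassManager LPM;
  LPM.addPass(llvm::LoopStrengthReducePass());
  FPM.addPass(llvm::createFunctionToLoopPassAdaptor(std::move(LPM)));
}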
5338
5339char LoopStrengthReduce::ID = 0;
5340INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
5341                      "Loop Strength Reduction", false, false)
5342INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
5343INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5344INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
5345INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
5346INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
5347INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
5348INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
5349                    "Loop Strength Reduction", false, false)
5350
5351Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }
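
Finally, a hedged usage sketch of the factory function defined on the line above: scheduling the legacy pass from a hand-built pipeline. The surrounding setup (module, target machine) is assumed and omitted; addLSRLegacy is a hypothetical helper.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

// Hypothetical helper: the legacy loop pass manager infrastructure wraps the
// pass when the pipeline is run over a module.
static void addLSRLegacy(llvm::legacy::PassManager &PM) {
  PM.add(llvm::createLoopStrengthReducePass());
}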