File: | lib/Analysis/ScalarEvolution.cpp |
Location: | line 8800, column 15 |
Description: | Value stored to 'MaxBECount' during its initialization is never read |
1 | //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | // |
10 | // This file contains the implementation of the scalar evolution analysis |
11 | // engine, which is used primarily to analyze expressions involving induction |
12 | // variables in loops. |
13 | // |
14 | // There are several aspects to this library. First is the representation of |
15 | // scalar expressions, which are represented as subclasses of the SCEV class. |
16 | // These classes are used to represent certain types of subexpressions that we |
17 | // can handle. We only create one SCEV of a particular shape, so |
18 | // pointer-comparisons for equality are legal. |
19 | // |
20 | // One important aspect of the SCEV objects is that they are never cyclic, even |
21 | // if there is a cycle in the dataflow for an expression (ie, a PHI node). If |
22 | // the PHI node is one of the idioms that we can represent (e.g., a polynomial |
23 | // recurrence) then we represent it directly as a recurrence node, otherwise we |
24 | // represent it as a SCEVUnknown node. |
25 | // |
26 | // In addition to being able to represent expressions of various types, we also |
27 | // have folders that are used to build the *canonical* representation for a |
28 | // particular expression. These folders are capable of using a variety of |
29 | // rewrite rules to simplify the expressions. |
30 | // |
31 | // Once the folders are defined, we can implement the more interesting |
32 | // higher-level code, such as the code that recognizes PHI nodes of various |
33 | // types, computes the execution count of a loop, etc. |
34 | // |
35 | // TODO: We should use these routines and value representations to implement |
36 | // dependence analysis! |
37 | // |
38 | //===----------------------------------------------------------------------===// |
39 | // |
40 | // There are several good references for the techniques used in this analysis. |
41 | // |
42 | // Chains of recurrences -- a method to expedite the evaluation |
43 | // of closed-form functions |
44 | // Olaf Bachmann, Paul S. Wang, Eugene V. Zima |
45 | // |
46 | // On computational properties of chains of recurrences |
47 | // Eugene V. Zima |
48 | // |
49 | // Symbolic Evaluation of Chains of Recurrences for Loop Optimization |
50 | // Robert A. van Engelen |
51 | // |
52 | // Efficient Symbolic Analysis for Optimizing Compilers |
53 | // Robert A. van Engelen |
54 | // |
55 | // Using the chains of recurrences algebra for data dependence testing and |
56 | // induction variable substitution |
57 | // MS Thesis, Johnie Birch |
58 | // |
59 | //===----------------------------------------------------------------------===// |
60 | |
61 | #include "llvm/Analysis/ScalarEvolution.h" |
62 | #include "llvm/ADT/Optional.h" |
63 | #include "llvm/ADT/STLExtras.h" |
64 | #include "llvm/ADT/SmallPtrSet.h" |
65 | #include "llvm/ADT/Statistic.h" |
66 | #include "llvm/Analysis/AssumptionCache.h" |
67 | #include "llvm/Analysis/ConstantFolding.h" |
68 | #include "llvm/Analysis/InstructionSimplify.h" |
69 | #include "llvm/Analysis/LoopInfo.h" |
70 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
71 | #include "llvm/Analysis/TargetLibraryInfo.h" |
72 | #include "llvm/Analysis/ValueTracking.h" |
73 | #include "llvm/IR/ConstantRange.h" |
74 | #include "llvm/IR/Constants.h" |
75 | #include "llvm/IR/DataLayout.h" |
76 | #include "llvm/IR/DerivedTypes.h" |
77 | #include "llvm/IR/Dominators.h" |
78 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
79 | #include "llvm/IR/GlobalAlias.h" |
80 | #include "llvm/IR/GlobalVariable.h" |
81 | #include "llvm/IR/InstIterator.h" |
82 | #include "llvm/IR/Instructions.h" |
83 | #include "llvm/IR/LLVMContext.h" |
84 | #include "llvm/IR/Metadata.h" |
85 | #include "llvm/IR/Operator.h" |
86 | #include "llvm/IR/PatternMatch.h" |
87 | #include "llvm/Support/CommandLine.h" |
88 | #include "llvm/Support/Debug.h" |
89 | #include "llvm/Support/ErrorHandling.h" |
90 | #include "llvm/Support/MathExtras.h" |
91 | #include "llvm/Support/raw_ostream.h" |
92 | #include "llvm/Support/SaveAndRestore.h" |
93 | #include <algorithm> |
94 | using namespace llvm; |
95 | |
96 | #define DEBUG_TYPE"scalar-evolution" "scalar-evolution" |
97 | |
98 | STATISTIC(NumArrayLenItCounts,static llvm::Statistic NumArrayLenItCounts = { "scalar-evolution" , "Number of trip counts computed with array length", 0, 0 } |
99 | "Number of trip counts computed with array length")static llvm::Statistic NumArrayLenItCounts = { "scalar-evolution" , "Number of trip counts computed with array length", 0, 0 }; |
100 | STATISTIC(NumTripCountsComputed,static llvm::Statistic NumTripCountsComputed = { "scalar-evolution" , "Number of loops with predictable loop counts", 0, 0 } |
101 | "Number of loops with predictable loop counts")static llvm::Statistic NumTripCountsComputed = { "scalar-evolution" , "Number of loops with predictable loop counts", 0, 0 }; |
102 | STATISTIC(NumTripCountsNotComputed,static llvm::Statistic NumTripCountsNotComputed = { "scalar-evolution" , "Number of loops without predictable loop counts", 0, 0 } |
103 | "Number of loops without predictable loop counts")static llvm::Statistic NumTripCountsNotComputed = { "scalar-evolution" , "Number of loops without predictable loop counts", 0, 0 }; |
104 | STATISTIC(NumBruteForceTripCountsComputed,static llvm::Statistic NumBruteForceTripCountsComputed = { "scalar-evolution" , "Number of loops with trip counts computed by force", 0, 0 } |
105 | "Number of loops with trip counts computed by force")static llvm::Statistic NumBruteForceTripCountsComputed = { "scalar-evolution" , "Number of loops with trip counts computed by force", 0, 0 }; |
106 | |
107 | static cl::opt<unsigned> |
108 | MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, |
109 | cl::desc("Maximum number of iterations SCEV will " |
110 | "symbolically execute a constant " |
111 | "derived loop"), |
112 | cl::init(100)); |
113 | |
114 | // FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean. |
115 | static cl::opt<bool> |
116 | VerifySCEV("verify-scev", |
117 | cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); |
118 | static cl::opt<bool> |
119 | VerifySCEVMap("verify-scev-maps", |
120 | cl::desc("Verify no dangling value in ScalarEvolution's " |
121 | "ExprValueMap (slow)")); |
122 | |
123 | //===----------------------------------------------------------------------===// |
124 | // SCEV class definitions |
125 | //===----------------------------------------------------------------------===// |
126 | |
127 | //===----------------------------------------------------------------------===// |
128 | // Implementation of the SCEV class. |
129 | // |
130 | |
131 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) |
132 | void SCEV::dump() const { |
133 | print(dbgs()); |
134 | dbgs() << '\n'; |
135 | } |
136 | |
137 | void SCEV::print(raw_ostream &OS) const { |
138 | switch (static_cast<SCEVTypes>(getSCEVType())) { |
139 | case scConstant: |
140 | cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false); |
141 | return; |
142 | case scTruncate: { |
143 | const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this); |
144 | const SCEV *Op = Trunc->getOperand(); |
145 | OS << "(trunc " << *Op->getType() << " " << *Op << " to " |
146 | << *Trunc->getType() << ")"; |
147 | return; |
148 | } |
149 | case scZeroExtend: { |
150 | const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this); |
151 | const SCEV *Op = ZExt->getOperand(); |
152 | OS << "(zext " << *Op->getType() << " " << *Op << " to " |
153 | << *ZExt->getType() << ")"; |
154 | return; |
155 | } |
156 | case scSignExtend: { |
157 | const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this); |
158 | const SCEV *Op = SExt->getOperand(); |
159 | OS << "(sext " << *Op->getType() << " " << *Op << " to " |
160 | << *SExt->getType() << ")"; |
161 | return; |
162 | } |
163 | case scAddRecExpr: { |
164 | const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this); |
165 | OS << "{" << *AR->getOperand(0); |
166 | for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) |
167 | OS << ",+," << *AR->getOperand(i); |
168 | OS << "}<"; |
169 | if (AR->hasNoUnsignedWrap()) |
170 | OS << "nuw><"; |
171 | if (AR->hasNoSignedWrap()) |
172 | OS << "nsw><"; |
173 | if (AR->hasNoSelfWrap() && |
174 | !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) |
175 | OS << "nw><"; |
176 | AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
177 | OS << ">"; |
178 | return; |
179 | } |
180 | case scAddExpr: |
181 | case scMulExpr: |
182 | case scUMaxExpr: |
183 | case scSMaxExpr: { |
184 | const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); |
185 | const char *OpStr = nullptr; |
186 | switch (NAry->getSCEVType()) { |
187 | case scAddExpr: OpStr = " + "; break; |
188 | case scMulExpr: OpStr = " * "; break; |
189 | case scUMaxExpr: OpStr = " umax "; break; |
190 | case scSMaxExpr: OpStr = " smax "; break; |
191 | } |
192 | OS << "("; |
193 | for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); |
194 | I != E; ++I) { |
195 | OS << **I; |
196 | if (std::next(I) != E) |
197 | OS << OpStr; |
198 | } |
199 | OS << ")"; |
200 | switch (NAry->getSCEVType()) { |
201 | case scAddExpr: |
202 | case scMulExpr: |
203 | if (NAry->hasNoUnsignedWrap()) |
204 | OS << "<nuw>"; |
205 | if (NAry->hasNoSignedWrap()) |
206 | OS << "<nsw>"; |
207 | } |
208 | return; |
209 | } |
210 | case scUDivExpr: { |
211 | const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this); |
212 | OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; |
213 | return; |
214 | } |
215 | case scUnknown: { |
216 | const SCEVUnknown *U = cast<SCEVUnknown>(this); |
217 | Type *AllocTy; |
218 | if (U->isSizeOf(AllocTy)) { |
219 | OS << "sizeof(" << *AllocTy << ")"; |
220 | return; |
221 | } |
222 | if (U->isAlignOf(AllocTy)) { |
223 | OS << "alignof(" << *AllocTy << ")"; |
224 | return; |
225 | } |
226 | |
227 | Type *CTy; |
228 | Constant *FieldNo; |
229 | if (U->isOffsetOf(CTy, FieldNo)) { |
230 | OS << "offsetof(" << *CTy << ", "; |
231 | FieldNo->printAsOperand(OS, false); |
232 | OS << ")"; |
233 | return; |
234 | } |
235 | |
236 | // Otherwise just print it normally. |
237 | U->getValue()->printAsOperand(OS, false); |
238 | return; |
239 | } |
240 | case scCouldNotCompute: |
241 | OS << "***COULDNOTCOMPUTE***"; |
242 | return; |
243 | } |
244 | llvm_unreachable("Unknown SCEV kind!")::llvm::llvm_unreachable_internal("Unknown SCEV kind!", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 244); |
245 | } |
246 | |
247 | Type *SCEV::getType() const { |
248 | switch (static_cast<SCEVTypes>(getSCEVType())) { |
249 | case scConstant: |
250 | return cast<SCEVConstant>(this)->getType(); |
251 | case scTruncate: |
252 | case scZeroExtend: |
253 | case scSignExtend: |
254 | return cast<SCEVCastExpr>(this)->getType(); |
255 | case scAddRecExpr: |
256 | case scMulExpr: |
257 | case scUMaxExpr: |
258 | case scSMaxExpr: |
259 | return cast<SCEVNAryExpr>(this)->getType(); |
260 | case scAddExpr: |
261 | return cast<SCEVAddExpr>(this)->getType(); |
262 | case scUDivExpr: |
263 | return cast<SCEVUDivExpr>(this)->getType(); |
264 | case scUnknown: |
265 | return cast<SCEVUnknown>(this)->getType(); |
266 | case scCouldNotCompute: |
267 | llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!")::llvm::llvm_unreachable_internal("Attempt to use a SCEVCouldNotCompute object!" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 267); |
268 | } |
269 | llvm_unreachable("Unknown SCEV kind!")::llvm::llvm_unreachable_internal("Unknown SCEV kind!", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 269); |
270 | } |
271 | |
272 | bool SCEV::isZero() const { |
273 | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) |
274 | return SC->getValue()->isZero(); |
275 | return false; |
276 | } |
277 | |
278 | bool SCEV::isOne() const { |
279 | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) |
280 | return SC->getValue()->isOne(); |
281 | return false; |
282 | } |
283 | |
284 | bool SCEV::isAllOnesValue() const { |
285 | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) |
286 | return SC->getValue()->isAllOnesValue(); |
287 | return false; |
288 | } |
289 | |
290 | bool SCEV::isNonConstantNegative() const { |
291 | const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this); |
292 | if (!Mul) return false; |
293 | |
294 | // If there is a constant factor, it will be first. |
295 | const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); |
296 | if (!SC) return false; |
297 | |
298 | // Return true if the value is negative, this matches things like (-42 * V). |
299 | return SC->getAPInt().isNegative(); |
300 | } |
301 | |
// SCEVCouldNotCompute is a sentinel with no operands, so it is constructed
// with an empty folding-set ID.
SCEVCouldNotCompute::SCEVCouldNotCompute() :
  SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
304 | |
// Support for isa<>/dyn_cast<>: a SCEV is a SCEVCouldNotCompute exactly when
// its kind is scCouldNotCompute.
bool SCEVCouldNotCompute::classof(const SCEV *S) {
  return S->getSCEVType() == scCouldNotCompute;
}
308 | |
309 | const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { |
310 | FoldingSetNodeID ID; |
311 | ID.AddInteger(scConstant); |
312 | ID.AddPointer(V); |
313 | void *IP = nullptr; |
314 | if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; |
315 | SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); |
316 | UniqueSCEVs.InsertNode(S, IP); |
317 | return S; |
318 | } |
319 | |
// Convenience overload: wrap the APInt in a ConstantInt of the current
// context and defer to the ConstantInt-based factory.
const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
  return getConstant(ConstantInt::get(getContext(), Val));
}
323 | |
324 | const SCEV * |
325 | ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { |
326 | IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); |
327 | return getConstant(ConstantInt::get(ITy, V, isSigned)); |
328 | } |
329 | |
// Common base constructor for trunc/zext/sext expressions; records the source
// operand and the destination type.
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
                           unsigned SCEVTy, const SCEV *op, Type *ty)
  : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
333 | |
334 | SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, |
335 | const SCEV *op, Type *ty) |
336 | : SCEVCastExpr(ID, scTruncate, op, ty) { |
337 | assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&(((Op->getType()->isIntegerTy() || Op->getType()-> isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy ()) && "Cannot truncate non-integer value!") ? static_cast <void> (0) : __assert_fail ("(Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate non-integer value!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 339, __PRETTY_FUNCTION__)) |
338 | (Ty->isIntegerTy() || Ty->isPointerTy()) &&(((Op->getType()->isIntegerTy() || Op->getType()-> isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy ()) && "Cannot truncate non-integer value!") ? static_cast <void> (0) : __assert_fail ("(Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate non-integer value!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 339, __PRETTY_FUNCTION__)) |
339 | "Cannot truncate non-integer value!")(((Op->getType()->isIntegerTy() || Op->getType()-> isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy ()) && "Cannot truncate non-integer value!") ? static_cast <void> (0) : __assert_fail ("(Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate non-integer value!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 339, __PRETTY_FUNCTION__)); |
340 | } |
341 | |
342 | SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, |
343 | const SCEV *op, Type *ty) |
344 | : SCEVCastExpr(ID, scZeroExtend, op, ty) { |
345 | assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&(((Op->getType()->isIntegerTy() || Op->getType()-> isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy ()) && "Cannot zero extend non-integer value!") ? static_cast <void> (0) : __assert_fail ("(Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot zero extend non-integer value!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 347, __PRETTY_FUNCTION__)) |
346 | (Ty->isIntegerTy() || Ty->isPointerTy()) &&(((Op->getType()->isIntegerTy() || Op->getType()-> isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy ()) && "Cannot zero extend non-integer value!") ? static_cast <void> (0) : __assert_fail ("(Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot zero extend non-integer value!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 347, __PRETTY_FUNCTION__)) |
347 | "Cannot zero extend non-integer value!")(((Op->getType()->isIntegerTy() || Op->getType()-> isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy ()) && "Cannot zero extend non-integer value!") ? static_cast <void> (0) : __assert_fail ("(Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot zero extend non-integer value!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 347, __PRETTY_FUNCTION__)); |
348 | } |
349 | |
350 | SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, |
351 | const SCEV *op, Type *ty) |
352 | : SCEVCastExpr(ID, scSignExtend, op, ty) { |
353 | assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&(((Op->getType()->isIntegerTy() || Op->getType()-> isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy ()) && "Cannot sign extend non-integer value!") ? static_cast <void> (0) : __assert_fail ("(Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot sign extend non-integer value!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 355, __PRETTY_FUNCTION__)) |
354 | (Ty->isIntegerTy() || Ty->isPointerTy()) &&(((Op->getType()->isIntegerTy() || Op->getType()-> isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy ()) && "Cannot sign extend non-integer value!") ? static_cast <void> (0) : __assert_fail ("(Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot sign extend non-integer value!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 355, __PRETTY_FUNCTION__)) |
355 | "Cannot sign extend non-integer value!")(((Op->getType()->isIntegerTy() || Op->getType()-> isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy ()) && "Cannot sign extend non-integer value!") ? static_cast <void> (0) : __assert_fail ("(Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot sign extend non-integer value!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 355, __PRETTY_FUNCTION__)); |
356 | } |
357 | |
// ValueHandle callback: the underlying llvm::Value has been deleted.  Purge
// everything cached for this node, then drop the value pointer.
void SCEVUnknown::deleted() {
  // Clear this SCEVUnknown from various maps.
  SE->forgetMemoizedResults(this);

  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Release the value.
  setValPtr(nullptr);
}
368 | |
// ValueHandle callback: the underlying llvm::Value was RAUW'd with \p New.
// Purge cached results, then retarget this node at the replacement value.
void SCEVUnknown::allUsesReplacedWith(Value *New) {
  // Clear this SCEVUnknown from various maps.
  SE->forgetMemoizedResults(this);

  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Update this SCEVUnknown to point to the new value. This is needed
  // because there may still be outstanding SCEVs which still point to
  // this SCEVUnknown.
  setValPtr(New);
}
381 | |
382 | bool SCEVUnknown::isSizeOf(Type *&AllocTy) const { |
383 | if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) |
384 | if (VCE->getOpcode() == Instruction::PtrToInt) |
385 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) |
386 | if (CE->getOpcode() == Instruction::GetElementPtr && |
387 | CE->getOperand(0)->isNullValue() && |
388 | CE->getNumOperands() == 2) |
389 | if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1))) |
390 | if (CI->isOne()) { |
391 | AllocTy = cast<PointerType>(CE->getOperand(0)->getType()) |
392 | ->getElementType(); |
393 | return true; |
394 | } |
395 | |
396 | return false; |
397 | } |
398 | |
399 | bool SCEVUnknown::isAlignOf(Type *&AllocTy) const { |
400 | if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) |
401 | if (VCE->getOpcode() == Instruction::PtrToInt) |
402 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) |
403 | if (CE->getOpcode() == Instruction::GetElementPtr && |
404 | CE->getOperand(0)->isNullValue()) { |
405 | Type *Ty = |
406 | cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); |
407 | if (StructType *STy = dyn_cast<StructType>(Ty)) |
408 | if (!STy->isPacked() && |
409 | CE->getNumOperands() == 3 && |
410 | CE->getOperand(1)->isNullValue()) { |
411 | if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2))) |
412 | if (CI->isOne() && |
413 | STy->getNumElements() == 2 && |
414 | STy->getElementType(0)->isIntegerTy(1)) { |
415 | AllocTy = STy->getElementType(1); |
416 | return true; |
417 | } |
418 | } |
419 | } |
420 | |
421 | return false; |
422 | } |
423 | |
424 | bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { |
425 | if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) |
426 | if (VCE->getOpcode() == Instruction::PtrToInt) |
427 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) |
428 | if (CE->getOpcode() == Instruction::GetElementPtr && |
429 | CE->getNumOperands() == 3 && |
430 | CE->getOperand(0)->isNullValue() && |
431 | CE->getOperand(1)->isNullValue()) { |
432 | Type *Ty = |
433 | cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); |
434 | // Ignore vector types here so that ScalarEvolutionExpander doesn't |
435 | // emit getelementptrs that index into vectors. |
436 | if (Ty->isStructTy() || Ty->isArrayTy()) { |
437 | CTy = Ty; |
438 | FieldNo = CE->getOperand(2); |
439 | return true; |
440 | } |
441 | } |
442 | |
443 | return false; |
444 | } |
445 | |
446 | //===----------------------------------------------------------------------===// |
447 | // SCEV Utilities |
448 | //===----------------------------------------------------------------------===// |
449 | |
450 | namespace { |
451 | /// SCEVComplexityCompare - Return true if the complexity of the LHS is less |
452 | /// than the complexity of the RHS. This comparator is used to canonicalize |
453 | /// expressions. |
454 | class SCEVComplexityCompare { |
455 | const LoopInfo *const LI; |
456 | public: |
457 | explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} |
458 | |
459 | // Return true or false if LHS is less than, or at least RHS, respectively. |
460 | bool operator()(const SCEV *LHS, const SCEV *RHS) const { |
461 | return compare(LHS, RHS) < 0; |
462 | } |
463 | |
464 | // Return negative, zero, or positive, if LHS is less than, equal to, or |
465 | // greater than RHS, respectively. A three-way result allows recursive |
466 | // comparisons to be more efficient. |
467 | int compare(const SCEV *LHS, const SCEV *RHS) const { |
468 | // Fast-path: SCEVs are uniqued so we can do a quick equality check. |
469 | if (LHS == RHS) |
470 | return 0; |
471 | |
472 | // Primarily, sort the SCEVs by their getSCEVType(). |
473 | unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); |
474 | if (LType != RType) |
475 | return (int)LType - (int)RType; |
476 | |
477 | // Aside from the getSCEVType() ordering, the particular ordering |
478 | // isn't very important except that it's beneficial to be consistent, |
479 | // so that (a + b) and (b + a) don't end up as different expressions. |
480 | switch (static_cast<SCEVTypes>(LType)) { |
481 | case scUnknown: { |
482 | const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); |
483 | const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); |
484 | |
485 | // Sort SCEVUnknown values with some loose heuristics. TODO: This is |
486 | // not as complete as it could be. |
487 | const Value *LV = LU->getValue(), *RV = RU->getValue(); |
488 | |
489 | // Order pointer values after integer values. This helps SCEVExpander |
490 | // form GEPs. |
491 | bool LIsPointer = LV->getType()->isPointerTy(), |
492 | RIsPointer = RV->getType()->isPointerTy(); |
493 | if (LIsPointer != RIsPointer) |
494 | return (int)LIsPointer - (int)RIsPointer; |
495 | |
496 | // Compare getValueID values. |
497 | unsigned LID = LV->getValueID(), |
498 | RID = RV->getValueID(); |
499 | if (LID != RID) |
500 | return (int)LID - (int)RID; |
501 | |
502 | // Sort arguments by their position. |
503 | if (const Argument *LA = dyn_cast<Argument>(LV)) { |
504 | const Argument *RA = cast<Argument>(RV); |
505 | unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); |
506 | return (int)LArgNo - (int)RArgNo; |
507 | } |
508 | |
509 | // For instructions, compare their loop depth, and their operand |
510 | // count. This is pretty loose. |
511 | if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { |
512 | const Instruction *RInst = cast<Instruction>(RV); |
513 | |
514 | // Compare loop depths. |
515 | const BasicBlock *LParent = LInst->getParent(), |
516 | *RParent = RInst->getParent(); |
517 | if (LParent != RParent) { |
518 | unsigned LDepth = LI->getLoopDepth(LParent), |
519 | RDepth = LI->getLoopDepth(RParent); |
520 | if (LDepth != RDepth) |
521 | return (int)LDepth - (int)RDepth; |
522 | } |
523 | |
524 | // Compare the number of operands. |
525 | unsigned LNumOps = LInst->getNumOperands(), |
526 | RNumOps = RInst->getNumOperands(); |
527 | return (int)LNumOps - (int)RNumOps; |
528 | } |
529 | |
530 | return 0; |
531 | } |
532 | |
533 | case scConstant: { |
534 | const SCEVConstant *LC = cast<SCEVConstant>(LHS); |
535 | const SCEVConstant *RC = cast<SCEVConstant>(RHS); |
536 | |
537 | // Compare constant values. |
538 | const APInt &LA = LC->getAPInt(); |
539 | const APInt &RA = RC->getAPInt(); |
540 | unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); |
541 | if (LBitWidth != RBitWidth) |
542 | return (int)LBitWidth - (int)RBitWidth; |
543 | return LA.ult(RA) ? -1 : 1; |
544 | } |
545 | |
546 | case scAddRecExpr: { |
547 | const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); |
548 | const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); |
549 | |
550 | // Compare addrec loop depths. |
551 | const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); |
552 | if (LLoop != RLoop) { |
553 | unsigned LDepth = LLoop->getLoopDepth(), |
554 | RDepth = RLoop->getLoopDepth(); |
555 | if (LDepth != RDepth) |
556 | return (int)LDepth - (int)RDepth; |
557 | } |
558 | |
559 | // Addrec complexity grows with operand count. |
560 | unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); |
561 | if (LNumOps != RNumOps) |
562 | return (int)LNumOps - (int)RNumOps; |
563 | |
564 | // Lexicographically compare. |
565 | for (unsigned i = 0; i != LNumOps; ++i) { |
566 | long X = compare(LA->getOperand(i), RA->getOperand(i)); |
567 | if (X != 0) |
568 | return X; |
569 | } |
570 | |
571 | return 0; |
572 | } |
573 | |
574 | case scAddExpr: |
575 | case scMulExpr: |
576 | case scSMaxExpr: |
577 | case scUMaxExpr: { |
578 | const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); |
579 | const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); |
580 | |
581 | // Lexicographically compare n-ary expressions. |
582 | unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); |
583 | if (LNumOps != RNumOps) |
584 | return (int)LNumOps - (int)RNumOps; |
585 | |
586 | for (unsigned i = 0; i != LNumOps; ++i) { |
587 | if (i >= RNumOps) |
588 | return 1; |
589 | long X = compare(LC->getOperand(i), RC->getOperand(i)); |
590 | if (X != 0) |
591 | return X; |
592 | } |
593 | return (int)LNumOps - (int)RNumOps; |
594 | } |
595 | |
596 | case scUDivExpr: { |
597 | const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); |
598 | const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); |
599 | |
600 | // Lexicographically compare udiv expressions. |
601 | long X = compare(LC->getLHS(), RC->getLHS()); |
602 | if (X != 0) |
603 | return X; |
604 | return compare(LC->getRHS(), RC->getRHS()); |
605 | } |
606 | |
607 | case scTruncate: |
608 | case scZeroExtend: |
609 | case scSignExtend: { |
610 | const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); |
611 | const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); |
612 | |
613 | // Compare cast expressions by operand. |
614 | return compare(LC->getOperand(), RC->getOperand()); |
615 | } |
616 | |
617 | case scCouldNotCompute: |
618 | llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!")::llvm::llvm_unreachable_internal("Attempt to use a SCEVCouldNotCompute object!" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 618); |
619 | } |
620 | llvm_unreachable("Unknown SCEV kind!")::llvm::llvm_unreachable_internal("Unknown SCEV kind!", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 620); |
621 | } |
622 | }; |
623 | } // end anonymous namespace |
624 | |
625 | /// Given a list of SCEV objects, order them by their complexity, and group |
626 | /// objects of the same complexity together by value. When this routine is |
627 | /// finished, we know that any duplicates in the vector are consecutive and that |
628 | /// complexity is monotonically increasing. |
629 | /// |
630 | /// Note that we take special precautions to ensure that we get deterministic
631 | /// results from this routine. In other words, we don't want the results of |
632 | /// this to depend on where the addresses of various SCEV objects happened to |
633 | /// land in memory. |
634 | /// |
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
                              LoopInfo *LI) {
  if (Ops.size() < 2) return; // Noop
  if (Ops.size() == 2) {
    // This is the common case, which also happens to be trivially simple.
    // Special case it.
    const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
    if (SCEVComplexityCompare(LI)(RHS, LHS))
      std::swap(LHS, RHS);
    return;
  }

  // Do the rough sort by complexity.
  std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));

  // Now that we are sorted by complexity, group elements of the same
  // complexity. Note that this is, at worst, N^2, but the vector is likely to
  // be extremely short in practice. Note that we take this approach because we
  // do not want to depend on the addresses of the objects we are grouping.
  // The `i != e-2` bound is safe: Ops.size() >= 3 here, so e-2 >= 1, and the
  // last two elements need no scan of their own.
  for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
    const SCEV *S = Ops[i];
    unsigned Complexity = S->getSCEVType();

    // If there are any objects of the same complexity and same value as this
    // one, group them.  Sorting guarantees equal-complexity elements are
    // contiguous, so the inner scan can stop at the first different type.
    for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
      if (Ops[j] == S) { // Found a duplicate.
        // Move it to immediately after i'th element.
        std::swap(Ops[i+1], Ops[j]);
        ++i; // no need to rescan it.
        if (i == e-2) return; // Done!
      }
    }
  }
}
670 | |
671 | // Returns the size of the SCEV S. |
672 | static inline int sizeOfSCEV(const SCEV *S) { |
673 | struct FindSCEVSize { |
674 | int Size; |
675 | FindSCEVSize() : Size(0) {} |
676 | |
677 | bool follow(const SCEV *S) { |
678 | ++Size; |
679 | // Keep looking at all operands of S. |
680 | return true; |
681 | } |
682 | bool isDone() const { |
683 | return false; |
684 | } |
685 | }; |
686 | |
687 | FindSCEVSize F; |
688 | SCEVTraversal<FindSCEVSize> ST(F); |
689 | ST.visitAll(S); |
690 | return F.Size; |
691 | } |
692 | |
namespace {

// Computes the division of one SCEV expression by another:
// Numerator = Denominator * Quotient + Remainder.  Cases that are not
// handled "fail" gracefully by producing Quotient = 0 and
// Remainder = Numerator (see cannotDivide), so callers can always rely on
// the identity above holding.
struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
public:
  // Computes the Quotient and Remainder of the division of Numerator by
  // Denominator.
  static void divide(ScalarEvolution &SE, const SCEV *Numerator,
                     const SCEV *Denominator, const SCEV **Quotient,
                     const SCEV **Remainder) {
    assert(Numerator && Denominator && "Uninitialized SCEV")((Numerator && Denominator && "Uninitialized SCEV" ) ? static_cast<void> (0) : __assert_fail ("Numerator && Denominator && \"Uninitialized SCEV\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 702, __PRETTY_FUNCTION__));

    SCEVDivision D(SE, Numerator, Denominator);

    // Check for the trivial case here to avoid having to check for it in the
    // rest of the code.
    if (Numerator == Denominator) {
      *Quotient = D.One;
      *Remainder = D.Zero;
      return;
    }

    // 0 / X == 0 rem 0.
    if (Numerator->isZero()) {
      *Quotient = D.Zero;
      *Remainder = D.Zero;
      return;
    }

    // A simple case when N/1. The quotient is N.
    if (Denominator->isOne()) {
      *Quotient = Numerator;
      *Remainder = D.Zero;
      return;
    }

    // Split the Denominator when it is a product: divide by each factor in
    // turn.  This only succeeds when every factor divides exactly.
    if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
      const SCEV *Q, *R;
      *Quotient = Numerator;
      for (const SCEV *Op : T->operands()) {
        divide(SE, *Quotient, Op, &Q, &R);
        *Quotient = Q;

        // Bail out when the Numerator is not divisible by one of the terms of
        // the Denominator.
        if (!R->isZero()) {
          *Quotient = D.Zero;
          *Remainder = Numerator;
          return;
        }
      }
      *Remainder = D.Zero;
      return;
    }

    // General case: dispatch on the Numerator's kind.  The constructor has
    // already primed D with the "cannot divide" result, so visitors that
    // do nothing (e.g. visitUnknown) leave a correct answer behind.
    D.visit(Numerator);
    *Quotient = D.Quotient;
    *Remainder = D.Remainder;
  }

  // Except in the trivial case described above, we do not know how to divide
  // Expr by Denominator for the following functions with empty implementation.
  void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
  void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
  void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
  void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
  void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
  void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
  void visitUnknown(const SCEVUnknown *Numerator) {}
  void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}

  // Constant / constant: plain signed APInt division after widening both
  // operands to a common bit width.  If the Denominator is not a constant,
  // the pre-seeded "cannot divide" result is kept.
  void visitConstant(const SCEVConstant *Numerator) {
    if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
      APInt NumeratorVal = Numerator->getAPInt();
      APInt DenominatorVal = D->getAPInt();
      uint32_t NumeratorBW = NumeratorVal.getBitWidth();
      uint32_t DenominatorBW = DenominatorVal.getBitWidth();

      if (NumeratorBW > DenominatorBW)
        DenominatorVal = DenominatorVal.sext(NumeratorBW);
      else if (NumeratorBW < DenominatorBW)
        NumeratorVal = NumeratorVal.sext(DenominatorBW);

      APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
      APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
      APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
      Quotient = SE.getConstant(QuotientVal);
      Remainder = SE.getConstant(RemainderVal);
      return;
    }
  }

  // {Start,+,Step} / D == {Start/D,+,Step/D} when both divisions succeed and
  // the recurrence is affine; otherwise give up.
  void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
    const SCEV *StartQ, *StartR, *StepQ, *StepR;
    if (!Numerator->isAffine())
      return cannotDivide(Numerator);
    divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
    divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
    // Bail out if the types do not match.
    Type *Ty = Denominator->getType();
    if (Ty != StartQ->getType() || Ty != StartR->getType() ||
        Ty != StepQ->getType() || Ty != StepR->getType())
      return cannotDivide(Numerator);
    Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
                                Numerator->getNoWrapFlags());
    Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
                                 Numerator->getNoWrapFlags());
  }

  // (A + B + ...) / D: divide each addend independently and re-sum the
  // quotients and remainders.
  void visitAddExpr(const SCEVAddExpr *Numerator) {
    SmallVector<const SCEV *, 2> Qs, Rs;
    Type *Ty = Denominator->getType();

    for (const SCEV *Op : Numerator->operands()) {
      const SCEV *Q, *R;
      divide(SE, Op, Denominator, &Q, &R);

      // Bail out if types do not match.
      if (Ty != Q->getType() || Ty != R->getType())
        return cannotDivide(Numerator);

      Qs.push_back(Q);
      Rs.push_back(R);
    }

    if (Qs.size() == 1) {
      Quotient = Qs[0];
      Remainder = Rs[0];
      return;
    }

    Quotient = SE.getAddExpr(Qs);
    Remainder = SE.getAddExpr(Rs);
  }

  // (A * B * ...) / D: if D divides one of the factors exactly, replace that
  // factor by its quotient; otherwise fall back to a parameter-rewriting
  // strategy that only works for a SCEVUnknown denominator.
  void visitMulExpr(const SCEVMulExpr *Numerator) {
    SmallVector<const SCEV *, 2> Qs;
    Type *Ty = Denominator->getType();

    bool FoundDenominatorTerm = false;
    for (const SCEV *Op : Numerator->operands()) {
      // Bail out if types do not match.
      if (Ty != Op->getType())
        return cannotDivide(Numerator);

      // Only the first factor divisible by D is divided; remaining factors
      // are copied through unchanged.
      if (FoundDenominatorTerm) {
        Qs.push_back(Op);
        continue;
      }

      // Check whether Denominator divides one of the product operands.
      const SCEV *Q, *R;
      divide(SE, Op, Denominator, &Q, &R);
      if (!R->isZero()) {
        Qs.push_back(Op);
        continue;
      }

      // Bail out if types do not match.
      if (Ty != Q->getType())
        return cannotDivide(Numerator);

      FoundDenominatorTerm = true;
      Qs.push_back(Q);
    }

    if (FoundDenominatorTerm) {
      Remainder = Zero;
      if (Qs.size() == 1)
        Quotient = Qs[0];
      else
        Quotient = SE.getMulExpr(Qs);
      return;
    }

    if (!isa<SCEVUnknown>(Denominator))
      return cannotDivide(Numerator);

    // The Remainder is obtained by replacing Denominator by 0 in Numerator.
    ValueToValueMap RewriteMap;
    RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
        cast<SCEVConstant>(Zero)->getValue();
    Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);

    if (Remainder->isZero()) {
      // The Quotient is obtained by replacing Denominator by 1 in Numerator.
      RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
          cast<SCEVConstant>(One)->getValue();
      Quotient =
          SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
      return;
    }

    // Quotient is (Numerator - Remainder) divided by Denominator.
    const SCEV *Q, *R;
    const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
    // This SCEV does not seem to simplify: fail the division here.
    if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator))
      return cannotDivide(Numerator);
    divide(SE, Diff, Denominator, &Q, &R);
    if (R != Zero)
      return cannotDivide(Numerator);
    Quotient = Q;
  }

private:
  SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
               const SCEV *Denominator)
      : SE(S), Denominator(Denominator) {
    Zero = SE.getZero(Denominator->getType());
    One = SE.getOne(Denominator->getType());

    // We generally do not know how to divide Expr by Denominator. We
    // initialize the division to a "cannot divide" state to simplify the rest
    // of the code.
    cannotDivide(Numerator);
  }

  // Convenience function for giving up on the division. We set the quotient to
  // be equal to zero and the remainder to be equal to the numerator.
  void cannotDivide(const SCEV *Numerator) {
    Quotient = Zero;
    Remainder = Numerator;
  }

  ScalarEvolution &SE;
  const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
};

} // end anonymous namespace
922 | |
923 | //===----------------------------------------------------------------------===// |
924 | // Simple SCEV method implementations |
925 | //===----------------------------------------------------------------------===// |
926 | |
/// Compute BC(It, K). The result has width W. Assume, K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                       ScalarEvolution &SE,
                                       Type *ResultTy) {
  // Handle the simplest case efficiently.
  if (K == 1)
    return SE.getTruncateOrZeroExtend(It, ResultTy);

  // We are using the following formula for BC(It, K):
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
  //
  // Suppose, W is the bitwidth of the return value.  We must be prepared for
  // overflow.  Hence, we must assure that the result of our computation is
  // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
  // safe in modular arithmetic.
  //
  // However, this code doesn't use exactly that formula; the formula it uses
  // is something like the following, where T is the number of factors of 2 in
  // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
  // exponentiation:
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
  //
  // This formula is trivially equivalent to the previous formula.  However,
  // this formula can be implemented much more efficiently.  The trick is that
  // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
  // arithmetic.  To do exact division in modular arithmetic, all we have
  // to do is multiply by the inverse.  Therefore, this step can be done at
  // width W.
  //
  // The next issue is how to safely do the division by 2^T.  The way this
  // is done is by doing the multiplication step at a width of at least W + T
  // bits.  This way, the bottom W+T bits of the product are accurate. Then,
  // when we perform the division by 2^T (which is equivalent to a right shift
  // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
  // truncated out after the division by 2^T.
  //
  // In comparison to just directly using the first formula, this technique
  // is much more efficient; using the first formula requires W * K bits,
  // but this formula requires less than W + K bits.  Also, the first formula
  // requires a division step, whereas this formula only requires multiplies
  // and shifts.
  //
  // It doesn't matter whether the subtraction step is done in the calculation
  // width or the input iteration count's width; if the subtraction overflows,
  // the result must be zero anyway.  We prefer here to do it in the width of
  // the induction variable because it helps a lot for certain cases; CodeGen
  // isn't smart enough to ignore the overflow, which leads to much less
  // efficient code if the width of the subtraction is wider than the native
  // register width.
  //
  // (It's possible to not widen at all by pulling out factors of 2 before
  // the multiplication; for example, K=2 can be calculated as
  // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  // extra arithmetic, so it's not an obvious win, and it gets
  // much more complicated for K > 3.)

  // Protection from insane SCEVs; this bound is conservative,
  // but it probably doesn't matter.
  if (K > 1000)
    return SE.getCouldNotCompute();

  unsigned W = SE.getTypeSizeInBits(ResultTy);

  // Calculate K! / 2^T and T; we divide out the factors of two before
  // multiplying for calculating K! / 2^T to avoid overflow.
  // Other overflow doesn't matter because we only care about the bottom
  // W bits of the result.
  //
  // T starts at 1 and the loop starts at i = 3 because the factors 1 and 2 of
  // K! are handled up front: factor 2 contributes exactly one trailing zero
  // (hence T = 1) and an odd part of 1.  K >= 2 is guaranteed here since the
  // K == 1 case returned early above.
  APInt OddFactorial(W, 1);
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i) {
    APInt Mult(W, i);
    unsigned TwoFactors = Mult.countTrailingZeros();
    T += TwoFactors;
    // Strip the factors of two from i before accumulating its odd part.
    Mult = Mult.lshr(TwoFactors);
    OddFactorial *= Mult;
  }

  // We need at least W + T bits for the multiplication step
  unsigned CalculationBits = W + T;

  // Calculate 2^T, at width T+W.
  APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);

  // Calculate the multiplicative inverse of K! / 2^T;
  // this multiplication factor will perform the exact division by
  // K! / 2^T.  The inverse is computed modulo 2^W (Mod is 2^W represented at
  // width W+1), which is valid because OddFactorial is odd and therefore
  // coprime to 2^W.
  APInt Mod = APInt::getSignedMinValue(W+1);
  APInt MultiplyFactor = OddFactorial.zext(W+1);
  MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  MultiplyFactor = MultiplyFactor.trunc(W);

  // Calculate the product It * (It - 1) * ... * (It - K + 1), at width T+W.
  IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                CalculationBits);
  const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  for (unsigned i = 1; i != K; ++i) {
    const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    Dividend = SE.getMulExpr(Dividend,
                             SE.getTruncateOrZeroExtend(S, CalculationTy));
  }

  // Divide by 2^T
  const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));

  // Truncate the result, and divide by K! / 2^T (i.e. multiply by the inverse
  // computed above).

  return SE.getMulExpr(SE.getConstant(MultiplyFactor),
                       SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}
1037 | |
1038 | /// Return the value of this chain of recurrences at the specified iteration |
1039 | /// number. We can evaluate this recurrence by multiplying each element in the |
1040 | /// chain by the binomial coefficient corresponding to it. In other words, we |
1041 | /// can evaluate {A,+,B,+,C,+,D} as: |
1042 | /// |
1043 | /// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) |
1044 | /// |
1045 | /// where BC(It, k) stands for binomial coefficient. |
1046 | /// |
1047 | const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, |
1048 | ScalarEvolution &SE) const { |
1049 | const SCEV *Result = getStart(); |
1050 | for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { |
1051 | // The computation is correct in the face of overflow provided that the |
1052 | // multiplication is performed _after_ the evaluation of the binomial |
1053 | // coefficient. |
1054 | const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); |
1055 | if (isa<SCEVCouldNotCompute>(Coeff)) |
1056 | return Coeff; |
1057 | |
1058 | Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff)); |
1059 | } |
1060 | return Result; |
1061 | } |
1062 | |
1063 | //===----------------------------------------------------------------------===// |
1064 | // SCEV Expression folder implementations |
1065 | //===----------------------------------------------------------------------===// |
1066 | |
1067 | const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, |
1068 | Type *Ty) { |
1069 | assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&((getTypeSizeInBits(Op->getType()) > getTypeSizeInBits( Ty) && "This is not a truncating conversion!") ? static_cast <void> (0) : __assert_fail ("getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && \"This is not a truncating conversion!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 1070, __PRETTY_FUNCTION__)) |
1070 | "This is not a truncating conversion!")((getTypeSizeInBits(Op->getType()) > getTypeSizeInBits( Ty) && "This is not a truncating conversion!") ? static_cast <void> (0) : __assert_fail ("getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && \"This is not a truncating conversion!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 1070, __PRETTY_FUNCTION__)); |
1071 | assert(isSCEVable(Ty) &&((isSCEVable(Ty) && "This is not a conversion to a SCEVable type!" ) ? static_cast<void> (0) : __assert_fail ("isSCEVable(Ty) && \"This is not a conversion to a SCEVable type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 1072, __PRETTY_FUNCTION__)) |
1072 | "This is not a conversion to a SCEVable type!")((isSCEVable(Ty) && "This is not a conversion to a SCEVable type!" ) ? static_cast<void> (0) : __assert_fail ("isSCEVable(Ty) && \"This is not a conversion to a SCEVable type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 1072, __PRETTY_FUNCTION__)); |
1073 | Ty = getEffectiveSCEVType(Ty); |
1074 | |
1075 | FoldingSetNodeID ID; |
1076 | ID.AddInteger(scTruncate); |
1077 | ID.AddPointer(Op); |
1078 | ID.AddPointer(Ty); |
1079 | void *IP = nullptr; |
1080 | if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; |
1081 | |
1082 | // Fold if the operand is constant. |
1083 | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) |
1084 | return getConstant( |
1085 | cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty))); |
1086 | |
1087 | // trunc(trunc(x)) --> trunc(x) |
1088 | if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) |
1089 | return getTruncateExpr(ST->getOperand(), Ty); |
1090 | |
1091 | // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing |
1092 | if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) |
1093 | return getTruncateOrSignExtend(SS->getOperand(), Ty); |
1094 | |
1095 | // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing |
1096 | if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) |
1097 | return getTruncateOrZeroExtend(SZ->getOperand(), Ty); |
1098 | |
1099 | // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can |
1100 | // eliminate all the truncates, or we replace other casts with truncates. |
1101 | if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) { |
1102 | SmallVector<const SCEV *, 4> Operands; |
1103 | bool hasTrunc = false; |
1104 | for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) { |
1105 | const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty); |
1106 | if (!isa<SCEVCastExpr>(SA->getOperand(i))) |
1107 | hasTrunc = isa<SCEVTruncateExpr>(S); |
1108 | Operands.push_back(S); |
1109 | } |
1110 | if (!hasTrunc) |
1111 | return getAddExpr(Operands); |
1112 | UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. |
1113 | } |
1114 | |
1115 | // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can |
1116 | // eliminate all the truncates, or we replace other casts with truncates. |
1117 | if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) { |
1118 | SmallVector<const SCEV *, 4> Operands; |
1119 | bool hasTrunc = false; |
1120 | for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) { |
1121 | const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty); |
1122 | if (!isa<SCEVCastExpr>(SM->getOperand(i))) |
1123 | hasTrunc = isa<SCEVTruncateExpr>(S); |
1124 | Operands.push_back(S); |
1125 | } |
1126 | if (!hasTrunc) |
1127 | return getMulExpr(Operands); |
1128 | UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. |
1129 | } |
1130 | |
1131 | // If the input value is a chrec scev, truncate the chrec's operands. |
1132 | if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { |
1133 | SmallVector<const SCEV *, 4> Operands; |
1134 | for (const SCEV *Op : AddRec->operands()) |
1135 | Operands.push_back(getTruncateExpr(Op, Ty)); |
1136 | return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); |
1137 | } |
1138 | |
1139 | // The cast wasn't folded; create an explicit cast node. We can reuse |
1140 | // the existing insert position since if we get here, we won't have |
1141 | // made any changes which would invalidate it. |
1142 | SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), |
1143 | Op, Ty); |
1144 | UniqueSCEVs.InsertNode(S, IP); |
1145 | return S; |
1146 | } |
1147 | |
1148 | // Get the limit of a recurrence such that incrementing by Step cannot cause |
1149 | // signed overflow as long as the value of the recurrence within the |
1150 | // loop does not exceed this limit before incrementing. |
1151 | static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, |
1152 | ICmpInst::Predicate *Pred, |
1153 | ScalarEvolution *SE) { |
1154 | unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); |
1155 | if (SE->isKnownPositive(Step)) { |
1156 | *Pred = ICmpInst::ICMP_SLT; |
1157 | return SE->getConstant(APInt::getSignedMinValue(BitWidth) - |
1158 | SE->getSignedRange(Step).getSignedMax()); |
1159 | } |
1160 | if (SE->isKnownNegative(Step)) { |
1161 | *Pred = ICmpInst::ICMP_SGT; |
1162 | return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - |
1163 | SE->getSignedRange(Step).getSignedMin()); |
1164 | } |
1165 | return nullptr; |
1166 | } |
1167 | |
1168 | // Get the limit of a recurrence such that incrementing by Step cannot cause |
1169 | // unsigned overflow as long as the value of the recurrence within the loop does |
1170 | // not exceed this limit before incrementing. |
1171 | static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, |
1172 | ICmpInst::Predicate *Pred, |
1173 | ScalarEvolution *SE) { |
1174 | unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); |
1175 | *Pred = ICmpInst::ICMP_ULT; |
1176 | |
1177 | return SE->getConstant(APInt::getMinValue(BitWidth) - |
1178 | SE->getUnsignedRange(Step).getUnsignedMax()); |
1179 | } |
1180 | |
namespace {

// Provides the member-function-pointer type used to abstract over
// ScalarEvolution::getSignExtendExpr / getZeroExtendExpr.
struct ExtendOpTraitsBase {
  typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
};

// Used to make code generic over signed and unsigned overflow.
template <typename ExtendOp> struct ExtendOpTraits {
  // Members present:
  //
  // static const SCEV::NoWrapFlags WrapType;
  //
  // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
  //
  // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  //                                            ICmpInst::Predicate *Pred,
  //                                            ScalarEvolution *SE);
};

// Signed flavor: sext, NSW, signed overflow limit.
template <>
struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getSignedOverflowLimitForStep(Step, Pred, SE);
  }
};

// Out-of-line definition of the static data member.
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;

// Unsigned flavor: zext, NUW, unsigned overflow limit.
template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getUnsignedOverflowLimitForStep(Step, Pred, SE);
  }
};

// Out-of-line definition of the static data member.
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
} // end anonymous namespace
1232 | |
1233 | // The recurrence AR has been shown to have no signed/unsigned wrap or something |
1234 | // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as |
1235 | // easily prove NSW/NUW for its preincrement or postincrement sibling. This |
1236 | // allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step + |
1237 | // Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the |
1238 | // expression "Step + sext/zext(PreIncAR)" is congruent with |
1239 | // "sext/zext(PostIncAR)" |
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const Loop *L = AR->getLoop();
  const SCEV *Start = AR->getStart();
  const SCEV *Step = AR->getStepRecurrence(*SE);

  // Check for a simple looking step prior to loop entry.
  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
  if (!SA)
    return nullptr;

  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
  // subtraction is expensive. For this purpose, perform a quick and dirty
  // difference, by checking for Step in the operand list.
  SmallVector<const SCEV *, 4> DiffOps;
  for (const SCEV *Op : SA->operands())
    if (Op != Step)
      DiffOps.push_back(Op);

  // If no operand was dropped, Step did not appear in Start and there is no
  // pre-increment sibling to normalize through.
  if (DiffOps.size() == SA->getNumOperands())
    return nullptr;

  // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
  // `Step`, using one of three strategies below:

  // 1. NSW/NUW flags on the step increment.
  auto PreStartFlags =
    ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
  const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
      SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));

  // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
  // "S+X does not sign/unsign-overflow".
  //
  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
      !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
    return PreStart;

  // 2. Direct overflow check on the step operation's expression: compare
  // Ext(Start) against Ext(PreStart) + Ext(Step) at double the bit width.
  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
  const SCEV *OperandExtendedStart =
      SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
                     (SE->*GetExtendExpr)(Step, WideTy));
  if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
    if (PreAR && AR->getNoWrapFlags(WrapType)) {
      // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
      // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
      // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`.  Cache this fact.
      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
    }
    return PreStart;
  }

  // 3. Loop precondition: a guard on loop entry that bounds PreStart away
  // from the value at which adding Step would overflow.
  ICmpInst::Predicate Pred;
  const SCEV *OverflowLimit =
      ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);

  if (OverflowLimit &&
      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
    return PreStart;

  // None of the strategies applied; no usable pre-start.
  return nullptr;
}
1312 | |
1313 | // Get the normalized zero or sign extended expression for this AddRec's Start. |
1314 | template <typename ExtendOpTy> |
1315 | static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, |
1316 | ScalarEvolution *SE) { |
1317 | auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; |
1318 | |
1319 | const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE); |
1320 | if (!PreStart) |
1321 | return (SE->*GetExtendExpr)(AR->getStart(), Ty); |
1322 | |
1323 | return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty), |
1324 | (SE->*GetExtendExpr)(PreStart, Ty)); |
1325 | } |
1326 | |
1327 | // Try to prove away overflow by looking at "nearby" add recurrences. A |
1328 | // motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it |
1329 | // does not itself wrap then we can conclude that `{1,+,4}` is `nuw`. |
1330 | // |
1331 | // Formally: |
1332 | // |
1333 | // {S,+,X} == {S-T,+,X} + T |
1334 | // => Ext({S,+,X}) == Ext({S-T,+,X} + T) |
1335 | // |
1336 | // If ({S-T,+,X} + T) does not overflow ... (1) |
1337 | // |
1338 | // RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T) |
1339 | // |
1340 | // If {S-T,+,X} does not overflow ... (2) |
1341 | // |
1342 | // RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T) |
1343 | // == {Ext(S-T)+Ext(T),+,Ext(X)} |
1344 | // |
1345 | // If (S-T)+T does not overflow ... (3) |
1346 | // |
1347 | // RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)} |
1348 | // == {Ext(S),+,Ext(X)} == LHS |
1349 | // |
1350 | // Thus, if (1), (2) and (3) are true for some T, then |
1351 | // Ext({S,+,X}) == {Ext(S),+,Ext(X)} |
1352 | // |
1353 | // (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T) |
1354 | // does not overflow" restricted to the 0th iteration. Therefore we only need |
1355 | // to check for (1) and (2). |
1356 | // |
1357 | // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T |
1358 | // is `Delta` (defined below). |
1359 | // |
template <typename ExtendOpTy>
bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
                                                const SCEV *Step,
                                                const Loop *L) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;

  // We restrict `Start` to a constant to prevent SCEV from spending too much
  // time here.  It is correct (but more expensive) to continue with a
  // non-constant `Start` and do a general SCEV subtraction to compute
  // `PreStart` below.
  //
  const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
  if (!StartC)
    return false;

  APInt StartAI = StartC->getAPInt();

  // Probe a few "nearby" start values (see premise (2) in the comment above).
  // NOTE(review): Delta is `unsigned`, so -2/-1 become large positive values;
  // PreStart and DeltaS are computed from the same Delta and so remain
  // mutually consistent, but for types wider than 32 bits the negative probes
  // are unlikely to match an existing recurrence -- confirm intent.
  for (unsigned Delta : {-2, -1, 1, 2}) {
    const SCEV *PreStart = getConstant(StartAI - Delta);

    // Look up {PreStart,+,Step}<L> in the uniquing table without creating it.
    FoldingSetNodeID ID;
    ID.AddInteger(scAddRecExpr);
    ID.AddPointer(PreStart);
    ID.AddPointer(Step);
    ID.AddPointer(L);
    void *IP = nullptr;
    const auto *PreAR =
        static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));

    // Give up if we don't already have the add recurrence we need because
    // actually constructing an add recurrence is relatively expensive.
    if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)
      const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
      ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
      const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
          DeltaS, &Pred, this);
      if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
        return true;
    }
  }

  return false;
}
1403 | |
/// Return a SCEV representing `Op` zero-extended to type `Ty`.
///
/// Rather than always materializing a SCEVZeroExtendExpr node, this tries a
/// sequence of folds first: constant folding, collapsing nested casts,
/// pushing the zext into the operands of an add recurrence once the
/// recurrence is proven not to unsigned-wrap, and distributing over <nuw>
/// adds.  Only when no fold applies is an explicit cast node created and
/// uniqued in UniqueSCEVs.
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
                                               Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
        cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));

  // zext(zext(x)) --> zext(x)
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getZeroExtendExpr(SZ->getOperand(), Ty);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scZeroExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // zext(trunc(x)) --> zext(x) or x or trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
    // It's possible the bits taken off by the truncate were all zero bits. If
    // so, we should be able to simplify this further.
    const SCEV *X = ST->getOperand();
    ConstantRange CR = getUnsignedRange(X);
    unsigned TruncBits = getTypeSizeInBits(ST->getType());
    unsigned NewBits = getTypeSizeInBits(Ty);
    // If x truncated-then-zero-extended covers the same range as x converted
    // directly to the wide type, the truncate lost no information.
    if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
            CR.zextOrTrunc(NewBits)))
      return getTruncateOrZeroExtend(X, Ty);
  }

  // If the input value is a chrec scev, and we can prove that the value
  // did not overflow the old, smaller, value, we can zero extend all of the
  // operands (often constants). This allows analysis of something like
  // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    if (AR->isAffine()) {
      const SCEV *Start = AR->getStart();
      const SCEV *Step = AR->getStepRecurrence(*this);
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
      const Loop *L = AR->getLoop();

      // Try to strengthen the addrec's flags from constant-range facts
      // first; the const_cast only mutates the cached wrap flags on the
      // uniqued node, not its value.
      if (!AR->hasNoUnsignedWrap()) {
        auto NewFlags = proveNoWrapViaConstantRanges(AR);
        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
      }

      // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->hasNoUnsignedWrap())
        return getAddRecExpr(
            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
            getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());

      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly casted to
        // the addrec's type. The count is always unsigned.
        const SCEV *CastedMaxBECount =
            getTruncateOrZeroExtend(MaxBECount, Start->getType());
        const SCEV *RecastedMaxBECount =
            getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
        if (MaxBECount == RecastedMaxBECount) {
          Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no unsigned overflow, by
          // comparing zext(Start+Step*MaxBECount) against the same sum
          // computed entirely in the double-width type.
          const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
          const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
          const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
          const SCEV *WideMaxBECount =
              getZeroExtendExpr(CastedMaxBECount, WideTy);
          const SCEV *OperandExtendedAdd =
              getAddExpr(WideStart,
                         getMulExpr(WideMaxBECount,
                                    getZeroExtendExpr(Step, WideTy)));
          if (ZAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NUW, which is propagated to this AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
                getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
          }
          // Similar to above, only this time treat the step value as signed.
          // This covers loops that count down.
          OperandExtendedAdd =
              getAddExpr(WideStart,
                         getMulExpr(WideMaxBECount,
                                    getSignExtendExpr(Step, WideTy)));
          if (ZAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NW, which is propagated to this AddRec.
            // Negative step causes unsigned wrap, but it still can't self-wrap.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
                getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
          }
        }
      }

      // Normally, in the cases we can prove no-overflow via a
      // backedge guarding condition, we can also compute a backedge
      // taken count for the loop. The exceptions are assumptions and
      // guards present in the loop -- SCEV is not great at exploiting
      // these to compute max backedge taken counts, but can still use
      // these to prove lack of overflow. Use this fact to avoid
      // doing extra work that may not pay off.
      if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
          !AC.assumptions().empty()) {
        // If the backedge is guarded by a comparison with the pre-inc
        // value the addrec is safe. Also, if the entry is guarded by
        // a comparison with the start value and the backedge is
        // guarded by a comparison with the post-inc value, the addrec
        // is safe.
        if (isKnownPositive(Step)) {
          // N is the largest value the pre-inc iterate may take without the
          // next increment wrapping past UINT_MIN (i.e. overflowing).
          const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
                                      getUnsignedRange(Step).getUnsignedMax());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
                                           AR->getPostIncExpr(*this), N))) {
            // Cache knowledge of AR NUW, which is propagated to this
            // AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
                getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
          }
        } else if (isKnownNegative(Step)) {
          const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
                                      getSignedRange(Step).getSignedMin());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
                                           AR->getPostIncExpr(*this), N))) {
            // Cache knowledge of AR NW, which is propagated to this
            // AddRec. Negative step causes unsigned wrap, but it
            // still can't self-wrap.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
                getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
          }
        }
      }

      // Last resort: look for an already-built sibling addrec whose start
      // differs by a small delta and whose flags prove this one can't wrap.
      if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
        return getAddRecExpr(
            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
            getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
      }
    }

  if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
    // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
    if (SA->hasNoUnsignedWrap()) {
      // If the addition does not unsign overflow then we can, by definition,
      // commute the zero extension with the addition operation.
      SmallVector<const SCEV *, 4> Ops;
      for (const auto *Op : SA->operands())
        Ops.push_back(getZeroExtendExpr(Op, Ty));
      return getAddExpr(Ops, SCEV::FlagNUW);
    }
  }

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated
  // by the recursive calls above creating new SCEVs.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}
1599 | |
/// Return a SCEV representing `Op` sign-extended to type `Ty`.
///
/// Mirrors getZeroExtendExpr: tries constant folding, nested-cast collapse,
/// truncate elimination, distribution over <nsw> adds, and pushing the sext
/// into a non-wrapping add recurrence before falling back to an explicit
/// SCEVSignExtendExpr node.  As a final fold, a provably non-negative
/// operand is rewritten as a zext instead.
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
                                               Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
        cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));

  // sext(sext(x)) --> sext(x)
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getSignExtendExpr(SS->getOperand(), Ty);

  // sext(zext(x)) --> zext(x)
  // A zero-extended value is already non-negative, so sign extension of it
  // is the same as zero extension.
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getZeroExtendExpr(SZ->getOperand(), Ty);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scSignExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // sext(trunc(x)) --> sext(x) or x or trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
    // It's possible the bits taken off by the truncate were all sign bits. If
    // so, we should be able to simplify this further.
    const SCEV *X = ST->getOperand();
    ConstantRange CR = getSignedRange(X);
    unsigned TruncBits = getTypeSizeInBits(ST->getType());
    unsigned NewBits = getTypeSizeInBits(Ty);
    // If x truncated-then-sign-extended covers the same range as x converted
    // directly to the wide type, the truncate lost no information.
    if (CR.truncate(TruncBits).signExtend(NewBits).contains(
            CR.sextOrTrunc(NewBits)))
      return getTruncateOrSignExtend(X, Ty);
  }

  // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
  if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
    if (SA->getNumOperands() == 2) {
      auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
      auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
      if (SMul && SC1) {
        if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
          const APInt &C1 = SC1->getAPInt();
          const APInt &C2 = SC2->getAPInt();
          // C2 being a positive power of two greater than C1 guarantees the
          // low bits contributed by C1 can't carry into the product's bits,
          // so the sext distributes over the sum.
          if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
              C2.ugt(C1) && C2.isPowerOf2())
            return getAddExpr(getSignExtendExpr(SC1, Ty),
                              getSignExtendExpr(SMul, Ty));
        }
      }
    }

    // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
    if (SA->hasNoSignedWrap()) {
      // If the addition does not sign overflow then we can, by definition,
      // commute the sign extension with the addition operation.
      SmallVector<const SCEV *, 4> Ops;
      for (const auto *Op : SA->operands())
        Ops.push_back(getSignExtendExpr(Op, Ty));
      return getAddExpr(Ops, SCEV::FlagNSW);
    }
  }
  // If the input value is a chrec scev, and we can prove that the value
  // did not overflow the old, smaller, value, we can sign extend all of the
  // operands (often constants). This allows analysis of something like
  // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    if (AR->isAffine()) {
      const SCEV *Start = AR->getStart();
      const SCEV *Step = AR->getStepRecurrence(*this);
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
      const Loop *L = AR->getLoop();

      // Try to strengthen the addrec's flags from constant-range facts
      // first; the const_cast only mutates the cached wrap flags on the
      // uniqued node, not its value.
      if (!AR->hasNoSignedWrap()) {
        auto NewFlags = proveNoWrapViaConstantRanges(AR);
        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
      }

      // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->hasNoSignedWrap())
        return getAddRecExpr(
            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
            getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);

      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly casted to
        // the addrec's type. The count is always unsigned.
        const SCEV *CastedMaxBECount =
            getTruncateOrZeroExtend(MaxBECount, Start->getType());
        const SCEV *RecastedMaxBECount =
            getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
        if (MaxBECount == RecastedMaxBECount) {
          Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no signed overflow, by
          // comparing sext(Start+Step*MaxBECount) against the same sum
          // computed entirely in the double-width type.
          const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
          const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
          const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
          const SCEV *WideMaxBECount =
              getZeroExtendExpr(CastedMaxBECount, WideTy);
          const SCEV *OperandExtendedAdd =
              getAddExpr(WideStart,
                         getMulExpr(WideMaxBECount,
                                    getSignExtendExpr(Step, WideTy)));
          if (SAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NSW, which is propagated to this AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
                getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
          }
          // Similar to above, only this time treat the step value as unsigned.
          // This covers loops that count up with an unsigned step.
          OperandExtendedAdd =
              getAddExpr(WideStart,
                         getMulExpr(WideMaxBECount,
                                    getZeroExtendExpr(Step, WideTy)));
          if (SAdd == OperandExtendedAdd) {
            // If AR wraps around then
            //
            //    abs(Step) * MaxBECount > unsigned-max(AR->getType())
            // => SAdd != OperandExtendedAdd
            //
            // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
            // (SAdd == OperandExtendedAdd => AR is NW)

            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);

            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
                getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
          }
        }
      }

      // Normally, in the cases we can prove no-overflow via a
      // backedge guarding condition, we can also compute a backedge
      // taken count for the loop. The exceptions are assumptions and
      // guards present in the loop -- SCEV is not great at exploiting
      // these to compute max backedge taken counts, but can still use
      // these to prove lack of overflow. Use this fact to avoid
      // doing extra work that may not pay off.

      if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
          !AC.assumptions().empty()) {
        // If the backedge is guarded by a comparison with the pre-inc
        // value the addrec is safe. Also, if the entry is guarded by
        // a comparison with the start value and the backedge is
        // guarded by a comparison with the post-inc value, the addrec
        // is safe.
        ICmpInst::Predicate Pred;
        const SCEV *OverflowLimit =
            getSignedOverflowLimitForStep(Step, &Pred, this);
        if (OverflowLimit &&
            (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
             (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
              isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
                                          OverflowLimit)))) {
          // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
          return getAddRecExpr(
              getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
              getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
        }
      }

      // If Start and Step are constants, check if we can apply this
      // transformation:
      // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
      auto *SC1 = dyn_cast<SCEVConstant>(Start);
      auto *SC2 = dyn_cast<SCEVConstant>(Step);
      if (SC1 && SC2) {
        const APInt &C1 = SC1->getAPInt();
        const APInt &C2 = SC2->getAPInt();
        if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
            C2.isPowerOf2()) {
          Start = getSignExtendExpr(Start, Ty);
          const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
                                            AR->getNoWrapFlags());
          return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
        }
      }

      // Last resort: look for an already-built sibling addrec whose start
      // differs by a small delta and whose flags prove this one can't wrap.
      if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
        return getAddRecExpr(
            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
            getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
      }
    }

  // If the input value is provably positive and we could not simplify
  // away the sext build a zext instead.
  if (isKnownNonNegative(Op))
    return getZeroExtendExpr(Op, Ty);

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated
  // by the recursive calls above creating new SCEVs.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}
1826 | |
1827 | /// getAnyExtendExpr - Return a SCEV for the given operand extended with |
1828 | /// unspecified bits out to the given type. |
1829 | /// |
1830 | const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, |
1831 | Type *Ty) { |
1832 | assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&((getTypeSizeInBits(Op->getType()) < getTypeSizeInBits( Ty) && "This is not an extending conversion!") ? static_cast <void> (0) : __assert_fail ("getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && \"This is not an extending conversion!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 1833, __PRETTY_FUNCTION__)) |
1833 | "This is not an extending conversion!")((getTypeSizeInBits(Op->getType()) < getTypeSizeInBits( Ty) && "This is not an extending conversion!") ? static_cast <void> (0) : __assert_fail ("getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && \"This is not an extending conversion!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 1833, __PRETTY_FUNCTION__)); |
1834 | assert(isSCEVable(Ty) &&((isSCEVable(Ty) && "This is not a conversion to a SCEVable type!" ) ? static_cast<void> (0) : __assert_fail ("isSCEVable(Ty) && \"This is not a conversion to a SCEVable type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 1835, __PRETTY_FUNCTION__)) |
1835 | "This is not a conversion to a SCEVable type!")((isSCEVable(Ty) && "This is not a conversion to a SCEVable type!" ) ? static_cast<void> (0) : __assert_fail ("isSCEVable(Ty) && \"This is not a conversion to a SCEVable type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 1835, __PRETTY_FUNCTION__)); |
1836 | Ty = getEffectiveSCEVType(Ty); |
1837 | |
1838 | // Sign-extend negative constants. |
1839 | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) |
1840 | if (SC->getAPInt().isNegative()) |
1841 | return getSignExtendExpr(Op, Ty); |
1842 | |
1843 | // Peel off a truncate cast. |
1844 | if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) { |
1845 | const SCEV *NewOp = T->getOperand(); |
1846 | if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) |
1847 | return getAnyExtendExpr(NewOp, Ty); |
1848 | return getTruncateOrNoop(NewOp, Ty); |
1849 | } |
1850 | |
1851 | // Next try a zext cast. If the cast is folded, use it. |
1852 | const SCEV *ZExt = getZeroExtendExpr(Op, Ty); |
1853 | if (!isa<SCEVZeroExtendExpr>(ZExt)) |
1854 | return ZExt; |
1855 | |
1856 | // Next try a sext cast. If the cast is folded, use it. |
1857 | const SCEV *SExt = getSignExtendExpr(Op, Ty); |
1858 | if (!isa<SCEVSignExtendExpr>(SExt)) |
1859 | return SExt; |
1860 | |
1861 | // Force the cast to be folded into the operands of an addrec. |
1862 | if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) { |
1863 | SmallVector<const SCEV *, 4> Ops; |
1864 | for (const SCEV *Op : AR->operands()) |
1865 | Ops.push_back(getAnyExtendExpr(Op, Ty)); |
1866 | return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); |
1867 | } |
1868 | |
1869 | // If the expression is obviously signed, use the sext cast value. |
1870 | if (isa<SCEVSMaxExpr>(Op)) |
1871 | return SExt; |
1872 | |
1873 | // Absent any other information, use the zext cast value. |
1874 | return ZExt; |
1875 | } |
1876 | |
1877 | /// Process the given Ops list, which is a list of operands to be added under |
1878 | /// the given scale, update the given map. This is a helper function for |
1879 | /// getAddRecExpr. As an example of what it does, given a sequence of operands |
1880 | /// that would form an add expression like this: |
1881 | /// |
1882 | /// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r) |
1883 | /// |
1884 | /// where A and B are constants, update the map with these values: |
1885 | /// |
1886 | /// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) |
1887 | /// |
1888 | /// and add 13 + A*B*29 to AccumulatedConstant. |
1889 | /// This will allow getAddRecExpr to produce this: |
1890 | /// |
1891 | /// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) |
1892 | /// |
1893 | /// This form often exposes folding opportunities that are hidden in |
1894 | /// the original operand list. |
1895 | /// |
1896 | /// Return true iff it appears that any interesting folding opportunities |
1897 | /// may be exposed. This helps getAddRecExpr short-circuit extra work in |
1898 | /// the common case where no interesting opportunities are present, and |
1899 | /// is also used as a check to avoid infinite recursion. |
1900 | /// |
1901 | static bool |
1902 | CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, |
1903 | SmallVectorImpl<const SCEV *> &NewOps, |
1904 | APInt &AccumulatedConstant, |
1905 | const SCEV *const *Ops, size_t NumOperands, |
1906 | const APInt &Scale, |
1907 | ScalarEvolution &SE) { |
1908 | bool Interesting = false; |
1909 | |
1910 | // Iterate over the add operands. They are sorted, with constants first. |
1911 | unsigned i = 0; |
1912 | while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { |
1913 | ++i; |
1914 | // Pull a buried constant out to the outside. |
1915 | if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) |
1916 | Interesting = true; |
1917 | AccumulatedConstant += Scale * C->getAPInt(); |
1918 | } |
1919 | |
1920 | // Next comes everything else. We're especially interested in multiplies |
1921 | // here, but they're in the middle, so just visit the rest with one loop. |
1922 | for (; i != NumOperands; ++i) { |
1923 | const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); |
1924 | if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) { |
1925 | APInt NewScale = |
1926 | Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt(); |
1927 | if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) { |
1928 | // A multiplication of a constant with another add; recurse. |
1929 | const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1)); |
1930 | Interesting |= |
1931 | CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, |
1932 | Add->op_begin(), Add->getNumOperands(), |
1933 | NewScale, SE); |
1934 | } else { |
1935 | // A multiplication of a constant with some other value. Update |
1936 | // the map. |
1937 | SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); |
1938 | const SCEV *Key = SE.getMulExpr(MulOps); |
1939 | auto Pair = M.insert({Key, NewScale}); |
1940 | if (Pair.second) { |
1941 | NewOps.push_back(Pair.first->first); |
1942 | } else { |
1943 | Pair.first->second += NewScale; |
1944 | // The map already had an entry for this value, which may indicate |
1945 | // a folding opportunity. |
1946 | Interesting = true; |
1947 | } |
1948 | } |
1949 | } else { |
1950 | // An ordinary operand. Update the map. |
1951 | std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = |
1952 | M.insert({Ops[i], Scale}); |
1953 | if (Pair.second) { |
1954 | NewOps.push_back(Pair.first->first); |
1955 | } else { |
1956 | Pair.first->second += Scale; |
1957 | // The map already had an entry for this value, which may indicate |
1958 | // a folding opportunity. |
1959 | Interesting = true; |
1960 | } |
1961 | } |
1962 | } |
1963 | |
1964 | return Interesting; |
1965 | } |
1966 | |
1967 | // We're trying to construct a SCEV of type `Type' with `Ops' as operands and |
1968 | // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of |
1969 | // can't-overflow flags for the operation if possible. |
1970 | static SCEV::NoWrapFlags |
1971 | StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, |
1972 | const SmallVectorImpl<const SCEV *> &Ops, |
1973 | SCEV::NoWrapFlags Flags) { |
1974 | using namespace std::placeholders; |
1975 | typedef OverflowingBinaryOperator OBO; |
1976 | |
1977 | bool CanAnalyze = |
1978 | Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; |
1979 | (void)CanAnalyze; |
1980 | assert(CanAnalyze && "don't call from other places!")((CanAnalyze && "don't call from other places!") ? static_cast <void> (0) : __assert_fail ("CanAnalyze && \"don't call from other places!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 1980, __PRETTY_FUNCTION__)); |
1981 | |
1982 | int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; |
1983 | SCEV::NoWrapFlags SignOrUnsignWrap = |
1984 | ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); |
1985 | |
1986 | // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. |
1987 | auto IsKnownNonNegative = [&](const SCEV *S) { |
1988 | return SE->isKnownNonNegative(S); |
1989 | }; |
1990 | |
1991 | if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative)) |
1992 | Flags = |
1993 | ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); |
1994 | |
1995 | SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); |
1996 | |
1997 | if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr && |
1998 | Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) { |
1999 | |
2000 | // (A + C) --> (A + C)<nsw> if the addition does not sign overflow |
2001 | // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow |
2002 | |
2003 | const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt(); |
2004 | if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { |
2005 | auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( |
2006 | Instruction::Add, C, OBO::NoSignedWrap); |
2007 | if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) |
2008 | Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); |
2009 | } |
2010 | if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { |
2011 | auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( |
2012 | Instruction::Add, C, OBO::NoUnsignedWrap); |
2013 | if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) |
2014 | Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); |
2015 | } |
2016 | } |
2017 | |
2018 | return Flags; |
2019 | } |
2020 | |
/// Get a canonical add expression, or something simpler if possible.
///
/// \p Ops is a commutative operand list that is simplified in place; each
/// structural simplification recurses back into getAddExpr so later passes
/// see a re-sorted list. The passes, in order: fold constants; merge equal
/// operands into a multiply; hoist a common truncate when the wider sum
/// folds; inline nested adds; regroup operands sharing a constant scale;
/// merge operands that also appear inside a multiply operand; fold
/// loop-invariant operands into the start of a matching add recurrence; and
/// combine addrecs over the same loop. Whatever remains is uniqued as a
/// SCEVAddExpr carrying \p Flags (strengthened where provable).
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
                                        SCEV::NoWrapFlags Flags) {
  assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&((!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && "only nuw or nsw allowed" ) ? static_cast<void> (0) : __assert_fail ("!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && \"only nuw or nsw allowed\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2025, __PRETTY_FUNCTION__))
         "only nuw or nsw allowed")((!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && "only nuw or nsw allowed" ) ? static_cast<void> (0) : __assert_fail ("!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && \"only nuw or nsw allowed\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2025, __PRETTY_FUNCTION__));
  assert(!Ops.empty() && "Cannot get empty add!")((!Ops.empty() && "Cannot get empty add!") ? static_cast <void> (0) : __assert_fail ("!Ops.empty() && \"Cannot get empty add!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2026, __PRETTY_FUNCTION__));
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&((getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVAddExpr operand types don't match!") ? static_cast<void > (0) : __assert_fail ("getEffectiveSCEVType(Ops[i]->getType()) == ETy && \"SCEVAddExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2032, __PRETTY_FUNCTION__))
           "SCEVAddExpr operand types don't match!")((getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVAddExpr operand types don't match!") ? static_cast<void > (0) : __assert_fail ("getEffectiveSCEVType(Ops[i]->getType()) == ETy && \"SCEVAddExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2032, __PRETTY_FUNCTION__));
#endif

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, &LI);

  Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);

  // If there are any constants, fold them together.
  // (After sorting, any constant is at index 0.)
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
    ++Idx;
    assert(Idx < Ops.size())((Idx < Ops.size()) ? static_cast<void> (0) : __assert_fail ("Idx < Ops.size()", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2044, __PRETTY_FUNCTION__));
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
      if (Ops.size() == 2) return Ops[0];
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant zero being added, strip it off.
    if (LHSC->getValue()->isZero()) {
      Ops.erase(Ops.begin());
      --Idx;
    }

    if (Ops.size() == 1) return Ops[0];
  }

  // Okay, check to see if the same value occurs in the operand list more than
  // once.  If so, merge them together into an multiply expression.  Since we
  // sorted the list, these values are required to be adjacent.
  Type *Ty = Ops[0]->getType();
  bool FoundMatch = false;
  for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
    if (Ops[i] == Ops[i+1]) {      // X + Y + Y  -->  X + Y*2
      // Scan ahead to count how many equal operands there are.
      unsigned Count = 2;
      while (i+Count != e && Ops[i+Count] == Ops[i])
        ++Count;
      // Merge the values into a multiply.
      const SCEV *Scale = getConstant(Ty, Count);
      const SCEV *Mul = getMulExpr(Scale, Ops[i]);
      if (Ops.size() == Count)
        return Mul;
      Ops[i] = Mul;
      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
      // Step back one slot and shrink the bound to account for the erase.
      --i; e -= Count - 1;
      FoundMatch = true;
    }
  if (FoundMatch)
    return getAddExpr(Ops, Flags);

  // Check for truncates. If all the operands are truncated from the same
  // type, see if factoring out the truncate would permit the result to be
  // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
  // if the contents of the resulting outer trunc fold to something simple.
  for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
    Type *DstType = Trunc->getType();
    Type *SrcType = Trunc->getOperand()->getType();
    SmallVector<const SCEV *, 8> LargeOps;
    bool Ok = true;
    // Check all the operands to see if they can be represented in the
    // source type of the truncate.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
      if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
        if (T->getOperand()->getType() != SrcType) {
          Ok = false;
          break;
        }
        LargeOps.push_back(T->getOperand());
      } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
        LargeOps.push_back(getAnyExtendExpr(C, SrcType));
      } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
        SmallVector<const SCEV *, 8> LargeMulOps;
        for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
          if (const SCEVTruncateExpr *T =
                dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
            if (T->getOperand()->getType() != SrcType) {
              Ok = false;
              break;
            }
            LargeMulOps.push_back(T->getOperand());
          } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
            LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
          } else {
            Ok = false;
            break;
          }
        }
        if (Ok)
          LargeOps.push_back(getMulExpr(LargeMulOps));
      } else {
        Ok = false;
        break;
      }
    }
    if (Ok) {
      // Evaluate the expression in the larger type.
      const SCEV *Fold = getAddExpr(LargeOps, Flags);
      // If it folds to something simple, use it. Otherwise, don't.
      if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
        return getTruncateExpr(Fold, DstType);
    }
  }

  // Skip past any other cast SCEVs.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
    ++Idx;

  // If there are add operands they would be next.
  if (Idx < Ops.size()) {
    bool DeletedAdd = false;
    while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
      // If we have an add, expand the add operands onto the end of the operands
      // list.
      Ops.erase(Ops.begin()+Idx);
      Ops.append(Add->op_begin(), Add->op_end());
      DeletedAdd = true;
    }

    // If we deleted at least one add, we added operands to the end of the list,
    // and they are not necessarily sorted.  Recurse to resort and resimplify
    // any operands we just acquired.
    if (DeletedAdd)
      return getAddExpr(Ops);
  }

  // Skip over the add expression until we get to a multiply.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
    ++Idx;

  // Check to see if there are any folding opportunities present with
  // operands multiplied by constant values.
  if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
    uint64_t BitWidth = getTypeSizeInBits(Ty);
    DenseMap<const SCEV *, APInt> M;
    SmallVector<const SCEV *, 8> NewOps;
    APInt AccumulatedConstant(BitWidth, 0);
    if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                     Ops.data(), Ops.size(),
                                     APInt(BitWidth, 1), *this)) {
      struct APIntCompare {
        bool operator()(const APInt &LHS, const APInt &RHS) const {
          return LHS.ult(RHS);
        }
      };

      // Some interesting folding opportunity is present, so its worthwhile to
      // re-generate the operands list. Group the operands by constant scale,
      // to avoid multiplying by the same constant scale multiple times.
      std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
      for (const SCEV *NewOp : NewOps)
        MulOpLists[M.find(NewOp)->second].push_back(NewOp);
      // Re-generate the operands list.
      Ops.clear();
      if (AccumulatedConstant != 0)
        Ops.push_back(getConstant(AccumulatedConstant));
      for (auto &MulOp : MulOpLists)
        if (MulOp.first != 0)
          Ops.push_back(getMulExpr(getConstant(MulOp.first),
                                   getAddExpr(MulOp.second)));
      if (Ops.empty())
        return getZero(Ty);
      if (Ops.size() == 1)
        return Ops[0];
      return getAddExpr(Ops);
    }
  }

  // If we are adding something to a multiply expression, make sure the
  // something is not already an operand of the multiply.  If so, merge it into
  // the multiply.
  for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
    const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
    for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
      const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
      if (isa<SCEVConstant>(MulOpSCEV))
        continue;
      for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
        if (MulOpSCEV == Ops[AddOp]) {
          // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
          // Select the multiply's *other* operand: index 1 when MulOp is 0,
          // index 0 otherwise (bool converts to 0/1).
          const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
          if (Mul->getNumOperands() != 2) {
            // If the multiply has more than two operands, we must get the
            // Y*Z term.
            SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
                                                Mul->op_begin()+MulOp);
            MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
            InnerMul = getMulExpr(MulOps);
          }
          const SCEV *One = getOne(Ty);
          const SCEV *AddOne = getAddExpr(One, InnerMul);
          const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
          if (Ops.size() == 2) return OuterMul;
          // Remove both the matched add operand and the multiply; erase the
          // higher index first so the lower index stays valid.
          if (AddOp < Idx) {
            Ops.erase(Ops.begin()+AddOp);
            Ops.erase(Ops.begin()+Idx-1);
          } else {
            Ops.erase(Ops.begin()+Idx);
            Ops.erase(Ops.begin()+AddOp-1);
          }
          Ops.push_back(OuterMul);
          return getAddExpr(Ops);
        }

      // Check this multiply against other multiplies being added together.
      for (unsigned OtherMulIdx = Idx+1;
           OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
           ++OtherMulIdx) {
        const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
        // If MulOp occurs in OtherMul, we can fold the two multiplies
        // together.
        for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
             OMulOp != e; ++OMulOp)
          if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
            // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
            // Pick Mul's operand other than MulOpSCEV (see note above).
            const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
            if (Mul->getNumOperands() != 2) {
              SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
                                                  Mul->op_begin()+MulOp);
              MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
              InnerMul1 = getMulExpr(MulOps);
            }
            const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
            if (OtherMul->getNumOperands() != 2) {
              SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
                                                  OtherMul->op_begin()+OMulOp);
              MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
              InnerMul2 = getMulExpr(MulOps);
            }
            const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
            const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
            if (Ops.size() == 2) return OuterMul;
            Ops.erase(Ops.begin()+Idx);
            Ops.erase(Ops.begin()+OtherMulIdx-1);
            Ops.push_back(OuterMul);
            return getAddExpr(Ops);
          }
      }
    }
  }

  // If there are any add recurrences in the operands list, see if any other
  // added values are loop invariant.  If so, we can fold them into the
  // recurrence.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
    ++Idx;

  // Scan over all recurrences, trying to fold loop invariants into them.
  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
    // Scan all of the other operands to this add and add them to the vector if
    // they are loop invariant w.r.t. the recurrence.
    SmallVector<const SCEV *, 8> LIOps;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
    const Loop *AddRecLoop = AddRec->getLoop();
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (isLoopInvariant(Ops[i], AddRecLoop)) {
        LIOps.push_back(Ops[i]);
        Ops.erase(Ops.begin()+i);
        --i; --e;
      }

    // If we found some loop invariants, fold them into the recurrence.
    if (!LIOps.empty()) {
      //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
      LIOps.push_back(AddRec->getStart());

      SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
                                             AddRec->op_end());
      // This follows from the fact that the no-wrap flags on the outer add
      // expression are applicable on the 0th iteration, when the add recurrence
      // will be equal to its start value.
      AddRecOps[0] = getAddExpr(LIOps, Flags);

      // Build the new addrec. Propagate the NUW and NSW flags if both the
      // outer add and the inner addrec are guaranteed to have no overflow.
      // Always propagate NW.
      Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
      const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);

      // If all of the other operands were loop invariant, we are done.
      if (Ops.size() == 1) return NewRec;

      // Otherwise, add the folded AddRec by the non-invariant parts.
      // AddRec is known to still be in Ops, so this search terminates.
      for (unsigned i = 0;; ++i)
        if (Ops[i] == AddRec) {
          Ops[i] = NewRec;
          break;
        }
      return getAddExpr(Ops);
    }

    // Okay, if there weren't any loop invariants to be folded, check to see if
    // there are multiple AddRec's with the same loop induction variable being
    // added together.  If so, we can fold them.
    for (unsigned OtherIdx = Idx+1;
         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
         ++OtherIdx)
      if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
        // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
        SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
                                               AddRec->op_end());
        for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
             ++OtherIdx)
          if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
            if (OtherAddRec->getLoop() == AddRecLoop) {
              for (unsigned i = 0, e = OtherAddRec->getNumOperands();
                   i != e; ++i) {
                if (i >= AddRecOps.size()) {
                  AddRecOps.append(OtherAddRec->op_begin()+i,
                                   OtherAddRec->op_end());
                  break;
                }
                AddRecOps[i] = getAddExpr(AddRecOps[i],
                                          OtherAddRec->getOperand(i));
              }
              Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
            }
        // Step size has changed, so we cannot guarantee no self-wraparound.
        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
        return getAddExpr(Ops);
      }

    // Otherwise couldn't fold anything into this recurrence.  Move onto the
    // next one.
  }

  // Okay, it looks like we really DO need an add expr.  Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scAddExpr);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = nullptr;
  SCEVAddExpr *S =
    static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
                                        O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
  }
  // Apply the (possibly strengthened) flags even to a pre-existing node.
  S->setNoWrapFlags(Flags);
  return S;
}
2381 | |
/// Multiply \p i by \p j, setting \p Overflow if the 64-bit product wraps.
/// The (possibly wrapped) product is returned either way. \p Overflow is
/// never cleared, so a caller can thread one flag through a whole chain of
/// multiplications.
static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  uint64_t k = i*j;
  // The division round-trips exactly iff the multiplication did not wrap
  // (j <= 1 can never overflow, and would also divide incorrectly for j==0).
  if (j > 1 && k / j != i) Overflow = true;
  return k;
}

/// Compute the result of "n choose k", the binomial coefficient.  If an
/// intermediate computation overflows, Overflow will be set and the return will
/// be garbage. Overflow is not cleared on absence of overflow.
static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
  // We use the multiplicative formula:
  //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
  // At each iteration, we take the n-th term of the numeral and divide by the
  // (k-n)th term of the denominator.  This division will always produce an
  // integral result, and helps reduce the chance of overflow in the
  // intermediate computations. However, we can still overflow even when the
  // final result would fit.

  // Reject out-of-range k before the n == 0 special case; with the checks in
  // the opposite order, Choose(0, k) with k > 0 wrongly returned 1 instead
  // of 0.
  if (k > n) return 0;
  if (n == 0 || n == k) return 1;

  // C(n, k) == C(n, n-k); use the smaller k to shorten the loop.
  if (k > n/2)
    k = n-k;

  uint64_t r = 1;
  for (uint64_t i = 1; i <= k; ++i) {
    r = umul_ov(r, n-(i-1), Overflow);
    r /= i;
  }
  return r;
}
2413 | |
2414 | /// Determine if any of the operands in this SCEV are a constant or if |
2415 | /// any of the add or multiply expressions in this SCEV contain a constant. |
2416 | static bool containsConstantSomewhere(const SCEV *StartExpr) { |
2417 | SmallVector<const SCEV *, 4> Ops; |
2418 | Ops.push_back(StartExpr); |
2419 | while (!Ops.empty()) { |
2420 | const SCEV *CurrentExpr = Ops.pop_back_val(); |
2421 | if (isa<SCEVConstant>(*CurrentExpr)) |
2422 | return true; |
2423 | |
2424 | if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) { |
2425 | const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr); |
2426 | Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end()); |
2427 | } |
2428 | } |
2429 | return false; |
2430 | } |
2431 | |
/// Get a canonical multiply expression, or something simpler if possible.
///
/// \p Ops is a commutative operand list simplified in place, recursing after
/// each structural change. The passes, in order: distribute a constant over
/// a two-operand add that contains a constant; fold constants together (with
/// special cases for *1, *0 and *-1); inline nested muls; distribute
/// loop-invariant factors over a matching add recurrence; and multiply out
/// addrecs over the same loop via the binomial-coefficient product formula.
/// Whatever remains is uniqued as a SCEVMulExpr carrying \p Flags.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                        SCEV::NoWrapFlags Flags) {
  assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&((Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && "only nuw or nsw allowed") ? static_cast<void> (0) : __assert_fail ("Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && \"only nuw or nsw allowed\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2436, __PRETTY_FUNCTION__))
         "only nuw or nsw allowed")((Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && "only nuw or nsw allowed") ? static_cast<void> (0) : __assert_fail ("Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && \"only nuw or nsw allowed\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2436, __PRETTY_FUNCTION__));
  assert(!Ops.empty() && "Cannot get empty mul!")((!Ops.empty() && "Cannot get empty mul!") ? static_cast <void> (0) : __assert_fail ("!Ops.empty() && \"Cannot get empty mul!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2437, __PRETTY_FUNCTION__));
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&((getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVMulExpr operand types don't match!") ? static_cast<void > (0) : __assert_fail ("getEffectiveSCEVType(Ops[i]->getType()) == ETy && \"SCEVMulExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2443, __PRETTY_FUNCTION__))
           "SCEVMulExpr operand types don't match!")((getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVMulExpr operand types don't match!") ? static_cast<void > (0) : __assert_fail ("getEffectiveSCEVType(Ops[i]->getType()) == ETy && \"SCEVMulExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2443, __PRETTY_FUNCTION__));
#endif

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, &LI);

  Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);

  // If there are any constants, fold them together.
  // (After sorting, any constant is at index 0.)
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {

    // C1*(C2+V) -> C1*C2 + C1*V
    if (Ops.size() == 2)
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
        // If any of Add's ops are Adds or Muls with a constant,
        // apply this transformation as well.
        if (Add->getNumOperands() == 2)
          if (containsConstantSomewhere(Add))
            return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
                              getMulExpr(LHSC, Add->getOperand(1)));

    ++Idx;
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      ConstantInt *Fold =
          ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());
      Ops[0] = getConstant(Fold);
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      if (Ops.size() == 1) return Ops[0];
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant one being multiplied, strip it off.
    if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
      Ops.erase(Ops.begin());
      --Idx;
    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
      // If we have a multiply of zero, it will always be zero.
      return Ops[0];
    } else if (Ops[0]->isAllOnesValue()) {
      // If we have a mul by -1 of an add, try distributing the -1 among the
      // add operands.
      if (Ops.size() == 2) {
        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
          SmallVector<const SCEV *, 4> NewOps;
          bool AnyFolded = false;
          for (const SCEV *AddOp : Add->operands()) {
            const SCEV *Mul = getMulExpr(Ops[0], AddOp);
            // Only distribute if at least one -1*operand simplified;
            // otherwise this would just grow the expression.
            if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
            NewOps.push_back(Mul);
          }
          if (AnyFolded)
            return getAddExpr(NewOps);
        } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
          // Negation preserves a recurrence's no self-wrap property.
          SmallVector<const SCEV *, 4> Operands;
          for (const SCEV *AddRecOp : AddRec->operands())
            Operands.push_back(getMulExpr(Ops[0], AddRecOp));

          return getAddRecExpr(Operands, AddRec->getLoop(),
                               AddRec->getNoWrapFlags(SCEV::FlagNW));
        }
      }
    }

    if (Ops.size() == 1)
      return Ops[0];
  }

  // Skip over the add expression until we get to a multiply.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
    ++Idx;

  // If there are mul operands inline them all into this expression.
  if (Idx < Ops.size()) {
    bool DeletedMul = false;
    while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
      // If we have an mul, expand the mul operands onto the end of the operands
      // list.
      Ops.erase(Ops.begin()+Idx);
      Ops.append(Mul->op_begin(), Mul->op_end());
      DeletedMul = true;
    }

    // If we deleted at least one mul, we added operands to the end of the list,
    // and they are not necessarily sorted.  Recurse to resort and resimplify
    // any operands we just acquired.
    if (DeletedMul)
      return getMulExpr(Ops);
  }

  // If there are any add recurrences in the operands list, see if any other
  // added values are loop invariant.  If so, we can fold them into the
  // recurrence.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
    ++Idx;

  // Scan over all recurrences, trying to fold loop invariants into them.
  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
    // Scan all of the other operands to this mul and add them to the vector if
    // they are loop invariant w.r.t. the recurrence.
    SmallVector<const SCEV *, 8> LIOps;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
    const Loop *AddRecLoop = AddRec->getLoop();
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (isLoopInvariant(Ops[i], AddRecLoop)) {
        LIOps.push_back(Ops[i]);
        Ops.erase(Ops.begin()+i);
        --i; --e;
      }

    // If we found some loop invariants, fold them into the recurrence.
    if (!LIOps.empty()) {
      //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.reserve(AddRec->getNumOperands());
      const SCEV *Scale = getMulExpr(LIOps);
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
        NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));

      // Build the new addrec. Propagate the NUW and NSW flags if both the
      // outer mul and the inner addrec are guaranteed to have no overflow.
      //
      // No self-wrap cannot be guaranteed after changing the step size, but
      // will be inferred if either NUW or NSW is true.
      Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
      const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);

      // If all of the other operands were loop invariant, we are done.
      if (Ops.size() == 1) return NewRec;

      // Otherwise, multiply the folded AddRec by the non-invariant parts.
      // AddRec is known to still be in Ops, so this search terminates.
      for (unsigned i = 0;; ++i)
        if (Ops[i] == AddRec) {
          Ops[i] = NewRec;
          break;
        }
      return getMulExpr(Ops);
    }

    // Okay, if there weren't any loop invariants to be folded, check to see if
    // there are multiple AddRec's with the same loop induction variable being
    // multiplied together.  If so, we can fold them.

    // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
    // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
    //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
    //   ]]],+,...up to x=2n}.
    // Note that the arguments to choose() are always integers with values
    // known at compile time, never SCEV objects.
    //
    // The implementation avoids pointless extra computations when the two
    // addrec's are of different length (mathematically, it's equivalent to
    // an infinite stream of zeros on the right).
    bool OpsModified = false;
    for (unsigned OtherIdx = Idx+1;
         OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
         ++OtherIdx) {
      const SCEVAddRecExpr *OtherAddRec =
        dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
      if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
        continue;

      bool Overflow = false;
      Type *Ty = AddRec->getType();
      bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
      SmallVector<const SCEV*, 7> AddRecOps;
      for (int x = 0, xe = AddRec->getNumOperands() +
             OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
        const SCEV *Term = getZero(Ty);
        for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
          uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
          for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
                 ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
               z < ze && !Overflow; ++z) {
            uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
            uint64_t Coeff;
            // Coefficients only need checked multiplication when they can
            // exceed what the (wider-than-64-bit) result type can hold.
            if (LargerThan64Bits)
              Coeff = umul_ov(Coeff1, Coeff2, Overflow);
            else
              Coeff = Coeff1*Coeff2;
            const SCEV *CoeffTerm = getConstant(Ty, Coeff);
            const SCEV *Term1 = AddRec->getOperand(y-z);
            const SCEV *Term2 = OtherAddRec->getOperand(z);
            Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
          }
        }
        AddRecOps.push_back(Term);
      }
      if (!Overflow) {
        const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
                                              SCEV::FlagAnyWrap);
        if (Ops.size() == 2) return NewAddRec;
        Ops[Idx] = NewAddRec;
        Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
        OpsModified = true;
        // getAddRecExpr may have folded to a non-addrec; stop combining then.
        AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
        if (!AddRec)
          break;
      }
    }
    if (OpsModified)
      return getMulExpr(Ops);

    // Otherwise couldn't fold anything into this recurrence.  Move onto the
    // next one.
  }

  // Okay, it looks like we really DO need an mul expr.  Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scMulExpr);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = nullptr;
  SCEVMulExpr *S =
    static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
                                        O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
  }
  // Apply the (possibly strengthened) flags even to a pre-existing node.
  S->setNoWrapFlags(Flags);
  return S;
}
2671 | |
/// Get a canonical unsigned division expression, or something simpler if
/// possible.
///
/// Folds X udiv 1 to X, refuses to analyze division by zero, folds
/// constant/constant, and pushes the division into addrec, mul and add
/// operands when zero-extension equality checks prove the fold is lossless.
/// Falls through to a uniqued SCEVUDivExpr node when nothing applies.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  assert(getEffectiveSCEVType(LHS->getType()) ==((getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType (RHS->getType()) && "SCEVUDivExpr operand types don't match!" ) ? static_cast<void> (0) : __assert_fail ("getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType(RHS->getType()) && \"SCEVUDivExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2678, __PRETTY_FUNCTION__))
         getEffectiveSCEVType(RHS->getType()) &&((getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType (RHS->getType()) && "SCEVUDivExpr operand types don't match!" ) ? static_cast<void> (0) : __assert_fail ("getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType(RHS->getType()) && \"SCEVUDivExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2678, __PRETTY_FUNCTION__))
         "SCEVUDivExpr operand types don't match!")((getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType (RHS->getType()) && "SCEVUDivExpr operand types don't match!" ) ? static_cast<void> (0) : __assert_fail ("getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType(RHS->getType()) && \"SCEVUDivExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2678, __PRETTY_FUNCTION__));

  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
    if (RHSC->getValue()->equalsInt(1))
      return LHS; // X udiv 1 --> x
    // If the denominator is zero, the result of the udiv is undefined. Don't
    // try to analyze it, because the resolution chosen here may differ from
    // the resolution chosen in other parts of the compiler.
    if (!RHSC->getValue()->isZero()) {
      // Determine if the division can be folded into the operands of
      // its operands.
      // TODO: Generalize this to non-constants by using known-bits information.
      Type *Ty = LHS->getType();
      unsigned LZ = RHSC->getAPInt().countLeadingZeros();
      // Dividing by RHSC shifts values right by at most MaxShiftAmt bits.
      unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
      // For non-power-of-two values, effectively round the value up to the
      // nearest power of two.
      if (!RHSC->getAPInt().isPowerOf2())
        ++MaxShiftAmt;
      // ExtTy is wide enough that the zero-extended computations below cannot
      // wrap; equality of the zext'd forms is the safety certificate for the
      // folds that follow.
      IntegerType *ExtTy =
          IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
        if (const SCEVConstant *Step =
                dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
          // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
          const APInt &StepInt = Step->getAPInt();
          const APInt &DivInt = RHSC->getAPInt();
          // Require the step to be a multiple of the divisor, and require
          // that zext distributes over the recurrence, i.e. the addrec does
          // not wrap when evaluated in the wider type.
          if (!StepInt.urem(DivInt) &&
              getZeroExtendExpr(AR, ExtTy) ==
                  getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
                                getZeroExtendExpr(Step, ExtTy),
                                AR->getLoop(), SCEV::FlagAnyWrap)) {
            SmallVector<const SCEV *, 4> Operands;
            for (const SCEV *Op : AR->operands())
              Operands.push_back(getUDivExpr(Op, RHS));
            return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
          }
          // Get a canonical UDivExpr for a recurrence.
          // {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
          // We can currently only fold X%N if X is constant.
          const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
          if (StartC && !DivInt.urem(StepInt) &&
              getZeroExtendExpr(AR, ExtTy) ==
                  getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
                                getZeroExtendExpr(Step, ExtTy),
                                AR->getLoop(), SCEV::FlagAnyWrap)) {
            const APInt &StartInt = StartC->getAPInt();
            const APInt &StartRem = StartInt.urem(StepInt);
            // Round the start down to a multiple of the step; note that this
            // rewrites LHS, so the mul/add folds below see the new form.
            if (StartRem != 0)
              LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
                                  AR->getLoop(), SCEV::FlagNW);
          }
        }
      // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
      if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
        SmallVector<const SCEV *, 4> Operands;
        for (const SCEV *Op : M->operands())
          Operands.push_back(getZeroExtendExpr(Op, ExtTy));
        if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
          // Find an operand that's safely divisible.
          for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
            const SCEV *Op = M->getOperand(i);
            const SCEV *Div = getUDivExpr(Op, RHSC);
            // The round-trip multiply check rejects divisions that lost
            // low bits.
            if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
              Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
                                                      M->op_end());
              Operands[i] = Div;
              return getMulExpr(Operands);
            }
          }
      }
      // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
      if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
        SmallVector<const SCEV *, 4> Operands;
        for (const SCEV *Op : A->operands())
          Operands.push_back(getZeroExtendExpr(Op, ExtTy));
        if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
          Operands.clear();
          for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
            const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
            // Every term must divide exactly, otherwise abandon the fold.
            if (isa<SCEVUDivExpr>(Op) ||
                getMulExpr(Op, RHS) != A->getOperand(i))
              break;
            Operands.push_back(Op);
          }
          if (Operands.size() == A->getNumOperands())
            return getAddExpr(Operands);
        }
      }

      // Fold if both operands are constant.
      if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
        Constant *LHSCV = LHSC->getValue();
        Constant *RHSCV = RHSC->getValue();
        return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
                                                                   RHSCV)));
      }
    }
  }

  // No fold applied; look up or create the canonical SCEVUDivExpr node.
  FoldingSetNodeID ID;
  ID.AddInteger(scUDivExpr);
  ID.AddPointer(LHS);
  ID.AddPointer(RHS);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
                                             LHS, RHS);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}
2789 | |
2790 | static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { |
2791 | APInt A = C1->getAPInt().abs(); |
2792 | APInt B = C2->getAPInt().abs(); |
2793 | uint32_t ABW = A.getBitWidth(); |
2794 | uint32_t BBW = B.getBitWidth(); |
2795 | |
2796 | if (ABW > BBW) |
2797 | B = B.zext(ABW); |
2798 | else if (ABW < BBW) |
2799 | A = A.zext(BBW); |
2800 | |
2801 | return APIntOps::GreatestCommonDivisor(A, B); |
2802 | } |
2803 | |
/// Get a canonical unsigned division expression, or something simpler if
/// possible. There is no representation for an exact udiv in SCEV IR, but we
/// can attempt to remove factors from the LHS and RHS. We can't do this when
/// it's not exact because the udiv may be clearing bits.
const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
                                              const SCEV *RHS) {
  // TODO: we could try to find factors in all sorts of things, but for now we
  // just deal with u/exact (multiply, constant). See SCEVDivision towards the
  // end of this file for inspiration.

  // Only multiply expressions expose factors we can cancel; anything else
  // falls back to the general udiv builder.
  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
  if (!Mul)
    return getUDivExpr(LHS, RHS);

  if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
    // If the mulexpr multiplies by a constant, then that constant must be the
    // first element of the mulexpr.
    if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      if (LHSCst == RHSCst) {
        // Identical constants cancel outright: (C * A * ...) /u C.
        SmallVector<const SCEV *, 2> Operands;
        Operands.append(Mul->op_begin() + 1, Mul->op_end());
        return getMulExpr(Operands);
      }

      // We can't just assume that LHSCst divides RHSCst cleanly, it could be
      // that there's a factor provided by one of the other terms. We need to
      // check.
      APInt Factor = gcd(LHSCst, RHSCst);
      // isIntN(1) is true for values <= 1, so this fires only when the two
      // constants share a nontrivial (> 1) common factor.
      if (!Factor.isIntN(1)) {
        // Divide both constants by the common factor and retry on the
        // reduced expression.
        LHSCst =
            cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
        RHSCst =
            cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
        SmallVector<const SCEV *, 2> Operands;
        Operands.push_back(LHSCst);
        Operands.append(Mul->op_begin() + 1, Mul->op_end());
        LHS = getMulExpr(Operands);
        RHS = RHSCst;
        // Rebuilding may have simplified LHS to a non-mul; if so, restart
        // the whole analysis on the reduced operands.
        Mul = dyn_cast<SCEVMulExpr>(LHS);
        if (!Mul)
          return getUDivExactExpr(LHS, RHS);
      }
    }
  }

  // Cancel a non-constant factor that appears verbatim in the multiply.
  for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
    if (Mul->getOperand(i) == RHS) {
      SmallVector<const SCEV *, 2> Operands;
      Operands.append(Mul->op_begin(), Mul->op_begin() + i);
      Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
      return getMulExpr(Operands);
    }
  }

  // No factor found; emit a plain udiv.
  return getUDivExpr(LHS, RHS);
}
2860 | |
2861 | /// Get an add recurrence expression for the specified loop. Simplify the |
2862 | /// expression as much as possible. |
2863 | const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, |
2864 | const Loop *L, |
2865 | SCEV::NoWrapFlags Flags) { |
2866 | SmallVector<const SCEV *, 4> Operands; |
2867 | Operands.push_back(Start); |
2868 | if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) |
2869 | if (StepChrec->getLoop() == L) { |
2870 | Operands.append(StepChrec->op_begin(), StepChrec->op_end()); |
2871 | return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); |
2872 | } |
2873 | |
2874 | Operands.push_back(Step); |
2875 | return getAddRecExpr(Operands, L, Flags); |
2876 | } |
2877 | |
/// Get an add recurrence expression for the specified loop. Simplify the
/// expression as much as possible.
///
/// Drops trailing zero steps, strengthens no-wrap flags, canonicalizes the
/// nesting order of addrec-of-addrec starts, and finally uniques the node.
const SCEV *
ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
                               const Loop *L, SCEV::NoWrapFlags Flags) {
  if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
  // All operands must share one effective type and be invariant in L.
  Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
  for (unsigned i = 1, e = Operands.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&((getEffectiveSCEVType(Operands[i]->getType()) == ETy && "SCEVAddRecExpr operand types don't match!") ? static_cast< void> (0) : __assert_fail ("getEffectiveSCEVType(Operands[i]->getType()) == ETy && \"SCEVAddRecExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2888, __PRETTY_FUNCTION__))
           "SCEVAddRecExpr operand types don't match!")((getEffectiveSCEVType(Operands[i]->getType()) == ETy && "SCEVAddRecExpr operand types don't match!") ? static_cast< void> (0) : __assert_fail ("getEffectiveSCEVType(Operands[i]->getType()) == ETy && \"SCEVAddRecExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2888, __PRETTY_FUNCTION__));
  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
    assert(isLoopInvariant(Operands[i], L) &&((isLoopInvariant(Operands[i], L) && "SCEVAddRecExpr operand is not loop-invariant!" ) ? static_cast<void> (0) : __assert_fail ("isLoopInvariant(Operands[i], L) && \"SCEVAddRecExpr operand is not loop-invariant!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2891, __PRETTY_FUNCTION__))
           "SCEVAddRecExpr operand is not loop-invariant!")((isLoopInvariant(Operands[i], L) && "SCEVAddRecExpr operand is not loop-invariant!" ) ? static_cast<void> (0) : __assert_fail ("isLoopInvariant(Operands[i], L) && \"SCEVAddRecExpr operand is not loop-invariant!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 2891, __PRETTY_FUNCTION__));
#endif

  if (Operands.back()->isZero()) {
    Operands.pop_back();
    return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
  }

  // It's tempting to want to call getMaxBackedgeTakenCount count here and
  // use that information to infer NUW and NSW flags. However, computing a
  // BE count requires calling getAddRecExpr, so we may not yet have a
  // meaningful BE count at this point (and if we don't, we'd be stuck
  // with a SCEVCouldNotCompute as the cached BE count).

  Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);

  // Canonicalize nested AddRecs in by nesting them in order of loop depth.
  if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
    const Loop *NestedLoop = NestedAR->getLoop();
    // Swap the nesting when this addrec's loop is the deeper one, or when
    // the loops are unrelated but L's header dominates the nested header.
    if (L->contains(NestedLoop)
            ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
            : (!NestedLoop->contains(L) &&
               DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
      SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                  NestedAR->op_end());
      // Tentatively replace the nested addrec with its start value.
      Operands[0] = NestedAR->getStart();
      // AddRecs require their operands be loop-invariant with respect to their
      // loops. Don't perform this transformation if it would break this
      // requirement.
      bool AllInvariant = all_of(
          Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });

      if (AllInvariant) {
        // Create a recurrence for the outer loop with the same step size.
        //
        // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
        // inner recurrence has the same property.
        SCEV::NoWrapFlags OuterFlags =
          maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());

        NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
        AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
          return isLoopInvariant(Op, NestedLoop);
        });

        if (AllInvariant) {
          // Ok, both add recurrences are valid after the transformation.
          //
          // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
          // the outer recurrence has the same property.
          SCEV::NoWrapFlags InnerFlags =
            maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
          return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
        }
      }
      // Reset Operands to its original state.
      Operands[0] = NestedAR;
    }
  }

  // Okay, it looks like we really DO need an addrec expr. Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scAddRecExpr);
  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
    ID.AddPointer(Operands[i]);
  ID.AddPointer(L);
  void *IP = nullptr;
  SCEVAddRecExpr *S =
    static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
    std::uninitialized_copy(Operands.begin(), Operands.end(), O);
    S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
                                           O, Operands.size(), L);
    UniqueSCEVs.InsertNode(S, IP);
  }
  // Apply the proven flags to the (possibly pre-existing) node.
  S->setNoWrapFlags(Flags);
  return S;
}
2971 | |
2972 | const SCEV * |
2973 | ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr, |
2974 | const SmallVectorImpl<const SCEV *> &IndexExprs, |
2975 | bool InBounds) { |
2976 | // getSCEV(Base)->getType() has the same address space as Base->getType() |
2977 | // because SCEV::getType() preserves the address space. |
2978 | Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType()); |
2979 | // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP |
2980 | // instruction to its SCEV, because the Instruction may be guarded by control |
2981 | // flow and the no-overflow bits may not be valid for the expression in any |
2982 | // context. This can be fixed similarly to how these flags are handled for |
2983 | // adds. |
2984 | SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap; |
2985 | |
2986 | const SCEV *TotalOffset = getZero(IntPtrTy); |
2987 | // The address space is unimportant. The first thing we do on CurTy is getting |
2988 | // its element type. |
2989 | Type *CurTy = PointerType::getUnqual(PointeeType); |
2990 | for (const SCEV *IndexExpr : IndexExprs) { |
2991 | // Compute the (potentially symbolic) offset in bytes for this index. |
2992 | if (StructType *STy = dyn_cast<StructType>(CurTy)) { |
2993 | // For a struct, add the member offset. |
2994 | ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue(); |
2995 | unsigned FieldNo = Index->getZExtValue(); |
2996 | const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); |
2997 | |
2998 | // Add the field offset to the running total offset. |
2999 | TotalOffset = getAddExpr(TotalOffset, FieldOffset); |
3000 | |
3001 | // Update CurTy to the type of the field at Index. |
3002 | CurTy = STy->getTypeAtIndex(Index); |
3003 | } else { |
3004 | // Update CurTy to its element type. |
3005 | CurTy = cast<SequentialType>(CurTy)->getElementType(); |
3006 | // For an array, add the element offset, explicitly scaled. |
3007 | const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy); |
3008 | // Getelementptr indices are signed. |
3009 | IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy); |
3010 | |
3011 | // Multiply the index by the element size to compute the element offset. |
3012 | const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap); |
3013 | |
3014 | // Add the element offset to the running total offset. |
3015 | TotalOffset = getAddExpr(TotalOffset, LocalOffset); |
3016 | } |
3017 | } |
3018 | |
3019 | // Add the total offset from all the GEP indices to the base. |
3020 | return getAddExpr(BaseExpr, TotalOffset, Wrap); |
3021 | } |
3022 | |
3023 | const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, |
3024 | const SCEV *RHS) { |
3025 | SmallVector<const SCEV *, 2> Ops; |
3026 | Ops.push_back(LHS); |
3027 | Ops.push_back(RHS); |
3028 | return getSMaxExpr(Ops); |
3029 | } |
3030 | |
/// Build a canonical signed-max expression over Ops: folds constants,
/// strips the smax identity (signed minimum), flattens nested smax nodes,
/// drops duplicate or provably-dominated operands, then uniques the node.
const SCEV *
ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  assert(!Ops.empty() && "Cannot get empty smax!")((!Ops.empty() && "Cannot get empty smax!") ? static_cast <void> (0) : __assert_fail ("!Ops.empty() && \"Cannot get empty smax!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3033, __PRETTY_FUNCTION__));
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  // All operands must share one effective type.
  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&((getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVSMaxExpr operand types don't match!") ? static_cast< void> (0) : __assert_fail ("getEffectiveSCEVType(Ops[i]->getType()) == ETy && \"SCEVSMaxExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3039, __PRETTY_FUNCTION__))
           "SCEVSMaxExpr operand types don't match!")((getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVSMaxExpr operand types don't match!") ? static_cast< void> (0) : __assert_fail ("getEffectiveSCEVType(Ops[i]->getType()) == ETy && \"SCEVSMaxExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3039, __PRETTY_FUNCTION__));
#endif

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, &LI);

  // If there are any constants, fold them together. Sorting placed all
  // constants at the front, so they are adjacent starting at index 0.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
    ++Idx;
    assert(Idx < Ops.size())((Idx < Ops.size()) ? static_cast<void> (0) : __assert_fail ("Idx < Ops.size()", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3049, __PRETTY_FUNCTION__));
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      ConstantInt *Fold = ConstantInt::get(
          getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
      Ops[0] = getConstant(Fold);
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      if (Ops.size() == 1) return Ops[0];
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant minimum-int, strip it off: it is the
    // identity for smax.
    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
      Ops.erase(Ops.begin());
      --Idx;
    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
      // If we have an smax with a constant maximum-int, it will always be
      // maximum-int.
      return Ops[0];
    }

    if (Ops.size() == 1) return Ops[0];
  }

  // Find the first SMax
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
    ++Idx;

  // Check to see if one of the operands is an SMax. If so, expand its operands
  // onto our operand list, and recurse to simplify.
  if (Idx < Ops.size()) {
    bool DeletedSMax = false;
    while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
      Ops.erase(Ops.begin()+Idx);
      Ops.append(SMax->op_begin(), SMax->op_end());
      DeletedSMax = true;
    }

    if (DeletedSMax)
      return getSMaxExpr(Ops);
  }

  // Okay, check to see if the same value occurs in the operand list twice. If
  // so, delete one. Since we sorted the list, these values are required to
  // be adjacent.
  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
    // X smax Y smax Y --> X smax Y
    // X smax Y --> X, if X is always greater than Y
    if (Ops[i] == Ops[i+1] ||
        isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
      // Drop the dominated right-hand operand and revisit this index.
      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
      --i; --e;
    } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
      // Drop the dominated left-hand operand and revisit this index.
      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
      --i; --e;
    }

  if (Ops.size() == 1) return Ops[0];

  assert(!Ops.empty() && "Reduced smax down to nothing!")((!Ops.empty() && "Reduced smax down to nothing!") ? static_cast <void> (0) : __assert_fail ("!Ops.empty() && \"Reduced smax down to nothing!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3108, __PRETTY_FUNCTION__));

  // Okay, it looks like we really DO need an smax expr. Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scSMaxExpr);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
                                             O, Ops.size());
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}
3125 | |
3126 | const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, |
3127 | const SCEV *RHS) { |
3128 | SmallVector<const SCEV *, 2> Ops; |
3129 | Ops.push_back(LHS); |
3130 | Ops.push_back(RHS); |
3131 | return getUMaxExpr(Ops); |
3132 | } |
3133 | |
/// Build a canonical unsigned-max expression over Ops: folds constants,
/// strips the umax identity (zero), flattens nested umax nodes, drops
/// duplicate or provably-dominated operands, then uniques the node.
const SCEV *
ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  assert(!Ops.empty() && "Cannot get empty umax!")((!Ops.empty() && "Cannot get empty umax!") ? static_cast <void> (0) : __assert_fail ("!Ops.empty() && \"Cannot get empty umax!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3136, __PRETTY_FUNCTION__));
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  // All operands must share one effective type.
  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&((getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVUMaxExpr operand types don't match!") ? static_cast< void> (0) : __assert_fail ("getEffectiveSCEVType(Ops[i]->getType()) == ETy && \"SCEVUMaxExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3142, __PRETTY_FUNCTION__))
           "SCEVUMaxExpr operand types don't match!")((getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVUMaxExpr operand types don't match!") ? static_cast< void> (0) : __assert_fail ("getEffectiveSCEVType(Ops[i]->getType()) == ETy && \"SCEVUMaxExpr operand types don't match!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3142, __PRETTY_FUNCTION__));
#endif

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, &LI);

  // If there are any constants, fold them together. Sorting placed all
  // constants at the front, so they are adjacent starting at index 0.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
    ++Idx;
    assert(Idx < Ops.size())((Idx < Ops.size()) ? static_cast<void> (0) : __assert_fail ("Idx < Ops.size()", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3152, __PRETTY_FUNCTION__));
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      ConstantInt *Fold = ConstantInt::get(
          getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
      Ops[0] = getConstant(Fold);
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      if (Ops.size() == 1) return Ops[0];
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant minimum-int (zero), strip it off: it is
    // the identity for umax.
    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
      Ops.erase(Ops.begin());
      --Idx;
    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
      // If we have an umax with a constant maximum-int, it will always be
      // maximum-int.
      return Ops[0];
    }

    if (Ops.size() == 1) return Ops[0];
  }

  // Find the first UMax
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
    ++Idx;

  // Check to see if one of the operands is a UMax. If so, expand its operands
  // onto our operand list, and recurse to simplify.
  if (Idx < Ops.size()) {
    bool DeletedUMax = false;
    while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
      Ops.erase(Ops.begin()+Idx);
      Ops.append(UMax->op_begin(), UMax->op_end());
      DeletedUMax = true;
    }

    if (DeletedUMax)
      return getUMaxExpr(Ops);
  }

  // Okay, check to see if the same value occurs in the operand list twice. If
  // so, delete one. Since we sorted the list, these values are required to
  // be adjacent.
  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
    // X umax Y umax Y --> X umax Y
    // X umax Y --> X, if X is always greater than Y
    if (Ops[i] == Ops[i+1] ||
        isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
      // Drop the dominated right-hand operand and revisit this index.
      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
      --i; --e;
    } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
      // Drop the dominated left-hand operand and revisit this index.
      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
      --i; --e;
    }

  if (Ops.size() == 1) return Ops[0];

  assert(!Ops.empty() && "Reduced umax down to nothing!")((!Ops.empty() && "Reduced umax down to nothing!") ? static_cast <void> (0) : __assert_fail ("!Ops.empty() && \"Reduced umax down to nothing!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3211, __PRETTY_FUNCTION__));

  // Okay, it looks like we really DO need a umax expr. Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scUMaxExpr);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
                                             O, Ops.size());
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}
3228 | |
3229 | const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, |
3230 | const SCEV *RHS) { |
3231 | // ~smax(~x, ~y) == smin(x, y). |
3232 | return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); |
3233 | } |
3234 | |
3235 | const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, |
3236 | const SCEV *RHS) { |
3237 | // ~umax(~x, ~y) == umin(x, y) |
3238 | return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); |
3239 | } |
3240 | |
3241 | const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { |
3242 | // We can bypass creating a target-independent |
3243 | // constant expression and then folding it back into a ConstantInt. |
3244 | // This is just a compile-time optimization. |
3245 | return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); |
3246 | } |
3247 | |
3248 | const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, |
3249 | StructType *STy, |
3250 | unsigned FieldNo) { |
3251 | // We can bypass creating a target-independent |
3252 | // constant expression and then folding it back into a ConstantInt. |
3253 | // This is just a compile-time optimization. |
3254 | return getConstant( |
3255 | IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); |
3256 | } |
3257 | |
const SCEV *ScalarEvolution::getUnknown(Value *V) {
  // Don't attempt to do anything other than create a SCEVUnknown object
  // here. createSCEV only calls getUnknown after checking for all other
  // interesting possibilities, and any other code that calls getUnknown
  // is doing so in order to hide a value from SCEV canonicalization.

  // Unique the node in UniqueSCEVs, keyed on (scUnknown, V).
  FoldingSetNodeID ID;
  ID.AddInteger(scUnknown);
  ID.AddPointer(V);
  void *IP = nullptr;
  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
    assert(cast<SCEVUnknown>(S)->getValue() == V &&((cast<SCEVUnknown>(S)->getValue() == V && "Stale SCEVUnknown in uniquing map!" ) ? static_cast<void> (0) : __assert_fail ("cast<SCEVUnknown>(S)->getValue() == V && \"Stale SCEVUnknown in uniquing map!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3270, __PRETTY_FUNCTION__))
           "Stale SCEVUnknown in uniquing map!")((cast<SCEVUnknown>(S)->getValue() == V && "Stale SCEVUnknown in uniquing map!" ) ? static_cast<void> (0) : __assert_fail ("cast<SCEVUnknown>(S)->getValue() == V && \"Stale SCEVUnknown in uniquing map!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3270, __PRETTY_FUNCTION__));
    return S;
  }
  // Not found: allocate a fresh SCEVUnknown and thread it onto the
  // FirstUnknown list (used to invalidate these nodes when their Value
  // is deleted or RAUW'd).
  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
                                            FirstUnknown);
  FirstUnknown = cast<SCEVUnknown>(S);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}
3279 | |
3280 | //===----------------------------------------------------------------------===// |
3281 | // Basic SCEV Analysis and PHI Idiom Recognition Code |
3282 | // |
3283 | |
3284 | /// Test if values of the given type are analyzable within the SCEV |
3285 | /// framework. This primarily includes integer types, and it can optionally |
3286 | /// include pointer types if the ScalarEvolution class has access to |
3287 | /// target-specific information. |
3288 | bool ScalarEvolution::isSCEVable(Type *Ty) const { |
3289 | // Integers and pointers are always SCEVable. |
3290 | return Ty->isIntegerTy() || Ty->isPointerTy(); |
3291 | } |
3292 | |
3293 | /// Return the size in bits of the specified type, for which isSCEVable must |
3294 | /// return true. |
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!")((isSCEVable(Ty) && "Type is not SCEVable!") ? static_cast <void> (0) : __assert_fail ("isSCEVable(Ty) && \"Type is not SCEVable!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3296, __PRETTY_FUNCTION__));
  // Defer to the DataLayout, which knows the target-specific width of both
  // integer and pointer types.
  return getDataLayout().getTypeSizeInBits(Ty);
}
3299 | |
3300 | /// Return a type with the same bitwidth as the given type and which represents |
3301 | /// how SCEV will treat the given type, for which isSCEVable must return |
3302 | /// true. For pointer types, this is the pointer-sized integer type. |
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!")((isSCEVable(Ty) && "Type is not SCEVable!") ? static_cast <void> (0) : __assert_fail ("isSCEVable(Ty) && \"Type is not SCEVable!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3304, __PRETTY_FUNCTION__));

  // Integer types are already in SCEV's canonical form.
  if (Ty->isIntegerTy())
    return Ty;

  // The only other supported type is pointer; SCEV models it as the
  // DataLayout's pointer-sized integer type.
  assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!")((Ty->isPointerTy() && "Unexpected non-pointer non-integer type!" ) ? static_cast<void> (0) : __assert_fail ("Ty->isPointerTy() && \"Unexpected non-pointer non-integer type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3310, __PRETTY_FUNCTION__));
  return getDataLayout().getIntPtrType(Ty);
}
3313 | |
3314 | const SCEV *ScalarEvolution::getCouldNotCompute() { |
3315 | return CouldNotCompute.get(); |
3316 | } |
3317 | |
3318 | |
3319 | bool ScalarEvolution::checkValidity(const SCEV *S) const { |
3320 | // Helper class working with SCEVTraversal to figure out if a SCEV contains |
3321 | // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne |
3322 | // is set iff if find such SCEVUnknown. |
3323 | // |
3324 | struct FindInvalidSCEVUnknown { |
3325 | bool FindOne; |
3326 | FindInvalidSCEVUnknown() { FindOne = false; } |
3327 | bool follow(const SCEV *S) { |
3328 | switch (static_cast<SCEVTypes>(S->getSCEVType())) { |
3329 | case scConstant: |
3330 | return false; |
3331 | case scUnknown: |
3332 | if (!cast<SCEVUnknown>(S)->getValue()) |
3333 | FindOne = true; |
3334 | return false; |
3335 | default: |
3336 | return true; |
3337 | } |
3338 | } |
3339 | bool isDone() const { return FindOne; } |
3340 | }; |
3341 | |
3342 | FindInvalidSCEVUnknown F; |
3343 | SCEVTraversal<FindInvalidSCEVUnknown> ST(F); |
3344 | ST.visitAll(S); |
3345 | |
3346 | return !F.FindOne; |
3347 | } |
3348 | |
namespace {
// Helper class working with SCEVTraversal to figure out if a SCEV contains
// a sub SCEV of scAddRecExpr type. FindAddRecurrence::FoundOne is set
// iff such a sub scAddRecExpr type SCEV is found.
struct FindAddRecurrence {
  bool FoundOne;
  FindAddRecurrence() : FoundOne(false) {}

  bool follow(const SCEV *S) {
    switch (static_cast<SCEVTypes>(S->getSCEVType())) {
    case scAddRecExpr:
      FoundOne = true;
      // FALL THROUGH: once an addrec is found there is no need to descend
      // further, so this shares the "stop here" return with the leaf cases.
    case scConstant:
    case scUnknown:
    case scCouldNotCompute:
      return false;
    default:
      // Composite expression: keep visiting operands.
      return true;
    }
  }
  bool isDone() const { return FoundOne; }
};
}
3372 | |
3373 | bool ScalarEvolution::containsAddRecurrence(const SCEV *S) { |
3374 | HasRecMapType::iterator I = HasRecMap.find_as(S); |
3375 | if (I != HasRecMap.end()) |
3376 | return I->second; |
3377 | |
3378 | FindAddRecurrence F; |
3379 | SCEVTraversal<FindAddRecurrence> ST(F); |
3380 | ST.visitAll(S); |
3381 | HasRecMap.insert({S, F.FoundOne}); |
3382 | return F.FoundOne; |
3383 | } |
3384 | |
3385 | /// Return the Value set from S. |
SetVector<Value *> *ScalarEvolution::getSCEVValues(const SCEV *S) {
  // Look up the set of Values known to evaluate to S; returns null when no
  // such set has been recorded.
  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
  if (SI == ExprValueMap.end())
    return nullptr;
#ifndef NDEBUG
  if (VerifySCEVMap) {
    // Check there is no dangling Value in the set returned: every Value in
    // ExprValueMap[S] must still have a forward entry in ValueExprMap.
    for (const auto &VE : SI->second)
      assert(ValueExprMap.count(VE))((ValueExprMap.count(VE)) ? static_cast<void> (0) : __assert_fail ("ValueExprMap.count(VE)", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3394, __PRETTY_FUNCTION__));
  }
#endif
  return &SI->second;
}
3399 | |
/// Erase Value from ValueExprMap and ExprValueMap. Prefer this over calling
/// ValueExprMap.erase(V) directly (unless that call is paired with
/// forgetMemoizedResults(S)): it guarantees that whenever the V->S entry is
/// removed from ValueExprMap, V is also removed from the set ExprValueMap[S].
3404 | void ScalarEvolution::eraseValueFromMap(Value *V) { |
3405 | ValueExprMapType::iterator I = ValueExprMap.find_as(V); |
3406 | if (I != ValueExprMap.end()) { |
3407 | const SCEV *S = I->second; |
3408 | SetVector<Value *> *SV = getSCEVValues(S); |
3409 | // Remove V from the set of ExprValueMap[S] |
3410 | if (SV) |
3411 | SV->remove(V); |
3412 | ValueExprMap.erase(V); |
3413 | } |
3414 | } |
3415 | |
3416 | /// Return an existing SCEV if it exists, otherwise analyze the expression and |
3417 | /// create a new one. |
const SCEV *ScalarEvolution::getSCEV(Value *V) {
  assert(isSCEVable(V->getType()) && "Value is not SCEVable!")((isSCEVable(V->getType()) && "Value is not SCEVable!" ) ? static_cast<void> (0) : __assert_fail ("isSCEVable(V->getType()) && \"Value is not SCEVable!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3419, __PRETTY_FUNCTION__));

  // Reuse a previously computed (and still valid) SCEV when one exists.
  const SCEV *S = getExistingSCEV(V);
  if (S == nullptr) {
    S = createSCEV(V);
    // During PHI resolution, it is possible to create two SCEVs for the same
    // V, so it is needed to double check whether V->S is inserted into
    // ValueExprMap before insert S->V into ExprValueMap.
    std::pair<ValueExprMapType::iterator, bool> Pair =
        ValueExprMap.insert({SCEVCallbackVH(V, this), S});
    if (Pair.second)
      ExprValueMap[S].insert(V);
  }
  return S;
}
3434 | |
const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
  assert(isSCEVable(V->getType()) && "Value is not SCEVable!")((isSCEVable(V->getType()) && "Value is not SCEVable!" ) ? static_cast<void> (0) : __assert_fail ("isSCEVable(V->getType()) && \"Value is not SCEVable!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3436, __PRETTY_FUNCTION__));

  // Return the cached SCEV for V if it is still valid; a cached SCEV goes
  // stale when a Value it refers to is deleted (see checkValidity).
  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
  if (I != ValueExprMap.end()) {
    const SCEV *S = I->second;
    if (checkValidity(S))
      return S;
    // Stale entry: drop everything memoized about S, then the V -> S
    // mapping itself, and report "no existing SCEV".
    forgetMemoizedResults(S);
    ValueExprMap.erase(I);
  }
  return nullptr;
}
3448 | |
3449 | /// Return a SCEV corresponding to -V = -1*V |
3450 | /// |
3451 | const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, |
3452 | SCEV::NoWrapFlags Flags) { |
3453 | if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) |
3454 | return getConstant( |
3455 | cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); |
3456 | |
3457 | Type *Ty = V->getType(); |
3458 | Ty = getEffectiveSCEVType(Ty); |
3459 | return getMulExpr( |
3460 | V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags); |
3461 | } |
3462 | |
3463 | /// Return a SCEV corresponding to ~V = -1-V |
3464 | const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { |
3465 | if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) |
3466 | return getConstant( |
3467 | cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); |
3468 | |
3469 | Type *Ty = V->getType(); |
3470 | Ty = getEffectiveSCEVType(Ty); |
3471 | const SCEV *AllOnes = |
3472 | getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))); |
3473 | return getMinusSCEV(AllOnes, V); |
3474 | } |
3475 | |
3476 | const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, |
3477 | SCEV::NoWrapFlags Flags) { |
3478 | // Fast path: X - X --> 0. |
3479 | if (LHS == RHS) |
3480 | return getZero(LHS->getType()); |
3481 | |
3482 | // We represent LHS - RHS as LHS + (-1)*RHS. This transformation |
3483 | // makes it so that we cannot make much use of NUW. |
3484 | auto AddFlags = SCEV::FlagAnyWrap; |
3485 | const bool RHSIsNotMinSigned = |
3486 | !getSignedRange(RHS).getSignedMin().isMinSignedValue(); |
3487 | if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { |
3488 | // Let M be the minimum representable signed value. Then (-1)*RHS |
3489 | // signed-wraps if and only if RHS is M. That can happen even for |
3490 | // a NSW subtraction because e.g. (-1)*M signed-wraps even though |
3491 | // -1 - M does not. So to transfer NSW from LHS - RHS to LHS + |
3492 | // (-1)*RHS, we need to prove that RHS != M. |
3493 | // |
3494 | // If LHS is non-negative and we know that LHS - RHS does not |
3495 | // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap |
3496 | // either by proving that RHS > M or that LHS >= 0. |
3497 | if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) { |
3498 | AddFlags = SCEV::FlagNSW; |
3499 | } |
3500 | } |
3501 | |
3502 | // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS - |
3503 | // RHS is NSW and LHS >= 0. |
3504 | // |
3505 | // The difficulty here is that the NSW flag may have been proven |
3506 | // relative to a loop that is to be found in a recurrence in LHS and |
3507 | // not in RHS. Applying NSW to (-1)*M may then let the NSW have a |
3508 | // larger scope than intended. |
3509 | auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap; |
3510 | |
3511 | return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags); |
3512 | } |
3513 | |
/// Convert V to type Ty by truncating (when narrowing) or zero extending
/// (when widening); no-op when the bit widths already match.
const SCEV *
ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate or zero extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3519, __PRETTY_FUNCTION__))
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate or zero extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3519, __PRETTY_FUNCTION__))
         "Cannot truncate or zero extend with non-integer arguments!")(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate or zero extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3519, __PRETTY_FUNCTION__));
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty);
  return getZeroExtendExpr(V, Ty);
}
3526 | |
/// Convert V to type Ty by truncating (when narrowing) or sign extending
/// (when widening); no-op when the bit widths already match.
// NOTE(review): the assert message below says "zero extend" -- a copy-paste
// from the zext variant; it presumably should read "sign extend".
const SCEV *
ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
                                         Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate or zero extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3533, __PRETTY_FUNCTION__))
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate or zero extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3533, __PRETTY_FUNCTION__))
         "Cannot truncate or zero extend with non-integer arguments!")(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate or zero extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3533, __PRETTY_FUNCTION__));
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty);
  return getSignExtendExpr(V, Ty);
}
3540 | |
/// Widen V to type Ty by zero extension; no-op when the widths match.
/// Unlike getTruncateOrZeroExtend, truncation is asserted against.
const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or zero extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot noop or zero extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3546, __PRETTY_FUNCTION__))
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or zero extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot noop or zero extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3546, __PRETTY_FUNCTION__))
         "Cannot noop or zero extend with non-integer arguments!")(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or zero extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot noop or zero extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3546, __PRETTY_FUNCTION__));
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&((getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrZeroExtend cannot truncate!") ? static_cast<void > (0) : __assert_fail ("getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && \"getNoopOrZeroExtend cannot truncate!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3548, __PRETTY_FUNCTION__))
         "getNoopOrZeroExtend cannot truncate!")((getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrZeroExtend cannot truncate!") ? static_cast<void > (0) : __assert_fail ("getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && \"getNoopOrZeroExtend cannot truncate!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3548, __PRETTY_FUNCTION__));
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getZeroExtendExpr(V, Ty);
}
3553 | |
/// Widen V to type Ty by sign extension; no-op when the widths match.
/// Truncation is asserted against.
const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or sign extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot noop or sign extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3559, __PRETTY_FUNCTION__))
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or sign extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot noop or sign extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3559, __PRETTY_FUNCTION__))
         "Cannot noop or sign extend with non-integer arguments!")(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or sign extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot noop or sign extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3559, __PRETTY_FUNCTION__));
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&((getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrSignExtend cannot truncate!") ? static_cast<void > (0) : __assert_fail ("getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && \"getNoopOrSignExtend cannot truncate!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3561, __PRETTY_FUNCTION__))
         "getNoopOrSignExtend cannot truncate!")((getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrSignExtend cannot truncate!") ? static_cast<void > (0) : __assert_fail ("getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && \"getNoopOrSignExtend cannot truncate!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3561, __PRETTY_FUNCTION__));
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getSignExtendExpr(V, Ty);
}
3566 | |
/// Widen V to type Ty by an "any" extension (unspecified upper bits);
/// no-op when the widths match.  Truncation is asserted against.
const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or any extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot noop or any extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3572, __PRETTY_FUNCTION__))
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or any extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot noop or any extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3572, __PRETTY_FUNCTION__))
         "Cannot noop or any extend with non-integer arguments!")(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or any extend with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot noop or any extend with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3572, __PRETTY_FUNCTION__));
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&((getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrAnyExtend cannot truncate!") ? static_cast<void > (0) : __assert_fail ("getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && \"getNoopOrAnyExtend cannot truncate!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3574, __PRETTY_FUNCTION__))
         "getNoopOrAnyExtend cannot truncate!")((getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrAnyExtend cannot truncate!") ? static_cast<void > (0) : __assert_fail ("getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && \"getNoopOrAnyExtend cannot truncate!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3574, __PRETTY_FUNCTION__));
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getAnyExtendExpr(V, Ty);
}
3579 | |
/// Narrow V to type Ty by truncation; no-op when the widths match.
/// Extension is asserted against.
const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or noop with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate or noop with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3585, __PRETTY_FUNCTION__))
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or noop with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate or noop with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3585, __PRETTY_FUNCTION__))
         "Cannot truncate or noop with non-integer arguments!")(((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or noop with non-integer arguments!" ) ? static_cast<void> (0) : __assert_fail ("(SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && \"Cannot truncate or noop with non-integer arguments!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3585, __PRETTY_FUNCTION__));
  assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&((getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && "getTruncateOrNoop cannot extend!") ? static_cast<void> (0) : __assert_fail ("getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && \"getTruncateOrNoop cannot extend!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3587, __PRETTY_FUNCTION__))
         "getTruncateOrNoop cannot extend!")((getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && "getTruncateOrNoop cannot extend!") ? static_cast<void> (0) : __assert_fail ("getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && \"getTruncateOrNoop cannot extend!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 3587, __PRETTY_FUNCTION__));
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getTruncateExpr(V, Ty);
}
3592 | |
3593 | const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, |
3594 | const SCEV *RHS) { |
3595 | const SCEV *PromotedLHS = LHS; |
3596 | const SCEV *PromotedRHS = RHS; |
3597 | |
3598 | if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) |
3599 | PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); |
3600 | else |
3601 | PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); |
3602 | |
3603 | return getUMaxExpr(PromotedLHS, PromotedRHS); |
3604 | } |
3605 | |
3606 | const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, |
3607 | const SCEV *RHS) { |
3608 | const SCEV *PromotedLHS = LHS; |
3609 | const SCEV *PromotedRHS = RHS; |
3610 | |
3611 | if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) |
3612 | PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); |
3613 | else |
3614 | PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); |
3615 | |
3616 | return getUMinExpr(PromotedLHS, PromotedRHS); |
3617 | } |
3618 | |
3619 | const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { |
3620 | // A pointer operand may evaluate to a nonpointer expression, such as null. |
3621 | if (!V->getType()->isPointerTy()) |
3622 | return V; |
3623 | |
3624 | if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) { |
3625 | return getPointerBase(Cast->getOperand()); |
3626 | } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { |
3627 | const SCEV *PtrOp = nullptr; |
3628 | for (const SCEV *NAryOp : NAry->operands()) { |
3629 | if (NAryOp->getType()->isPointerTy()) { |
3630 | // Cannot find the base of an expression with multiple pointer operands. |
3631 | if (PtrOp) |
3632 | return V; |
3633 | PtrOp = NAryOp; |
3634 | } |
3635 | } |
3636 | if (!PtrOp) |
3637 | return V; |
3638 | return getPointerBase(PtrOp); |
3639 | } |
3640 | return V; |
3641 | } |
3642 | |
3643 | /// Push users of the given Instruction onto the given Worklist. |
3644 | static void |
3645 | PushDefUseChildren(Instruction *I, |
3646 | SmallVectorImpl<Instruction *> &Worklist) { |
3647 | // Push the def-use children onto the Worklist stack. |
3648 | for (User *U : I->users()) |
3649 | Worklist.push_back(cast<Instruction>(U)); |
3650 | } |
3651 | |
/// Invalidate cached SCEVs for the transitive users of PN that still refer
/// to SymName -- the placeholder SCEV installed while PN's recurrence is
/// being computed -- so those users get recomputed against PN's final SCEV.
void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
  // Worklist-driven def-use traversal starting from PN's users.
  SmallVector<Instruction *, 16> Worklist;
  PushDefUseChildren(PN, Worklist);

  // Visited guards against cycles (PHIs) and repeated work; PN itself is
  // never re-examined.
  SmallPtrSet<Instruction *, 8> Visited;
  Visited.insert(PN);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)
      continue;

    auto It = ValueExprMap.find_as(static_cast<Value *>(I));
    if (It != ValueExprMap.end()) {
      const SCEV *Old = It->second;

      // Short-circuit the def-use traversal if the symbolic name
      // ceases to appear in expressions.
      if (Old != SymName && !hasOperand(Old, SymName))
        continue;

      // SCEVUnknown for a PHI either means that it has an unrecognized
      // structure, it's a PHI that's in the progress of being computed
      // by createNodeForPHI, or it's a single-value PHI. In the first case,
      // additional loop trip count information isn't going to change anything.
      // In the second case, createNodeForPHI will perform the necessary
      // updates on its own when it gets to that point. In the third, we do
      // want to forget the SCEVUnknown.
      if (!isa<PHINode>(I) ||
          !isa<SCEVUnknown>(Old) ||
          (I != PN && Old == SymName)) {
        forgetMemoizedResults(Old);
        ValueExprMap.erase(It);
      }
    }

    // Continue outward to this instruction's users even if its own entry
    // was untouched (it may not be cached yet).
    PushDefUseChildren(I, Worklist);
  }
}
3690 | |
3691 | namespace { |
3692 | class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> { |
3693 | public: |
3694 | static const SCEV *rewrite(const SCEV *S, const Loop *L, |
3695 | ScalarEvolution &SE) { |
3696 | SCEVInitRewriter Rewriter(L, SE); |
3697 | const SCEV *Result = Rewriter.visit(S); |
3698 | return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); |
3699 | } |
3700 | |
3701 | SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) |
3702 | : SCEVRewriteVisitor(SE), L(L), Valid(true) {} |
3703 | |
3704 | const SCEV *visitUnknown(const SCEVUnknown *Expr) { |
3705 | if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) |
3706 | Valid = false; |
3707 | return Expr; |
3708 | } |
3709 | |
3710 | const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { |
3711 | // Only allow AddRecExprs for this loop. |
3712 | if (Expr->getLoop() == L) |
3713 | return Expr->getStart(); |
3714 | Valid = false; |
3715 | return Expr; |
3716 | } |
3717 | |
3718 | bool isValid() { return Valid; } |
3719 | |
3720 | private: |
3721 | const Loop *L; |
3722 | bool Valid; |
3723 | }; |
3724 | |
3725 | class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> { |
3726 | public: |
3727 | static const SCEV *rewrite(const SCEV *S, const Loop *L, |
3728 | ScalarEvolution &SE) { |
3729 | SCEVShiftRewriter Rewriter(L, SE); |
3730 | const SCEV *Result = Rewriter.visit(S); |
3731 | return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); |
3732 | } |
3733 | |
3734 | SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE) |
3735 | : SCEVRewriteVisitor(SE), L(L), Valid(true) {} |
3736 | |
3737 | const SCEV *visitUnknown(const SCEVUnknown *Expr) { |
3738 | // Only allow AddRecExprs for this loop. |
3739 | if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) |
3740 | Valid = false; |
3741 | return Expr; |
3742 | } |
3743 | |
3744 | const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { |
3745 | if (Expr->getLoop() == L && Expr->isAffine()) |
3746 | return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE)); |
3747 | Valid = false; |
3748 | return Expr; |
3749 | } |
3750 | bool isValid() { return Valid; } |
3751 | |
3752 | private: |
3753 | const Loop *L; |
3754 | bool Valid; |
3755 | }; |
3756 | } // end anonymous namespace |
3757 | |
3758 | SCEV::NoWrapFlags |
3759 | ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { |
3760 | if (!AR->isAffine()) |
3761 | return SCEV::FlagAnyWrap; |
3762 | |
3763 | typedef OverflowingBinaryOperator OBO; |
3764 | SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap; |
3765 | |
3766 | if (!AR->hasNoSignedWrap()) { |
3767 | ConstantRange AddRecRange = getSignedRange(AR); |
3768 | ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this)); |
3769 | |
3770 | auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( |
3771 | Instruction::Add, IncRange, OBO::NoSignedWrap); |
3772 | if (NSWRegion.contains(AddRecRange)) |
3773 | Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW); |
3774 | } |
3775 | |
3776 | if (!AR->hasNoUnsignedWrap()) { |
3777 | ConstantRange AddRecRange = getUnsignedRange(AR); |
3778 | ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this)); |
3779 | |
3780 | auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( |
3781 | Instruction::Add, IncRange, OBO::NoUnsignedWrap); |
3782 | if (NUWRegion.contains(AddRecRange)) |
3783 | Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW); |
3784 | } |
3785 | |
3786 | return Result; |
3787 | } |
3788 | |
3789 | namespace { |
3790 | /// Represents an abstract binary operation. This may exist as a |
3791 | /// normal instruction or constant expression, or may have been |
3792 | /// derived from an expression tree. |
3793 | struct BinaryOp { |
3794 | unsigned Opcode; |
3795 | Value *LHS; |
3796 | Value *RHS; |
3797 | bool IsNSW; |
3798 | bool IsNUW; |
3799 | |
3800 | /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or |
3801 | /// constant expression. |
3802 | Operator *Op; |
3803 | |
3804 | explicit BinaryOp(Operator *Op) |
3805 | : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)), |
3806 | IsNSW(false), IsNUW(false), Op(Op) { |
3807 | if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) { |
3808 | IsNSW = OBO->hasNoSignedWrap(); |
3809 | IsNUW = OBO->hasNoUnsignedWrap(); |
3810 | } |
3811 | } |
3812 | |
3813 | explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false, |
3814 | bool IsNUW = false) |
3815 | : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW), |
3816 | Op(nullptr) {} |
3817 | }; |
3818 | } |
3819 | |
3820 | |
/// Try to map \p V into a BinaryOp, and return \c None on failure.
///
/// Besides plain binary operators, this recognizes a few disguised forms:
/// xor with the sign bit (really an add), logical shift right by a constant
/// (really an unsigned divide), and extraction of the arithmetic result of a
/// {s,u}{add,sub,mul}.with.overflow intrinsic. \p DT is used to decide
/// whether an overflow intrinsic's result is fully guarded by its overflow
/// check, in which case no-wrap flags can be attached.
static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
  auto *Op = dyn_cast<Operator>(V);
  if (!Op)
    return None;

  // Implementation detail: all the cleverness here should happen without
  // creating new SCEV expressions -- our caller knows tricks to avoid creating
  // SCEV expressions when possible, and we should not break that.

  switch (Op->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::AShr:
  case Instruction::Shl:
    // These map directly to a BinaryOp with the same opcode.
    return BinaryOp(Op);

  case Instruction::Xor:
    if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
      // If the RHS of the xor is a signbit, then this is just an add.
      // Instcombine turns add of signbit into xor as a strength reduction step.
      if (RHSC->getValue().isSignBit())
        return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
    return BinaryOp(Op);

  case Instruction::LShr:
    // Turn logical shift right of a constant into a unsigned divide.
    if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
      uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth();

      // If the shift count is not less than the bitwidth, the result of
      // the shift is undefined. Don't try to analyze it, because the
      // resolution chosen here may differ from the resolution chosen in
      // other parts of the compiler.
      if (SA->getValue().ult(BitWidth)) {
        Constant *X =
            ConstantInt::get(SA->getContext(),
                             APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
        return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
      }
    }
    return BinaryOp(Op);

  case Instruction::ExtractValue: {
    auto *EVI = cast<ExtractValueInst>(Op);
    // Only element 0 (the arithmetic result) of a with.overflow intrinsic is
    // interesting here; element 1 is the overflow bit.
    if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
      break;

    auto *CI = dyn_cast<CallInst>(EVI->getAggregateOperand());
    if (!CI)
      break;

    if (auto *F = CI->getCalledFunction())
      switch (F->getIntrinsicID()) {
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::uadd_with_overflow: {
        if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
                          CI->getArgOperand(1));

        // Now that we know that all uses of the arithmetic-result component of
        // CI are guarded by the overflow check, we can go ahead and pretend
        // that the arithmetic is non-overflowing.
        if (F->getIntrinsicID() == Intrinsic::sadd_with_overflow)
          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ true,
                          /* IsNUW = */ false);
        else
          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
                          CI->getArgOperand(1), /* IsNSW = */ false,
                          /* IsNUW*/ true);
      }

      case Intrinsic::ssub_with_overflow:
      case Intrinsic::usub_with_overflow:
        return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
                        CI->getArgOperand(1));

      case Intrinsic::smul_with_overflow:
      case Intrinsic::umul_with_overflow:
        return BinaryOp(Instruction::Mul, CI->getArgOperand(0),
                        CI->getArgOperand(1));
      default:
        break;
      }
    // Not a recognized intrinsic: fall through to the "no match" path below.
  }

  default:
    break;
  }

  return None;
}
3918 | |
/// Try to model PHI node \p PN as an add recurrence {Start,+,Accum} on the
/// loop whose header contains it. On success the recurrence is memoized in
/// ValueExprMap and returned; otherwise nullptr is returned and any temporary
/// map entry for PN is removed.
const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
  const Loop *L = LI.getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent())
    return nullptr;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
      if (!BEValueV) {
        BEValueV = V;
      } else if (BEValueV != V) {
        // Conflicting backedge values: give up.
        BEValueV = nullptr;
        break;
      }
    } else if (!StartValueV) {
      StartValueV = V;
    } else if (StartValueV != V) {
      // Conflicting entry values: give up.
      StartValueV = nullptr;
      break;
    }
  }
  if (BEValueV && StartValueV) {
    // While we are analyzing this PHI node, handle its value symbolically.
    const SCEV *SymbolicName = getUnknown(PN);
    assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
           "PHI node already processed?");
    ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});

    // Using this symbolic name for the PHI, analyze the value coming around
    // the back-edge.
    const SCEV *BEValue = getSCEV(BEValueV);

    // NOTE: If BEValue is loop invariant, we know that the PHI node just
    // has a special value for the first iteration of the loop.

    // If the value coming around the backedge is an add with the symbolic
    // value we just inserted, then we found a simple induction variable!
    if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
      // If there is a single occurrence of the symbolic value, replace it
      // with a recurrence.
      unsigned FoundIndex = Add->getNumOperands();
      for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
        if (Add->getOperand(i) == SymbolicName)
          if (FoundIndex == e) {
            FoundIndex = i;
            break;
          }

      if (FoundIndex != Add->getNumOperands()) {
        // Create an add with everything but the specified operand.
        SmallVector<const SCEV *, 8> Ops;
        for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
          if (i != FoundIndex)
            Ops.push_back(Add->getOperand(i));
        const SCEV *Accum = getAddExpr(Ops);

        // This is not a valid addrec if the step amount is varying each
        // loop iteration, but is not itself an addrec in this loop.
        if (isLoopInvariant(Accum, L) ||
            (isa<SCEVAddRecExpr>(Accum) &&
             cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
          SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;

          if (auto BO = MatchBinaryOp(BEValueV, DT)) {
            if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
              // The increment's own nuw/nsw flags carry over to the
              // recurrence.
              if (BO->IsNUW)
                Flags = setFlags(Flags, SCEV::FlagNUW);
              if (BO->IsNSW)
                Flags = setFlags(Flags, SCEV::FlagNSW);
            }
          } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
            // If the increment is an inbounds GEP, then we know the address
            // space cannot be wrapped around. We cannot make any guarantee
            // about signed or unsigned overflow because pointers are
            // unsigned but we may have a negative index from the base
            // pointer. We can guarantee that no unsigned wrap occurs if the
            // indices form a positive value.
            if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
              Flags = setFlags(Flags, SCEV::FlagNW);

              const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
              if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
                Flags = setFlags(Flags, SCEV::FlagNUW);
            }

            // We cannot transfer nuw and nsw flags from subtraction
            // operations -- sub nuw X, Y is not the same as add nuw X, -Y
            // for instance.
          }

          const SCEV *StartVal = getSCEV(StartValueV);
          const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);

          // Okay, for the entire analysis of this edge we assumed the PHI
          // to be symbolic. We now need to go back and purge all of the
          // entries for the scalars that use the symbolic expression.
          forgetSymbolicName(PN, SymbolicName);
          ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;

          // We can add Flags to the post-inc expression only if we
          // know that it is *undefined behavior* for BEValueV to
          // overflow.
          if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
            if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
              (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);

          return PHISCEV;
        }
      }
    } else {
      // Otherwise, this could be a loop like this:
      //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
      // In this case, j = {1,+,1}  and BEValue is j.
      // Because the other in-value of i (0) fits the evolution of BEValue
      // i really is an addrec evolution.
      //
      // We can generalize this saying that i is the shifted value of BEValue
      // by one iteration:
      //   PHI(f(0), f({1,+,1})) --> f({0,+,1})
      const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
      const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
      if (Shifted != getCouldNotCompute() &&
          Start != getCouldNotCompute()) {
        const SCEV *StartVal = getSCEV(StartValueV);
        if (Start == StartVal) {
          // Okay, for the entire analysis of this edge we assumed the PHI
          // to be symbolic. We now need to go back and purge all of the
          // entries for the scalars that use the symbolic expression.
          forgetSymbolicName(PN, SymbolicName);
          ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
          return Shifted;
        }
      }
    }

    // Remove the temporary PHI node SCEV that has been inserted while intending
    // to create an AddRecExpr for this PHI node. We can not keep this temporary
    // as it will prevent later (possibly simpler) SCEV expressions to be added
    // to the ValueExprMap.
    ValueExprMap.erase(PN);
  }

  return nullptr;
}
4067 | |
// Checks if the SCEV S is available at BB. S is considered available at BB
// if S can be materialized at BB without introducing a fault.
static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
                               BasicBlock *BB) {
  // Visitor for SCEVTraversal below: assumes availability until some
  // sub-expression proves otherwise.
  struct CheckAvailable {
    bool TraversalDone = false;
    bool Available = true;

    const Loop *L = nullptr;  // The loop BB is in (can be nullptr)
    BasicBlock *BB = nullptr;
    DominatorTree &DT;

    CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
      : L(L), BB(BB), DT(DT) {}

    // Record failure and halt the traversal early.
    bool setUnavailable() {
      TraversalDone = true;
      Available = false;
      return false;
    }

    // Return value tells SCEVTraversal whether to descend into S's operands;
    // returning false without calling setUnavailable() merely skips operands
    // while leaving Available == true.
    bool follow(const SCEV *S) {
      switch (S->getSCEVType()) {
      case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
      case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
        // These expressions are available if their operand(s) is/are.
        return true;

      case scAddRecExpr: {
        // We allow add recurrences that are on the loop BB is in, or some
        // outer loop.  This guarantees availability because the value of the
        // add recurrence at BB is simply the "current" value of the induction
        // variable.  We can relax this in the future; for instance an add
        // recurrence on a sibling dominating loop is also available at BB.
        const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
        if (L && (ARLoop == L || ARLoop->contains(L)))
          return true;

        return setUnavailable();
      }

      case scUnknown: {
        // For SCEVUnknown, we check for simple dominance.
        const auto *SU = cast<SCEVUnknown>(S);
        Value *V = SU->getValue();

        // Arguments and instructions dominating BB are available; returning
        // false only stops descent (SCEVUnknown has no operands anyway).
        if (isa<Argument>(V))
          return false;

        if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
          return false;

        return setUnavailable();
      }

      case scUDivExpr:
      case scCouldNotCompute:
        // We do not try to smart about these at all.
        return setUnavailable();
      }
      llvm_unreachable("switch should be fully covered!");
    }

    bool isDone() { return TraversalDone; }
  };

  CheckAvailable CA(L, BB, DT);
  SCEVTraversal<CheckAvailable> ST(CA);

  ST.visitAll(S);
  return CA.Available;
}
4140 | |
4141 | // Try to match a control flow sequence that branches out at BI and merges back |
4142 | // at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful |
4143 | // match. |
4144 | static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, |
4145 | Value *&C, Value *&LHS, Value *&RHS) { |
4146 | C = BI->getCondition(); |
4147 | |
4148 | BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0)); |
4149 | BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1)); |
4150 | |
4151 | if (!LeftEdge.isSingleEdge()) |
4152 | return false; |
4153 | |
4154 | assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()")((RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()" ) ? static_cast<void> (0) : __assert_fail ("RightEdge.isSingleEdge() && \"Follows from LeftEdge.isSingleEdge()\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 4154, __PRETTY_FUNCTION__)); |
4155 | |
4156 | Use &LeftUse = Merge->getOperandUse(0); |
4157 | Use &RightUse = Merge->getOperandUse(1); |
4158 | |
4159 | if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) { |
4160 | LHS = LeftUse; |
4161 | RHS = RightUse; |
4162 | return true; |
4163 | } |
4164 | |
4165 | if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) { |
4166 | LHS = RightUse; |
4167 | RHS = LeftUse; |
4168 | return true; |
4169 | } |
4170 | |
4171 | return false; |
4172 | } |
4173 | |
4174 | const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) { |
4175 | if (PN->getNumIncomingValues() == 2) { |
4176 | const Loop *L = LI.getLoopFor(PN->getParent()); |
4177 | |
4178 | // We don't want to break LCSSA, even in a SCEV expression tree. |
4179 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
4180 | if (LI.getLoopFor(PN->getIncomingBlock(i)) != L) |
4181 | return nullptr; |
4182 | |
4183 | // Try to match |
4184 | // |
4185 | // br %cond, label %left, label %right |
4186 | // left: |
4187 | // br label %merge |
4188 | // right: |
4189 | // br label %merge |
4190 | // merge: |
4191 | // V = phi [ %x, %left ], [ %y, %right ] |
4192 | // |
4193 | // as "select %cond, %x, %y" |
4194 | |
4195 | BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock(); |
4196 | assert(IDom && "At least the entry block should dominate PN")((IDom && "At least the entry block should dominate PN" ) ? static_cast<void> (0) : __assert_fail ("IDom && \"At least the entry block should dominate PN\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 4196, __PRETTY_FUNCTION__)); |
4197 | |
4198 | auto *BI = dyn_cast<BranchInst>(IDom->getTerminator()); |
4199 | Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr; |
4200 | |
4201 | if (BI && BI->isConditional() && |
4202 | BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) && |
4203 | IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) && |
4204 | IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent())) |
4205 | return createNodeForSelectOrPHI(PN, Cond, LHS, RHS); |
4206 | } |
4207 | |
4208 | return nullptr; |
4209 | } |
4210 | |
4211 | const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { |
4212 | if (const SCEV *S = createAddRecFromPHI(PN)) |
4213 | return S; |
4214 | |
4215 | if (const SCEV *S = createNodeFromSelectLikePHI(PN)) |
4216 | return S; |
4217 | |
4218 | // If the PHI has a single incoming value, follow that value, unless the |
4219 | // PHI's incoming blocks are in a different loop, in which case doing so |
4220 | // risks breaking LCSSA form. Instcombine would normally zap these, but |
4221 | // it doesn't have DominatorTree information, so it may miss cases. |
4222 | if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC)) |
4223 | if (LI.replacementPreservesLCSSAForm(PN, V)) |
4224 | return getSCEV(V); |
4225 | |
4226 | // If it's not a loop phi, we can't handle it yet. |
4227 | return getUnknown(PN); |
4228 | } |
4229 | |
/// Build a SCEV for the two-armed construct "Cond ? TrueVal : FalseVal",
/// associating the result with instruction \p I (a select, or a PHI that
/// createNodeFromSelectLikePHI matched as a select). Recognizes constant
/// conditions and the smax/smin/umax/umin idioms annotated below; anything
/// unrecognized becomes SCEVUnknown(I).
const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
                                                      Value *Cond,
                                                      Value *TrueVal,
                                                      Value *FalseVal) {
  // Handle "constant" branch or select. This can occur for instance when a
  // loop pass transforms an inner loop and moves on to process the outer loop.
  if (auto *CI = dyn_cast<ConstantInt>(Cond))
    return getSCEV(CI->isOne() ? TrueVal : FalseVal);

  // Try to match some simple smax or umax patterns.
  auto *ICI = dyn_cast<ICmpInst>(Cond);
  if (!ICI)
    return getUnknown(I);

  Value *LHS = ICI->getOperand(0);
  Value *RHS = ICI->getOperand(1);

  switch (ICI->getPredicate()) {
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    // Canonicalize "a < b" to "b > a" so the SGT/SGE code below applies.
    std::swap(LHS, RHS);
    // fall through
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    // a >s b ? a+x : b+x  ->  smax(a, b)+x
    // a >s b ? b+x : a+x  ->  smin(a, b)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
      const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType());
      const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      // Both arms must differ from the max (resp. min) by the same x.
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, RS);
      if (LDiff == RDiff)
        return getAddExpr(getSMaxExpr(LS, RS), LDiff);
      LDiff = getMinusSCEV(LA, RS);
      RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getSMinExpr(LS, RS), LDiff);
    }
    break;
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    // Canonicalize "a <u b" to "b >u a" so the UGT/UGE code below applies.
    std::swap(LHS, RHS);
    // fall through
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    // a >u b ? a+x : b+x  ->  umax(a, b)+x
    // a >u b ? b+x : a+x  ->  umin(a, b)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      // Both arms must differ from the max (resp. min) by the same x.
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, RS);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(LS, RS), LDiff);
      LDiff = getMinusSCEV(LA, RS);
      RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getUMinExpr(LS, RS), LDiff);
    }
    break;
  case ICmpInst::ICMP_NE:
    // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
      const SCEV *One = getOne(I->getType());
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, One);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(One, LS), LDiff);
    }
    break;
  case ICmpInst::ICMP_EQ:
    // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
      const SCEV *One = getOne(I->getType());
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, One);
      const SCEV *RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(One, LS), LDiff);
    }
    break;
  default:
    break;
  }

  return getUnknown(I);
}
4328 | |
4329 | /// Expand GEP instructions into add and multiply operations. This allows them |
4330 | /// to be analyzed by regular SCEV code. |
4331 | const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { |
4332 | // Don't attempt to analyze GEPs over unsized objects. |
4333 | if (!GEP->getSourceElementType()->isSized()) |
4334 | return getUnknown(GEP); |
4335 | |
4336 | SmallVector<const SCEV *, 4> IndexExprs; |
4337 | for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index) |
4338 | IndexExprs.push_back(getSCEV(*Index)); |
4339 | return getGEPExpr(GEP->getSourceElementType(), |
4340 | getSCEV(GEP->getPointerOperand()), |
4341 | IndexExprs, GEP->isInBounds()); |
4342 | } |
4343 | |
/// Return a conservative lower bound on the number of trailing zero bits in
/// \p S (equivalently, the power of two by which S is known to be divisible).
uint32_t
ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    // Constants can be counted exactly.
    return C->getAPInt().countTrailingZeros();

  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
    // Truncation keeps the low bits, but cannot report more zeros than the
    // narrow type has bits.
    return std::min(GetMinTrailingZeros(T->getOperand()),
                    (uint32_t)getTypeSizeInBits(T->getType()));

  if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
    // If the operand is entirely zero (all its bits are trailing zeros), so
    // is the extended value; otherwise the operand's bound carries over.
    return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
      getTypeSizeInBits(E->getType()) : OpRes;
  }

  if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
    // Same reasoning as the zero-extend case above.
    return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
      getTypeSizeInBits(E->getType()) : OpRes;
  }

  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    // The result is the sum of all operands results (capped at the bit
    // width, since more trailing zeros than bits is meaningless).
    uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
    uint32_t BitWidth = getTypeSizeInBits(M->getType());
    for (unsigned i = 1, e = M->getNumOperands();
         SumOpRes != BitWidth && i != e; ++i)
      SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
                          BitWidth);
    return SumOpRes;
  }

  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // For a SCEVUnknown, ask ValueTracking.
    unsigned BitWidth = getTypeSizeInBits(U->getType());
    APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
    computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC,
                     nullptr, &DT);
    return Zeros.countTrailingOnes();
  }

  // SCEVUDivExpr
  return 0;
}
4420 | |
4421 | /// Helper method to assign a range to V from metadata present in the IR. |
4422 | static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { |
4423 | if (Instruction *I = dyn_cast<Instruction>(V)) |
4424 | if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) |
4425 | return getConstantRangeFromMetadata(*MD); |
4426 | |
4427 | return None; |
4428 | } |
4429 | |
/// Determine the range for a particular SCEV.  If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
ConstantRange
ScalarEvolution::getRange(const SCEV *S,
                          ScalarEvolution::RangeSignHint SignHint) {
  // Each sign hint keeps its own memoization cache, since the "preferred"
  // range for the same SCEV can differ between the two hints.
  DenseMap<const SCEV *, ConstantRange> &Cache =
      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
                                                       : SignedRanges;

  // See if we've computed this range already.
  DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
  if (I != Cache.end())
    return I->second;

  // A constant's range is the singleton range containing just that value.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return setRange(C, SignHint, ConstantRange(C->getAPInt()));

  // Start from the full set and narrow it with every fact we can derive;
  // setRange records the final result in the cache.
  unsigned BitWidth = getTypeSizeInBits(S->getType());
  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);

  // If the value has known zeros, the maximum value will have those known zeros
  // as well.
  uint32_t TZ = GetMinTrailingZeros(S);
  if (TZ != 0) {
    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
      ConservativeResult =
          ConstantRange(APInt::getMinValue(BitWidth),
                        APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
    else
      ConservativeResult = ConstantRange(
          APInt::getSignedMinValue(BitWidth),
          APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
  }

  // For each n-ary expression kind, fold the operands' ranges with the
  // corresponding ConstantRange operation and intersect with the
  // conservative bound computed above.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    ConstantRange X = getRange(Add->getOperand(0), SignHint);
    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
      X = X.add(getRange(Add->getOperand(i), SignHint));
    return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    ConstantRange X = getRange(Mul->getOperand(0), SignHint);
    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
      X = X.multiply(getRange(Mul->getOperand(i), SignHint));
    return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
    ConstantRange X = getRange(SMax->getOperand(0), SignHint);
    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
      X = X.smax(getRange(SMax->getOperand(i), SignHint));
    return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
    ConstantRange X = getRange(UMax->getOperand(0), SignHint);
    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
      X = X.umax(getRange(UMax->getOperand(i), SignHint));
    return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
    ConstantRange X = getRange(UDiv->getLHS(), SignHint);
    ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
    return setRange(UDiv, SignHint,
                    ConservativeResult.intersectWith(X.udiv(Y)));
  }

  // Cast expressions: compute the operand's range at its own width, then
  // apply the matching ConstantRange cast to reach this expression's width.
  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
    ConstantRange X = getRange(ZExt->getOperand(), SignHint);
    return setRange(ZExt, SignHint,
                    ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
  }

  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
    ConstantRange X = getRange(SExt->getOperand(), SignHint);
    return setRange(SExt, SignHint,
                    ConservativeResult.intersectWith(X.signExtend(BitWidth)));
  }

  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
    ConstantRange X = getRange(Trunc->getOperand(), SignHint);
    return setRange(Trunc, SignHint,
                    ConservativeResult.intersectWith(X.truncate(BitWidth)));
  }

  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    // If there's no unsigned wrap, the value will never be less than its
    // initial value.
    if (AddRec->hasNoUnsignedWrap())
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
        if (!C->getValue()->isZero())
          ConservativeResult = ConservativeResult.intersectWith(
              ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));

    // If there's no signed wrap, and all the operands have the same sign or
    // zero, the value won't ever change sign.
    if (AddRec->hasNoSignedWrap()) {
      bool AllNonNeg = true;
      bool AllNonPos = true;
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
        if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
        if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
      }
      if (AllNonNeg)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt(BitWidth, 0),
                          APInt::getSignedMinValue(BitWidth)));
      else if (AllNonPos)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt::getSignedMinValue(BitWidth),
                          APInt(BitWidth, 1)));
    }

    // TODO: non-affine addrec
    if (AddRec->isAffine()) {
      // Both bounding strategies below require a max backedge-taken count
      // that fits in this expression's bit width.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
        auto RangeFromAffine = getRangeForAffineAR(
            AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
            BitWidth);
        if (!RangeFromAffine.isFullSet())
          ConservativeResult =
              ConservativeResult.intersectWith(RangeFromAffine);

        // Additionally try factoring select-based start/step patterns; see
        // getRangeViaFactoring.
        auto RangeFromFactoring = getRangeViaFactoring(
            AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
            BitWidth);
        if (!RangeFromFactoring.isFullSet())
          ConservativeResult =
              ConservativeResult.intersectWith(RangeFromFactoring);
      }
    }

    return setRange(AddRec, SignHint, ConservativeResult);
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // Check if the IR explicitly contains !range metadata.
    Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
    if (MDRange.hasValue())
      ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());

    // Split here to avoid paying the compile-time cost of calling both
    // computeKnownBits and ComputeNumSignBits.  This restriction can be lifted
    // if needed.
    const DataLayout &DL = getDataLayout();
    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
      // For a SCEVUnknown, ask ValueTracking.
      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
      computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
      // Known bits give the range [Ones, ~Zeros]; skip the degenerate case
      // where that would be an empty/ill-formed ConstantRange.
      if (Ones != ~Zeros + 1)
        ConservativeResult =
            ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
    } else {
      assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
             "generalize as needed!");
      unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
      // NS sign bits means the value fits in BitWidth - NS + 1 bits.
      if (NS > 1)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
                          APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
    }

    return setRange(U, SignHint, ConservativeResult);
  }

  // Any SCEV kind not handled above (e.g. SCEVCouldNotCompute) gets the
  // conservative bound derived from trailing zeros, or the full set.
  return setRange(S, SignHint, ConservativeResult);
}
4602 | |
/// Compute a range for the affine recurrence {Start,+,Step} given an upper
/// bound MaxBECount on the number of backedges taken.  Returns the full set
/// when neither the unsigned nor signed end-point computation can be proven
/// overflow-free.
ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
                                                   const SCEV *Step,
                                                   const SCEV *MaxBECount,
                                                   unsigned BitWidth) {
  assert(!isa<SCEVCouldNotCompute>(MaxBECount) &&
         getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
         "Precondition!");

  ConstantRange Result(BitWidth, /* isFullSet = */ true);

  // Check for overflow.  This must be done with ConstantRange arithmetic
  // because we could be called from within the ScalarEvolution overflow
  // checking code.

  // Widen the operand ranges to 2*BitWidth+1 bits so Start + MaxBECount*Step
  // can be evaluated without wrapping; if the widened result matches the
  // narrow one, the narrow computation did not overflow.
  MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
  ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
  ConstantRange ZExtMaxBECountRange =
      MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);

  ConstantRange StepSRange = getSignedRange(Step);
  ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);

  // End of the recurrence, computed in the unsigned domain.
  ConstantRange StartURange = getUnsignedRange(Start);
  ConstantRange EndURange =
      StartURange.add(MaxBECountRange.multiply(StepSRange));

  // Check for unsigned overflow.
  ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2 + 1);
  ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
  if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
      ZExtEndURange) {
    // No unsigned wrap: the recurrence stays between the start and end
    // extremes (Step's sign decides which is smaller, so take min/max).
    APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
                               EndURange.getUnsignedMin());
    APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
                               EndURange.getUnsignedMax());
    bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
    if (!IsFullRange)
      Result =
          Result.intersectWith(ConstantRange(Min, Max + 1));
  }

  // Same computation in the signed domain.
  ConstantRange StartSRange = getSignedRange(Start);
  ConstantRange EndSRange =
      StartSRange.add(MaxBECountRange.multiply(StepSRange));

  // Check for signed overflow.  This must be done with ConstantRange
  // arithmetic because we could be called from within the ScalarEvolution
  // overflow checking code.
  ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2 + 1);
  ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
  if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
      SExtEndSRange) {
    APInt Min =
        APIntOps::smin(StartSRange.getSignedMin(), EndSRange.getSignedMin());
    APInt Max =
        APIntOps::smax(StartSRange.getSignedMax(), EndSRange.getSignedMax());
    bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
    if (!IsFullRange)
      Result =
          Result.intersectWith(ConstantRange(Min, Max + 1));
  }

  return Result;
}
4667 | |
/// Compute a range for {Start,+,Step} when Start and Step are both selects
/// on the same condition, by factoring the recurrence into the two constant
/// arms and taking the union of their ranges.  Returns the full set when the
/// pattern is not recognized.
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
                                                    const SCEV *Step,
                                                    const SCEV *MaxBECount,
                                                    unsigned BitWidth) {
  // RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q})
  //                          == RangeOf({A,+,P}) union RangeOf({B,+,Q})

  // Matches a SCEV of the shape (cast? (select C, TrueVal, FalseVal)) +
  // constant-offset and recovers the condition plus the two constant arms,
  // with the cast and offset re-applied to each arm.
  struct SelectPattern {
    Value *Condition = nullptr;
    APInt TrueValue;
    APInt FalseValue;

    explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth,
                           const SCEV *S) {
      Optional<unsigned> CastOp;
      APInt Offset(BitWidth, 0);

      assert(SE.getTypeSizeInBits(S->getType()) == BitWidth &&
             "Should be!");

      // Peel off a constant offset:
      if (auto *SA = dyn_cast<SCEVAddExpr>(S)) {
        // In the future we could consider being smarter here and handle
        // {Start+Step,+,Step} too.
        if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0)))
          return;

        Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt();
        S = SA->getOperand(1);
      }

      // Peel off a cast operation
      if (auto *SCast = dyn_cast<SCEVCastExpr>(S)) {
        CastOp = SCast->getSCEVType();
        S = SCast->getOperand();
      }

      using namespace llvm::PatternMatch;

      // The core must be a SCEVUnknown wrapping `select C, constant, constant`.
      auto *SU = dyn_cast<SCEVUnknown>(S);
      const APInt *TrueVal, *FalseVal;
      if (!SU ||
          !match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal),
                                          m_APInt(FalseVal)))) {
        // Reset Condition (m_Value may have bound it) so isRecognized()
        // reports failure.
        Condition = nullptr;
        return;
      }

      TrueValue = *TrueVal;
      FalseValue = *FalseVal;

      // Re-apply the cast we peeled off earlier
      if (CastOp.hasValue())
        switch (*CastOp) {
        default:
          llvm_unreachable("Unknown SCEV cast type!");

        case scTruncate:
          TrueValue = TrueValue.trunc(BitWidth);
          FalseValue = FalseValue.trunc(BitWidth);
          break;
        case scZeroExtend:
          TrueValue = TrueValue.zext(BitWidth);
          FalseValue = FalseValue.zext(BitWidth);
          break;
        case scSignExtend:
          TrueValue = TrueValue.sext(BitWidth);
          FalseValue = FalseValue.sext(BitWidth);
          break;
        }

      // Re-apply the constant offset we peeled off earlier
      TrueValue += Offset;
      FalseValue += Offset;
    }

    bool isRecognized() { return Condition != nullptr; }
  };

  SelectPattern StartPattern(*this, BitWidth, Start);
  if (!StartPattern.isRecognized())
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  SelectPattern StepPattern(*this, BitWidth, Step);
  if (!StepPattern.isRecognized())
    return ConstantRange(BitWidth, /* isFullSet = */ true);

  if (StartPattern.Condition != StepPattern.Condition) {
    // We don't handle this case today; but we could, by considering four
    // possibilities below instead of two. I'm not sure if there are cases where
    // that will help over what getRange already does, though.
    return ConstantRange(BitWidth, /* isFullSet = */ true);
  }

  // NB! Calling ScalarEvolution::getConstant is fine, but we should not try to
  // construct arbitrary general SCEV expressions here.  This function is called
  // from deep in the call stack, and calling getSCEV (on a sext instruction,
  // say) can end up caching a suboptimal value.

  // FIXME: without the explicit `this` receiver below, MSVC errors out with
  // C2352 and C2512 (otherwise it isn't needed).

  const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue);
  const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue);
  const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue);
  const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue);

  // Range of the factored recurrence is the union of the two arms' ranges.
  ConstantRange TrueRange =
      this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth);
  ConstantRange FalseRange =
      this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth);

  return TrueRange.unionWith(FalseRange);
}
4782 | |
4783 | SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { |
4784 | if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap; |
4785 | const BinaryOperator *BinOp = cast<BinaryOperator>(V); |
4786 | |
4787 | // Return early if there are no flags to propagate to the SCEV. |
4788 | SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; |
4789 | if (BinOp->hasNoUnsignedWrap()) |
4790 | Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); |
4791 | if (BinOp->hasNoSignedWrap()) |
4792 | Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); |
4793 | if (Flags == SCEV::FlagAnyWrap) |
4794 | return SCEV::FlagAnyWrap; |
4795 | |
4796 | return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap; |
4797 | } |
4798 | |
/// Return true if \p I is known never to yield poison, and its no-wrap flags
/// can therefore be safely transferred to the SCEV it maps to.  Requires
/// proving that I executes on every iteration of the loop whose add
/// recurrence it participates in.
bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
  // Here we check that I is in the header of the innermost loop containing I,
  // since we only deal with instructions in the loop header. The actual loop we
  // need to check later will come from an add recurrence, but getting that
  // requires computing the SCEV of the operands, which can be expensive. This
  // check we can do cheaply to rule out some cases early.
  Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent());
  if (InnermostContainingLoop == nullptr ||
      InnermostContainingLoop->getHeader() != I->getParent())
    return false;

  // Only proceed if we can prove that I does not yield poison.
  if (!isKnownNotFullPoison(I)) return false;

  // At this point we know that if I is executed, then it does not wrap
  // according to at least one of NSW or NUW. If I is not executed, then we do
  // not know if the calculation that I represents would wrap. Multiple
  // instructions can map to the same SCEV. If we apply NSW or NUW from I to
  // the SCEV, we must guarantee no wrapping for that SCEV also when it is
  // derived from other instructions that map to the same SCEV. We cannot make
  // that guarantee for cases where I is not executed. So we need to find the
  // loop that I is considered in relation to and prove that I is executed for
  // every iteration of that loop. That implies that the value that I
  // calculates does not wrap anywhere in the loop, so then we can apply the
  // flags to the SCEV.
  //
  // We check isLoopInvariant to disambiguate in case we are adding recurrences
  // from different loops, so that we know which loop to prove that I is
  // executed in.
  for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) {
    // Look for an operand whose SCEV is an add recurrence; its loop is the
    // candidate loop for the guaranteed-to-execute proof.
    const SCEV *Op = getSCEV(I->getOperand(OpIndex));
    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
      // All other operands must be invariant in that loop, otherwise the
      // add recurrence's loop is not the one I varies in.
      bool AllOtherOpsLoopInvariant = true;
      for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands();
           ++OtherOpIndex) {
        if (OtherOpIndex != OpIndex) {
          const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex));
          if (!isLoopInvariant(OtherOp, AddRec->getLoop())) {
            AllOtherOpsLoopInvariant = false;
            break;
          }
        }
      }
      if (AllOtherOpsLoopInvariant &&
          isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop()))
        return true;
    }
  }
  // No qualifying add recurrence found; be conservative.
  return false;
}
4849 | |
/// Return true if the add recurrence rooted at \p I in loop \p L can never
/// be poison, either because I itself is never poison, or because a poison
/// value would make the program's behavior undefined (see reasoning below).
bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
  // If we know that \c I can never be poison period, then that's enough.
  if (isSCEVExprNeverPoison(I))
    return true;

  // For an add recurrence specifically, we assume that infinite loops without
  // side effects are undefined behavior, and then reason as follows:
  //
  // If the add recurrence is poison in any iteration, it is poison on all
  // future iterations (since incrementing poison yields poison). If the result
  // of the add recurrence is fed into the loop latch condition and the loop
  // does not contain any throws or exiting blocks other than the latch, we now
  // have the ability to "choose" whether the backedge is taken or not (by
  // choosing a sufficiently evil value for the poison feeding into the branch)
  // for every iteration including and after the one in which \p I first became
  // poison. There are two possibilities (let's call the iteration in which \p
  // I first became poison as K):
  //
  // 1. In the set of iterations including and after K, the loop body executes
  //    no side effects. In this case executing the backege an infinte number
  //    of times will yield undefined behavior.
  //
  // 2. In the set of iterations including and after K, the loop body executes
  //    at least one side effect. In this case, that specific instance of side
  //    effect is control dependent on poison, which also yields undefined
  //    behavior.

  // The argument requires the latch to be the only exiting block.
  auto *ExitingBB = L->getExitingBlock();
  auto *LatchBB = L->getLoopLatch();
  if (!ExitingBB || !LatchBB || ExitingBB != LatchBB)
    return false;

  // Worklist search through I's transitive users for a conditional branch in
  // the latch that is reached only through poison-propagating instructions.
  SmallPtrSet<const Instruction *, 16> Pushed;
  SmallVector<const Instruction *, 8> Stack;

  Pushed.insert(I);
  for (auto *U : I->users())
    if (Pushed.insert(cast<Instruction>(U)).second)
      Stack.push_back(cast<Instruction>(U));

  bool LatchControlDependentOnPoison = false;
  while (!Stack.empty()) {
    const Instruction *I = Stack.pop_back_val();

    for (auto *U : I->users()) {
      if (propagatesFullPoison(cast<Instruction>(U))) {
        // Poison flows through this user; keep following its users.
        if (Pushed.insert(cast<Instruction>(U)).second)
          Stack.push_back(cast<Instruction>(U));
      } else if (auto *BI = dyn_cast<BranchInst>(U)) {
        assert(BI->isConditional() && "Only possibility!");
        if (BI->getParent() == LatchBB) {
          LatchControlDependentOnPoison = true;
          break;
        }
      }
    }
  }

  if (!LatchControlDependentOnPoison)
    return false;

  // Now check if loop is no-throw, and cache the information.  In the future,
  // we can consider commoning this logic with LICMSafetyInfo into a separate
  // analysis pass.

  auto Itr = LoopMayThrow.find(L);
  if (Itr == LoopMayThrow.end()) {
    bool MayThrow = false;
    for (auto *BB : L->getBlocks()) {
      MayThrow = any_of(*BB, [](Instruction &I) { return I.mayThrow(); });
      if (MayThrow)
        break;
    }
    auto InsertPair = LoopMayThrow.insert({L, MayThrow});
    assert(InsertPair.second && "We just checked!");
    Itr = InsertPair.first;
  }

  // Only a no-throw loop lets us conclude the UB argument above holds.
  return !Itr->second;
}
4930 | |
4931 | const SCEV *ScalarEvolution::createSCEV(Value *V) { |
4932 | if (!isSCEVable(V->getType())) |
4933 | return getUnknown(V); |
4934 | |
4935 | if (Instruction *I = dyn_cast<Instruction>(V)) { |
4936 | // Don't attempt to analyze instructions in blocks that aren't |
4937 | // reachable. Such instructions don't matter, and they aren't required |
4938 | // to obey basic rules for definitions dominating uses which this |
4939 | // analysis depends on. |
4940 | if (!DT.isReachableFromEntry(I->getParent())) |
4941 | return getUnknown(V); |
4942 | } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) |
4943 | return getConstant(CI); |
4944 | else if (isa<ConstantPointerNull>(V)) |
4945 | return getZero(V->getType()); |
4946 | else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) |
4947 | return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee()); |
4948 | else if (!isa<ConstantExpr>(V)) |
4949 | return getUnknown(V); |
4950 | |
4951 | Operator *U = cast<Operator>(V); |
4952 | if (auto BO = MatchBinaryOp(U, DT)) { |
4953 | switch (BO->Opcode) { |
4954 | case Instruction::Add: { |
4955 | // The simple thing to do would be to just call getSCEV on both operands |
4956 | // and call getAddExpr with the result. However if we're looking at a |
4957 | // bunch of things all added together, this can be quite inefficient, |
4958 | // because it leads to N-1 getAddExpr calls for N ultimate operands. |
4959 | // Instead, gather up all the operands and make a single getAddExpr call. |
4960 | // LLVM IR canonical form means we need only traverse the left operands. |
4961 | SmallVector<const SCEV *, 4> AddOps; |
4962 | do { |
4963 | if (BO->Op) { |
4964 | if (auto *OpSCEV = getExistingSCEV(BO->Op)) { |
4965 | AddOps.push_back(OpSCEV); |
4966 | break; |
4967 | } |
4968 | |
4969 | // If a NUW or NSW flag can be applied to the SCEV for this |
4970 | // addition, then compute the SCEV for this addition by itself |
4971 | // with a separate call to getAddExpr. We need to do that |
4972 | // instead of pushing the operands of the addition onto AddOps, |
4973 | // since the flags are only known to apply to this particular |
4974 | // addition - they may not apply to other additions that can be |
4975 | // formed with operands from AddOps. |
4976 | const SCEV *RHS = getSCEV(BO->RHS); |
4977 | SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); |
4978 | if (Flags != SCEV::FlagAnyWrap) { |
4979 | const SCEV *LHS = getSCEV(BO->LHS); |
4980 | if (BO->Opcode == Instruction::Sub) |
4981 | AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); |
4982 | else |
4983 | AddOps.push_back(getAddExpr(LHS, RHS, Flags)); |
4984 | break; |
4985 | } |
4986 | } |
4987 | |
4988 | if (BO->Opcode == Instruction::Sub) |
4989 | AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS))); |
4990 | else |
4991 | AddOps.push_back(getSCEV(BO->RHS)); |
4992 | |
4993 | auto NewBO = MatchBinaryOp(BO->LHS, DT); |
4994 | if (!NewBO || (NewBO->Opcode != Instruction::Add && |
4995 | NewBO->Opcode != Instruction::Sub)) { |
4996 | AddOps.push_back(getSCEV(BO->LHS)); |
4997 | break; |
4998 | } |
4999 | BO = NewBO; |
5000 | } while (true); |
5001 | |
5002 | return getAddExpr(AddOps); |
5003 | } |
5004 | |
5005 | case Instruction::Mul: { |
5006 | SmallVector<const SCEV *, 4> MulOps; |
5007 | do { |
5008 | if (BO->Op) { |
5009 | if (auto *OpSCEV = getExistingSCEV(BO->Op)) { |
5010 | MulOps.push_back(OpSCEV); |
5011 | break; |
5012 | } |
5013 | |
5014 | SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); |
5015 | if (Flags != SCEV::FlagAnyWrap) { |
5016 | MulOps.push_back( |
5017 | getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags)); |
5018 | break; |
5019 | } |
5020 | } |
5021 | |
5022 | MulOps.push_back(getSCEV(BO->RHS)); |
5023 | auto NewBO = MatchBinaryOp(BO->LHS, DT); |
5024 | if (!NewBO || NewBO->Opcode != Instruction::Mul) { |
5025 | MulOps.push_back(getSCEV(BO->LHS)); |
5026 | break; |
5027 | } |
5028 | BO = NewBO; |
5029 | } while (true); |
5030 | |
5031 | return getMulExpr(MulOps); |
5032 | } |
5033 | case Instruction::UDiv: |
5034 | return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); |
5035 | case Instruction::Sub: { |
5036 | SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; |
5037 | if (BO->Op) |
5038 | Flags = getNoWrapFlagsFromUB(BO->Op); |
5039 | return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); |
5040 | } |
5041 | case Instruction::And: |
5042 | // For an expression like x&255 that merely masks off the high bits, |
5043 | // use zext(trunc(x)) as the SCEV expression. |
5044 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { |
5045 | if (CI->isNullValue()) |
5046 | return getSCEV(BO->RHS); |
5047 | if (CI->isAllOnesValue()) |
5048 | return getSCEV(BO->LHS); |
5049 | const APInt &A = CI->getValue(); |
5050 | |
5051 | // Instcombine's ShrinkDemandedConstant may strip bits out of |
5052 | // constants, obscuring what would otherwise be a low-bits mask. |
5053 | // Use computeKnownBits to compute what ShrinkDemandedConstant |
5054 | // knew about to reconstruct a low-bits mask value. |
5055 | unsigned LZ = A.countLeadingZeros(); |
5056 | unsigned TZ = A.countTrailingZeros(); |
5057 | unsigned BitWidth = A.getBitWidth(); |
5058 | APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); |
5059 | computeKnownBits(BO->LHS, KnownZero, KnownOne, getDataLayout(), |
5060 | 0, &AC, nullptr, &DT); |
5061 | |
5062 | APInt EffectiveMask = |
5063 | APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); |
5064 | if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) { |
5065 | const SCEV *MulCount = getConstant(ConstantInt::get( |
5066 | getContext(), APInt::getOneBitSet(BitWidth, TZ))); |
5067 | return getMulExpr( |
5068 | getZeroExtendExpr( |
5069 | getTruncateExpr( |
5070 | getUDivExactExpr(getSCEV(BO->LHS), MulCount), |
5071 | IntegerType::get(getContext(), BitWidth - LZ - TZ)), |
5072 | BO->LHS->getType()), |
5073 | MulCount); |
5074 | } |
5075 | } |
5076 | break; |
5077 | |
5078 | case Instruction::Or: |
5079 | // If the RHS of the Or is a constant, we may have something like: |
5080 | // X*4+1 which got turned into X*4|1. Handle this as an Add so loop |
5081 | // optimizations will transparently handle this case. |
5082 | // |
5083 | // In order for this transformation to be safe, the LHS must be of the |
5084 | // form X*(2^n) and the Or constant must be less than 2^n. |
5085 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { |
5086 | const SCEV *LHS = getSCEV(BO->LHS); |
5087 | const APInt &CIVal = CI->getValue(); |
5088 | if (GetMinTrailingZeros(LHS) >= |
5089 | (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { |
5090 | // Build a plain add SCEV. |
5091 | const SCEV *S = getAddExpr(LHS, getSCEV(CI)); |
5092 | // If the LHS of the add was an addrec and it has no-wrap flags, |
5093 | // transfer the no-wrap flags, since an or won't introduce a wrap. |
5094 | if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { |
5095 | const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); |
5096 | const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags( |
5097 | OldAR->getNoWrapFlags()); |
5098 | } |
5099 | return S; |
5100 | } |
5101 | } |
5102 | break; |
5103 | |
5104 | case Instruction::Xor: |
5105 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { |
5106 | // If the RHS of xor is -1, then this is a not operation. |
5107 | if (CI->isAllOnesValue()) |
5108 | return getNotSCEV(getSCEV(BO->LHS)); |
5109 | |
5110 | // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. |
5111 | // This is a variant of the check for xor with -1, and it handles |
5112 | // the case where instcombine has trimmed non-demanded bits out |
5113 | // of an xor with -1. |
5114 | if (auto *LBO = dyn_cast<BinaryOperator>(BO->LHS)) |
5115 | if (ConstantInt *LCI = dyn_cast<ConstantInt>(LBO->getOperand(1))) |
5116 | if (LBO->getOpcode() == Instruction::And && |
5117 | LCI->getValue() == CI->getValue()) |
5118 | if (const SCEVZeroExtendExpr *Z = |
5119 | dyn_cast<SCEVZeroExtendExpr>(getSCEV(BO->LHS))) { |
5120 | Type *UTy = BO->LHS->getType(); |
5121 | const SCEV *Z0 = Z->getOperand(); |
5122 | Type *Z0Ty = Z0->getType(); |
5123 | unsigned Z0TySize = getTypeSizeInBits(Z0Ty); |
5124 | |
5125 | // If C is a low-bits mask, the zero extend is serving to |
5126 | // mask off the high bits. Complement the operand and |
5127 | // re-apply the zext. |
5128 | if (APIntOps::isMask(Z0TySize, CI->getValue())) |
5129 | return getZeroExtendExpr(getNotSCEV(Z0), UTy); |
5130 | |
5131 | // If C is a single bit, it may be in the sign-bit position |
5132 | // before the zero-extend. In this case, represent the xor |
5133 | // using an add, which is equivalent, and re-apply the zext. |
5134 | APInt Trunc = CI->getValue().trunc(Z0TySize); |
5135 | if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && |
5136 | Trunc.isSignBit()) |
5137 | return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), |
5138 | UTy); |
5139 | } |
5140 | } |
5141 | break; |
5142 | |
5143 | case Instruction::Shl: |
5144 | // Turn shift left of a constant amount into a multiply. |
5145 | if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) { |
5146 | uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth(); |
5147 | |
5148 | // If the shift count is not less than the bitwidth, the result of |
5149 | // the shift is undefined. Don't try to analyze it, because the |
5150 | // resolution chosen here may differ from the resolution chosen in |
5151 | // other parts of the compiler. |
5152 | if (SA->getValue().uge(BitWidth)) |
5153 | break; |
5154 | |
5155 | // It is currently not resolved how to interpret NSW for left |
5156 | // shift by BitWidth - 1, so we avoid applying flags in that |
5157 | // case. Remove this check (or this comment) once the situation |
5158 | // is resolved. See |
5159 | // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html |
5160 | // and http://reviews.llvm.org/D8890 . |
5161 | auto Flags = SCEV::FlagAnyWrap; |
5162 | if (BO->Op && SA->getValue().ult(BitWidth - 1)) |
5163 | Flags = getNoWrapFlagsFromUB(BO->Op); |
5164 | |
5165 | Constant *X = ConstantInt::get(getContext(), |
5166 | APInt::getOneBitSet(BitWidth, SA->getZExtValue())); |
5167 | return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags); |
5168 | } |
5169 | break; |
5170 | |
5171 | case Instruction::AShr: |
5172 | // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression. |
5173 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) |
5174 | if (Operator *L = dyn_cast<Operator>(BO->LHS)) |
5175 | if (L->getOpcode() == Instruction::Shl && |
5176 | L->getOperand(1) == BO->RHS) { |
5177 | uint64_t BitWidth = getTypeSizeInBits(BO->LHS->getType()); |
5178 | |
5179 | // If the shift count is not less than the bitwidth, the result of |
5180 | // the shift is undefined. Don't try to analyze it, because the |
5181 | // resolution chosen here may differ from the resolution chosen in |
5182 | // other parts of the compiler. |
5183 | if (CI->getValue().uge(BitWidth)) |
5184 | break; |
5185 | |
5186 | uint64_t Amt = BitWidth - CI->getZExtValue(); |
5187 | if (Amt == BitWidth) |
5188 | return getSCEV(L->getOperand(0)); // shift by zero --> noop |
5189 | return getSignExtendExpr( |
5190 | getTruncateExpr(getSCEV(L->getOperand(0)), |
5191 | IntegerType::get(getContext(), Amt)), |
5192 | BO->LHS->getType()); |
5193 | } |
5194 | break; |
5195 | } |
5196 | } |
5197 | |
5198 | switch (U->getOpcode()) { |
5199 | case Instruction::Trunc: |
5200 | return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType()); |
5201 | |
5202 | case Instruction::ZExt: |
5203 | return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); |
5204 | |
5205 | case Instruction::SExt: |
5206 | return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); |
5207 | |
5208 | case Instruction::BitCast: |
5209 | // BitCasts are no-op casts so we just eliminate the cast. |
5210 | if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) |
5211 | return getSCEV(U->getOperand(0)); |
5212 | break; |
5213 | |
5214 | // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can |
5215 | // lead to pointer expressions which cannot safely be expanded to GEPs, |
5216 | // because ScalarEvolution doesn't respect the GEP aliasing rules when |
5217 | // simplifying integer expressions. |
5218 | |
5219 | case Instruction::GetElementPtr: |
5220 | return createNodeForGEP(cast<GEPOperator>(U)); |
5221 | |
5222 | case Instruction::PHI: |
5223 | return createNodeForPHI(cast<PHINode>(U)); |
5224 | |
5225 | case Instruction::Select: |
5226 | // U can also be a select constant expr, which let fall through. Since |
5227 | // createNodeForSelect only works for a condition that is an `ICmpInst`, and |
5228 | // constant expressions cannot have instructions as operands, we'd have |
5229 | // returned getUnknown for a select constant expressions anyway. |
5230 | if (isa<Instruction>(U)) |
5231 | return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0), |
5232 | U->getOperand(1), U->getOperand(2)); |
5233 | } |
5234 | |
5235 | return getUnknown(V); |
5236 | } |
5237 | |
5238 | |
5239 | |
5240 | //===----------------------------------------------------------------------===// |
5241 | // Iteration Count Computation Code |
5242 | // |
5243 | |
5244 | unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) { |
5245 | if (BasicBlock *ExitingBB = L->getExitingBlock()) |
5246 | return getSmallConstantTripCount(L, ExitingBB); |
5247 | |
5248 | // No trip count information for multiple exits. |
5249 | return 0; |
5250 | } |
5251 | |
5252 | unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, |
5253 | BasicBlock *ExitingBlock) { |
5254 | assert(ExitingBlock && "Must pass a non-null exiting block!")((ExitingBlock && "Must pass a non-null exiting block!" ) ? static_cast<void> (0) : __assert_fail ("ExitingBlock && \"Must pass a non-null exiting block!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5254, __PRETTY_FUNCTION__)); |
5255 | assert(L->isLoopExiting(ExitingBlock) &&((L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!" ) ? static_cast<void> (0) : __assert_fail ("L->isLoopExiting(ExitingBlock) && \"Exiting block must actually branch out of the loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5256, __PRETTY_FUNCTION__)) |
5256 | "Exiting block must actually branch out of the loop!")((L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!" ) ? static_cast<void> (0) : __assert_fail ("L->isLoopExiting(ExitingBlock) && \"Exiting block must actually branch out of the loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5256, __PRETTY_FUNCTION__)); |
5257 | const SCEVConstant *ExitCount = |
5258 | dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock)); |
5259 | if (!ExitCount) |
5260 | return 0; |
5261 | |
5262 | ConstantInt *ExitConst = ExitCount->getValue(); |
5263 | |
5264 | // Guard against huge trip counts. |
5265 | if (ExitConst->getValue().getActiveBits() > 32) |
5266 | return 0; |
5267 | |
5268 | // In case of integer overflow, this returns 0, which is correct. |
5269 | return ((unsigned)ExitConst->getZExtValue()) + 1; |
5270 | } |
5271 | |
5272 | unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) { |
5273 | if (BasicBlock *ExitingBB = L->getExitingBlock()) |
5274 | return getSmallConstantTripMultiple(L, ExitingBB); |
5275 | |
5276 | // No trip multiple information for multiple exits. |
5277 | return 0; |
5278 | } |
5279 | |
5280 | /// Returns the largest constant divisor of the trip count of this loop as a |
5281 | /// normal unsigned value, if possible. This means that the actual trip count is |
5282 | /// always a multiple of the returned value (don't forget the trip count could |
5283 | /// very well be zero as well!). |
5284 | /// |
5285 | /// Returns 1 if the trip count is unknown or not guaranteed to be the |
5286 | /// multiple of a constant (which is also the case if the trip count is simply |
/// constant, use getSmallConstantTripCount for that case), and will also return
5288 | /// if the trip count is very large (>= 2^32). |
5289 | /// |
5290 | /// As explained in the comments for getSmallConstantTripCount, this assumes |
5291 | /// that control exits the loop via ExitingBlock. |
5292 | unsigned |
5293 | ScalarEvolution::getSmallConstantTripMultiple(Loop *L, |
5294 | BasicBlock *ExitingBlock) { |
5295 | assert(ExitingBlock && "Must pass a non-null exiting block!")((ExitingBlock && "Must pass a non-null exiting block!" ) ? static_cast<void> (0) : __assert_fail ("ExitingBlock && \"Must pass a non-null exiting block!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5295, __PRETTY_FUNCTION__)); |
5296 | assert(L->isLoopExiting(ExitingBlock) &&((L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!" ) ? static_cast<void> (0) : __assert_fail ("L->isLoopExiting(ExitingBlock) && \"Exiting block must actually branch out of the loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5297, __PRETTY_FUNCTION__)) |
5297 | "Exiting block must actually branch out of the loop!")((L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!" ) ? static_cast<void> (0) : __assert_fail ("L->isLoopExiting(ExitingBlock) && \"Exiting block must actually branch out of the loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5297, __PRETTY_FUNCTION__)); |
5298 | const SCEV *ExitCount = getExitCount(L, ExitingBlock); |
5299 | if (ExitCount == getCouldNotCompute()) |
5300 | return 1; |
5301 | |
5302 | // Get the trip count from the BE count by adding 1. |
5303 | const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType())); |
5304 | // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt |
5305 | // to factor simple cases. |
5306 | if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul)) |
5307 | TCMul = Mul->getOperand(0); |
5308 | |
5309 | const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul); |
5310 | if (!MulC) |
5311 | return 1; |
5312 | |
5313 | ConstantInt *Result = MulC->getValue(); |
5314 | |
5315 | // Guard against huge trip counts (this requires checking |
5316 | // for zero to handle the case where the trip count == -1 and the |
5317 | // addition wraps). |
5318 | if (!Result || Result->getValue().getActiveBits() > 32 || |
5319 | Result->getValue().getActiveBits() == 0) |
5320 | return 1; |
5321 | |
5322 | return (unsigned)Result->getZExtValue(); |
5323 | } |
5324 | |
5325 | /// Get the expression for the number of loop iterations for which this loop is |
5326 | /// guaranteed not to exit via ExitingBlock. Otherwise return |
5327 | /// SCEVCouldNotCompute. |
5328 | const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) { |
5329 | return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); |
5330 | } |
5331 | |
5332 | const SCEV * |
5333 | ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, |
5334 | SCEVUnionPredicate &Preds) { |
5335 | return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds); |
5336 | } |
5337 | |
5338 | const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { |
5339 | return getBackedgeTakenInfo(L).getExact(this); |
5340 | } |
5341 | |
5342 | /// Similar to getBackedgeTakenCount, except return the least SCEV value that is |
5343 | /// known never to be less than the actual backedge taken count. |
5344 | const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { |
5345 | return getBackedgeTakenInfo(L).getMax(this); |
5346 | } |
5347 | |
5348 | /// Push PHI nodes in the header of the given loop onto the given Worklist. |
5349 | static void |
5350 | PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { |
5351 | BasicBlock *Header = L->getHeader(); |
5352 | |
5353 | // Push all Loop-header PHIs onto the Worklist stack. |
5354 | for (BasicBlock::iterator I = Header->begin(); |
5355 | PHINode *PN = dyn_cast<PHINode>(I); ++I) |
5356 | Worklist.push_back(PN); |
5357 | } |
5358 | |
5359 | const ScalarEvolution::BackedgeTakenInfo & |
5360 | ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { |
5361 | auto &BTI = getBackedgeTakenInfo(L); |
5362 | if (BTI.hasFullInfo()) |
5363 | return BTI; |
5364 | |
5365 | auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); |
5366 | |
5367 | if (!Pair.second) |
5368 | return Pair.first->second; |
5369 | |
5370 | BackedgeTakenInfo Result = |
5371 | computeBackedgeTakenCount(L, /*AllowPredicates=*/true); |
5372 | |
5373 | return PredicatedBackedgeTakenCounts.find(L)->second = Result; |
5374 | } |
5375 | |
/// Return the cached BackedgeTakenInfo for \p L, computing and caching it on
/// first request. Also invalidates stale SCEVs for PHIs in the loop once new
/// trip-count information is available.
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
  // Initially insert an invalid entry for this loop. If the insertion
  // succeeds, proceed to actually compute a backedge-taken count and
  // update the value. The temporary CouldNotCompute value tells SCEV
  // code elsewhere that it shouldn't attempt to request a new
  // backedge-taken count, which could result in infinite recursion.
  std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
      BackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
  if (!Pair.second)
    return Pair.first->second;

  // computeBackedgeTakenCount may allocate memory for its result. Inserting it
  // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
  // must be cleared in this scope.
  BackedgeTakenInfo Result = computeBackedgeTakenCount(L);

  if (Result.getExact(this) != getCouldNotCompute()) {
    assert(isLoopInvariant(Result.getExact(this), L) &&((isLoopInvariant(Result.getExact(this), L) && isLoopInvariant (Result.getMax(this), L) && "Computed backedge-taken count isn't loop invariant for loop!" ) ? static_cast<void> (0) : __assert_fail ("isLoopInvariant(Result.getExact(this), L) && isLoopInvariant(Result.getMax(this), L) && \"Computed backedge-taken count isn't loop invariant for loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5396, __PRETTY_FUNCTION__))
           isLoopInvariant(Result.getMax(this), L) &&((isLoopInvariant(Result.getExact(this), L) && isLoopInvariant (Result.getMax(this), L) && "Computed backedge-taken count isn't loop invariant for loop!" ) ? static_cast<void> (0) : __assert_fail ("isLoopInvariant(Result.getExact(this), L) && isLoopInvariant(Result.getMax(this), L) && \"Computed backedge-taken count isn't loop invariant for loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5396, __PRETTY_FUNCTION__))
           "Computed backedge-taken count isn't loop invariant for loop!")((isLoopInvariant(Result.getExact(this), L) && isLoopInvariant (Result.getMax(this), L) && "Computed backedge-taken count isn't loop invariant for loop!" ) ? static_cast<void> (0) : __assert_fail ("isLoopInvariant(Result.getExact(this), L) && isLoopInvariant(Result.getMax(this), L) && \"Computed backedge-taken count isn't loop invariant for loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5396, __PRETTY_FUNCTION__));
    ++NumTripCountsComputed;
  }
  else if (Result.getMax(this) == getCouldNotCompute() &&
           isa<PHINode>(L->getHeader()->begin())) {
    // Only count loops that have phi nodes as not being computable.
    ++NumTripCountsNotComputed;
  }

  // Now that we know more about the trip count for this loop, forget any
  // existing SCEV values for PHI nodes in this loop since they are only
  // conservative estimates made without the benefit of trip count
  // information. This is similar to the code in forgetLoop, except that
  // it handles SCEVUnknown PHI nodes specially.
  if (Result.hasAnyInfo()) {
    SmallVector<Instruction *, 16> Worklist;
    PushLoopPHIs(L, Worklist);

    SmallPtrSet<Instruction *, 8> Visited;
    while (!Worklist.empty()) {
      Instruction *I = Worklist.pop_back_val();
      // Visit each transitive user only once.
      if (!Visited.insert(I).second)
        continue;

      ValueExprMapType::iterator It =
          ValueExprMap.find_as(static_cast<Value *>(I));
      if (It != ValueExprMap.end()) {
        const SCEV *Old = It->second;

        // SCEVUnknown for a PHI either means that it has an unrecognized
        // structure, or it's a PHI that's in the progress of being computed
        // by createNodeForPHI. In the former case, additional loop trip
        // count information isn't going to change anything. In the later
        // case, createNodeForPHI will perform the necessary updates on its
        // own when it gets to that point.
        if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
          forgetMemoizedResults(Old);
          ValueExprMap.erase(It);
        }
        if (PHINode *PN = dyn_cast<PHINode>(I))
          ConstantEvolutionLoopExitValue.erase(PN);
      }

      PushDefUseChildren(I, Worklist);
    }
  }

  // Re-lookup the insert position, since the call to
  // computeBackedgeTakenCount above could result in a
  // recursive call to getBackedgeTakenInfo (on a different
  // loop), which would invalidate the iterator computed
  // earlier.
  return BackedgeTakenCounts.find(L)->second = Result;
}
5450 | |
5451 | void ScalarEvolution::forgetLoop(const Loop *L) { |
5452 | // Drop any stored trip count value. |
5453 | auto RemoveLoopFromBackedgeMap = |
5454 | [L](DenseMap<const Loop *, BackedgeTakenInfo> &Map) { |
5455 | auto BTCPos = Map.find(L); |
5456 | if (BTCPos != Map.end()) { |
5457 | BTCPos->second.clear(); |
5458 | Map.erase(BTCPos); |
5459 | } |
5460 | }; |
5461 | |
5462 | RemoveLoopFromBackedgeMap(BackedgeTakenCounts); |
5463 | RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts); |
5464 | |
5465 | // Drop information about expressions based on loop-header PHIs. |
5466 | SmallVector<Instruction *, 16> Worklist; |
5467 | PushLoopPHIs(L, Worklist); |
5468 | |
5469 | SmallPtrSet<Instruction *, 8> Visited; |
5470 | while (!Worklist.empty()) { |
5471 | Instruction *I = Worklist.pop_back_val(); |
5472 | if (!Visited.insert(I).second) |
5473 | continue; |
5474 | |
5475 | ValueExprMapType::iterator It = |
5476 | ValueExprMap.find_as(static_cast<Value *>(I)); |
5477 | if (It != ValueExprMap.end()) { |
5478 | forgetMemoizedResults(It->second); |
5479 | ValueExprMap.erase(It); |
5480 | if (PHINode *PN = dyn_cast<PHINode>(I)) |
5481 | ConstantEvolutionLoopExitValue.erase(PN); |
5482 | } |
5483 | |
5484 | PushDefUseChildren(I, Worklist); |
5485 | } |
5486 | |
5487 | // Forget all contained loops too, to avoid dangling entries in the |
5488 | // ValuesAtScopes map. |
5489 | for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) |
5490 | forgetLoop(*I); |
5491 | |
5492 | LoopMayThrow.erase(L); |
5493 | } |
5494 | |
5495 | void ScalarEvolution::forgetValue(Value *V) { |
5496 | Instruction *I = dyn_cast<Instruction>(V); |
5497 | if (!I) return; |
5498 | |
5499 | // Drop information about expressions based on loop-header PHIs. |
5500 | SmallVector<Instruction *, 16> Worklist; |
5501 | Worklist.push_back(I); |
5502 | |
5503 | SmallPtrSet<Instruction *, 8> Visited; |
5504 | while (!Worklist.empty()) { |
5505 | I = Worklist.pop_back_val(); |
5506 | if (!Visited.insert(I).second) |
5507 | continue; |
5508 | |
5509 | ValueExprMapType::iterator It = |
5510 | ValueExprMap.find_as(static_cast<Value *>(I)); |
5511 | if (It != ValueExprMap.end()) { |
5512 | forgetMemoizedResults(It->second); |
5513 | ValueExprMap.erase(It); |
5514 | if (PHINode *PN = dyn_cast<PHINode>(I)) |
5515 | ConstantEvolutionLoopExitValue.erase(PN); |
5516 | } |
5517 | |
5518 | PushDefUseChildren(I, Worklist); |
5519 | } |
5520 | } |
5521 | |
5522 | /// Get the exact loop backedge taken count considering all loop exits. A |
5523 | /// computable result can only be returned for loops with a single exit. |
5524 | /// Returning the minimum taken count among all exits is incorrect because one |
/// of the loop's exit limits may have been skipped. howFarToZero assumes that
5526 | /// the limit of each loop test is never skipped. This is a valid assumption as |
5527 | /// long as the loop exits via that test. For precise results, it is the |
5528 | /// caller's responsibility to specify the relevant loop exit using |
5529 | /// getExact(ExitingBlock, SE). |
5530 | const SCEV * |
5531 | ScalarEvolution::BackedgeTakenInfo::getExact( |
5532 | ScalarEvolution *SE, SCEVUnionPredicate *Preds) const { |
5533 | // If any exits were not computable, the loop is not computable. |
5534 | if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); |
5535 | |
5536 | // We need exactly one computable exit. |
5537 | if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); |
5538 | assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info")((ExitNotTaken.ExactNotTaken && "uninitialized not-taken info" ) ? static_cast<void> (0) : __assert_fail ("ExitNotTaken.ExactNotTaken && \"uninitialized not-taken info\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5538, __PRETTY_FUNCTION__)); |
5539 | |
5540 | const SCEV *BECount = nullptr; |
5541 | for (auto &ENT : ExitNotTaken) { |
5542 | assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV")((ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV") ? static_cast<void> (0) : __assert_fail ("ENT.ExactNotTaken != SE->getCouldNotCompute() && \"bad exit SCEV\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5542, __PRETTY_FUNCTION__)); |
5543 | |
5544 | if (!BECount) |
5545 | BECount = ENT.ExactNotTaken; |
5546 | else if (BECount != ENT.ExactNotTaken) |
5547 | return SE->getCouldNotCompute(); |
5548 | if (Preds && ENT.getPred()) |
5549 | Preds->add(ENT.getPred()); |
5550 | |
5551 | assert((Preds || ENT.hasAlwaysTruePred()) &&(((Preds || ENT.hasAlwaysTruePred()) && "Predicate should be always true!" ) ? static_cast<void> (0) : __assert_fail ("(Preds || ENT.hasAlwaysTruePred()) && \"Predicate should be always true!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5552, __PRETTY_FUNCTION__)) |
5552 | "Predicate should be always true!")(((Preds || ENT.hasAlwaysTruePred()) && "Predicate should be always true!" ) ? static_cast<void> (0) : __assert_fail ("(Preds || ENT.hasAlwaysTruePred()) && \"Predicate should be always true!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5552, __PRETTY_FUNCTION__)); |
5553 | } |
5554 | |
5555 | assert(BECount && "Invalid not taken count for loop exit")((BECount && "Invalid not taken count for loop exit") ? static_cast<void> (0) : __assert_fail ("BECount && \"Invalid not taken count for loop exit\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5555, __PRETTY_FUNCTION__)); |
5556 | return BECount; |
5557 | } |
5558 | |
5559 | /// Get the exact not taken count for this loop exit. |
5560 | const SCEV * |
5561 | ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, |
5562 | ScalarEvolution *SE) const { |
5563 | for (auto &ENT : ExitNotTaken) |
5564 | if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePred()) |
5565 | return ENT.ExactNotTaken; |
5566 | |
5567 | return SE->getCouldNotCompute(); |
5568 | } |
5569 | |
5570 | /// getMax - Get the max backedge taken count for the loop. |
5571 | const SCEV * |
5572 | ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { |
5573 | for (auto &ENT : ExitNotTaken) |
5574 | if (!ENT.hasAlwaysTruePred()) |
5575 | return SE->getCouldNotCompute(); |
5576 | |
5577 | return Max ? Max : SE->getCouldNotCompute(); |
5578 | } |
5579 | |
5580 | bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, |
5581 | ScalarEvolution *SE) const { |
5582 | if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S)) |
5583 | return true; |
5584 | |
5585 | if (!ExitNotTaken.ExitingBlock) |
5586 | return false; |
5587 | |
5588 | for (auto &ENT : ExitNotTaken) |
5589 | if (ENT.ExactNotTaken != SE->getCouldNotCompute() && |
5590 | SE->hasOperand(ENT.ExactNotTaken, S)) |
5591 | return true; |
5592 | |
5593 | return false; |
5594 | } |
5595 | |
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
    SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete, const SCEV *MaxCount)
    : Max(MaxCount) {

  if (!Complete)
    ExitNotTaken.setIncomplete();

  unsigned NumExits = ExitCounts.size();
  if (NumExits == 0) return;

  // The first exit is stored inline in ExitNotTaken itself.
  ExitNotTaken.ExitingBlock = ExitCounts[0].ExitBlock;
  ExitNotTaken.ExactNotTaken = ExitCounts[0].Taken;

  // Determine the number of ExitNotTakenExtras structures that we need.
  // With multiple exits, one slot is always needed for the head entry
  // (it holds the Exits list) plus one per additional predicated exit;
  // with a single exit, a slot is needed only for a non-trivial predicate.
  unsigned ExtraInfoSize = 0;
  if (NumExits > 1)
    ExtraInfoSize = 1 + std::count_if(std::next(ExitCounts.begin()),
                                      ExitCounts.end(), [](EdgeInfo &Entry) {
                                        return !Entry.Pred.isAlwaysTrue();
                                      });
  else if (!ExitCounts[0].Pred.isAlwaysTrue())
    ExtraInfoSize = 1;

  ExitNotTakenExtras *ENT = nullptr;

  // Allocate the ExitNotTakenExtras structures and initialize the first
  // element (ExitNotTaken).
  if (ExtraInfoSize > 0) {
    ENT = new ExitNotTakenExtras[ExtraInfoSize];
    ExitNotTaken.ExtraInfo = &ENT[0];
    *ExitNotTaken.getPred() = std::move(ExitCounts[0].Pred);
  }

  if (NumExits == 1)
    return;

  assert(ENT && "ExitNotTakenExtras is NULL while having more than one exit")((ENT && "ExitNotTakenExtras is NULL while having more than one exit" ) ? static_cast<void> (0) : __assert_fail ("ENT && \"ExitNotTakenExtras is NULL while having more than one exit\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5634, __PRETTY_FUNCTION__));

  auto &Exits = ExitNotTaken.ExtraInfo->Exits;

  // Handle the rare case of multiple computable exits.
  for (unsigned i = 1, PredPos = 1; i < NumExits; ++i) {
    // Only predicated exits consume a slot in the extras array.
    ExitNotTakenExtras *Ptr = nullptr;
    if (!ExitCounts[i].Pred.isAlwaysTrue()) {
      Ptr = &ENT[PredPos++];
      Ptr->Pred = std::move(ExitCounts[i].Pred);
    }

    Exits.emplace_back(ExitCounts[i].ExitBlock, ExitCounts[i].Taken, Ptr);
  }
}
5649 | |
5650 | /// Invalidate this result and free the ExitNotTakenInfo array. |
5651 | void ScalarEvolution::BackedgeTakenInfo::clear() { |
5652 | ExitNotTaken.ExitingBlock = nullptr; |
5653 | ExitNotTaken.ExactNotTaken = nullptr; |
5654 | delete[] ExitNotTaken.ExtraInfo; |
5655 | } |
5656 | |
/// Compute the number of times the backedge of the specified loop will execute.
/// Collects each computable exit's exact count (and predicate) into EdgeInfo
/// entries and derives an overall max backedge-taken count.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
                                           bool AllowPredicates) {
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  SmallVector<EdgeInfo, 4> ExitCounts;
  bool CouldComputeBECount = true;
  BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
  const SCEV *MustExitMaxBECount = nullptr;
  const SCEV *MayExitMaxBECount = nullptr;

  // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
  // and compute maxBECount.
  // Do a union of all the predicates here.
  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBB = ExitingBlocks[i];
    ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);

    assert((AllowPredicates || EL.Pred.isAlwaysTrue()) &&(((AllowPredicates || EL.Pred.isAlwaysTrue()) && "Predicated exit limit when predicates are not allowed!" ) ? static_cast<void> (0) : __assert_fail ("(AllowPredicates || EL.Pred.isAlwaysTrue()) && \"Predicated exit limit when predicates are not allowed!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5678, __PRETTY_FUNCTION__))
           "Predicated exit limit when predicates are not allowed!")(((AllowPredicates || EL.Pred.isAlwaysTrue()) && "Predicated exit limit when predicates are not allowed!" ) ? static_cast<void> (0) : __assert_fail ("(AllowPredicates || EL.Pred.isAlwaysTrue()) && \"Predicated exit limit when predicates are not allowed!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5678, __PRETTY_FUNCTION__));

    // 1. For each exit that can be computed, add an entry to ExitCounts.
    // CouldComputeBECount is true only if all exits can be computed.
    if (EL.Exact == getCouldNotCompute())
      // We couldn't compute an exact value for this exit, so
      // we won't be able to compute an exact value for the loop.
      CouldComputeBECount = false;
    else
      ExitCounts.emplace_back(EdgeInfo(ExitBB, EL.Exact, EL.Pred));

    // 2. Derive the loop's MaxBECount from each exit's max number of
    // non-exiting iterations. Partition the loop exits into two kinds:
    // LoopMustExits and LoopMayExits.
    //
    // If the exit dominates the loop latch, it is a LoopMustExit otherwise it
    // is a LoopMayExit. If any computable LoopMustExit is found, then
    // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
    // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
    // considered greater than any computable EL.Max.
    if (EL.Max != getCouldNotCompute() && Latch &&
        DT.dominates(ExitBB, Latch)) {
      if (!MustExitMaxBECount)
        MustExitMaxBECount = EL.Max;
      else {
        MustExitMaxBECount =
            getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
      }
    } else if (MayExitMaxBECount != getCouldNotCompute()) {
      // Once MayExitMaxBECount saturates to CouldNotCompute it stays there.
      if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
        MayExitMaxBECount = EL.Max;
      else {
        MayExitMaxBECount =
            getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
      }
    }
  }
  // Prefer the must-exit bound; fall back to the may-exit bound, then to
  // CouldNotCompute when no exit yielded a bound.
  const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
    (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
  return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
}
5719 | |
/// Compute the backedge-taken ExitLimit for the loop exit that leaves through
/// \p ExitingBlock.  Returns CouldNotCompute when the exiting block has more
/// than one successor outside the loop, or when we cannot prove the exit
/// branch executes exactly once per loop iteration.  \p AllowPredicates
/// allows the analysis to return an answer guarded by SCEV predicates.
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
                                  bool AllowPredicates) {

  // Okay, we've chosen an exiting block.  See what condition causes us to exit
  // at this block and remember the exit block and whether all other targets
  // lead to the loop header.
  bool MustExecuteLoopHeader = true;
  BasicBlock *Exit = nullptr;
  for (auto *SBB : successors(ExitingBlock))
    if (!L->contains(SBB)) {
      if (Exit) // Multiple exit successors.
        return getCouldNotCompute();
      Exit = SBB;
    } else if (SBB != L->getHeader()) {
      MustExecuteLoopHeader = false;
    }

  // At this point, we know we have a conditional branch that determines whether
  // the loop is exited.  However, we don't know if the branch is executed each
  // time through the loop.  If not, then the execution count of the branch will
  // not be equal to the trip count of the loop.
  //
  // Currently we check for this by checking to see if the Exit branch goes to
  // the loop header.  If so, we know it will always execute the same number of
  // times as the loop.  We also handle the case where the exit block *is* the
  // loop header.  This is common for un-rotated loops.
  //
  // If both of those tests fail, walk up the unique predecessor chain to the
  // header, stopping if there is an edge that doesn't exit the loop. If the
  // header is reached, the execution count of the branch will be equal to the
  // trip count of the loop.
  //
  // More extensive analysis could be done to handle more cases here.
  //
  if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
    // The simple checks failed, try climbing the unique predecessor chain
    // up to the header.
    bool Ok = false;
    for (BasicBlock *BB = ExitingBlock; BB; ) {
      BasicBlock *Pred = BB->getUniquePredecessor();
      if (!Pred)
        return getCouldNotCompute();
      TerminatorInst *PredTerm = Pred->getTerminator();
      for (const BasicBlock *PredSucc : PredTerm->successors()) {
        if (PredSucc == BB)
          continue;
        // If the predecessor has a successor that isn't BB and isn't
        // outside the loop, assume the worst.
        if (L->contains(PredSucc))
          return getCouldNotCompute();
      }
      if (Pred == L->getHeader()) {
        Ok = true;
        break;
      }
      BB = Pred;
    }
    if (!Ok)
      return getCouldNotCompute();
  }

  // getExitingBlock() is non-null exactly when the loop has a single exiting
  // block, i.e. when this exit's condition fully controls leaving the loop.
  bool IsOnlyExit = (L->getExitingBlock() != nullptr);
  TerminatorInst *Term = ExitingBlock->getTerminator();
  if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
    assert(BI->isConditional() && "If unconditional, it can't be in loop!")((BI->isConditional() && "If unconditional, it can't be in loop!" ) ? static_cast<void> (0) : __assert_fail ("BI->isConditional() && \"If unconditional, it can't be in loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5785, __PRETTY_FUNCTION__));
    // Proceed to the next level to examine the exit condition expression.
    return computeExitLimitFromCond(
        L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1),
        /*ControlsExit=*/IsOnlyExit, AllowPredicates);
  }

  // A switch with a single out-of-loop successor can still be analyzed.
  if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
    return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
                                                /*ControlsExit=*/IsOnlyExit);

  // Any other terminator (invoke, indirectbr, ...) is not analyzable.
  return getCouldNotCompute();
}
5798 | |
5799 | ScalarEvolution::ExitLimit |
5800 | ScalarEvolution::computeExitLimitFromCond(const Loop *L, |
5801 | Value *ExitCond, |
5802 | BasicBlock *TBB, |
5803 | BasicBlock *FBB, |
5804 | bool ControlsExit, |
5805 | bool AllowPredicates) { |
5806 | // Check if the controlling expression for this loop is an And or Or. |
5807 | if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { |
5808 | if (BO->getOpcode() == Instruction::And) { |
5809 | // Recurse on the operands of the and. |
5810 | bool EitherMayExit = L->contains(TBB); |
5811 | ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, |
5812 | ControlsExit && !EitherMayExit, |
5813 | AllowPredicates); |
5814 | ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, |
5815 | ControlsExit && !EitherMayExit, |
5816 | AllowPredicates); |
5817 | const SCEV *BECount = getCouldNotCompute(); |
5818 | const SCEV *MaxBECount = getCouldNotCompute(); |
5819 | if (EitherMayExit) { |
5820 | // Both conditions must be true for the loop to continue executing. |
5821 | // Choose the less conservative count. |
5822 | if (EL0.Exact == getCouldNotCompute() || |
5823 | EL1.Exact == getCouldNotCompute()) |
5824 | BECount = getCouldNotCompute(); |
5825 | else |
5826 | BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); |
5827 | if (EL0.Max == getCouldNotCompute()) |
5828 | MaxBECount = EL1.Max; |
5829 | else if (EL1.Max == getCouldNotCompute()) |
5830 | MaxBECount = EL0.Max; |
5831 | else |
5832 | MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); |
5833 | } else { |
5834 | // Both conditions must be true at the same time for the loop to exit. |
5835 | // For now, be conservative. |
5836 | assert(L->contains(FBB) && "Loop block has no successor in loop!")((L->contains(FBB) && "Loop block has no successor in loop!" ) ? static_cast<void> (0) : __assert_fail ("L->contains(FBB) && \"Loop block has no successor in loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5836, __PRETTY_FUNCTION__)); |
5837 | if (EL0.Max == EL1.Max) |
5838 | MaxBECount = EL0.Max; |
5839 | if (EL0.Exact == EL1.Exact) |
5840 | BECount = EL0.Exact; |
5841 | } |
5842 | |
5843 | SCEVUnionPredicate NP; |
5844 | NP.add(&EL0.Pred); |
5845 | NP.add(&EL1.Pred); |
5846 | // There are cases (e.g. PR26207) where computeExitLimitFromCond is able |
5847 | // to be more aggressive when computing BECount than when computing |
5848 | // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact |
5849 | // to match, but for EL0.Max and EL1.Max to not. |
5850 | if (isa<SCEVCouldNotCompute>(MaxBECount) && |
5851 | !isa<SCEVCouldNotCompute>(BECount)) |
5852 | MaxBECount = BECount; |
5853 | |
5854 | return ExitLimit(BECount, MaxBECount, NP); |
5855 | } |
5856 | if (BO->getOpcode() == Instruction::Or) { |
5857 | // Recurse on the operands of the or. |
5858 | bool EitherMayExit = L->contains(FBB); |
5859 | ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, |
5860 | ControlsExit && !EitherMayExit, |
5861 | AllowPredicates); |
5862 | ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, |
5863 | ControlsExit && !EitherMayExit, |
5864 | AllowPredicates); |
5865 | const SCEV *BECount = getCouldNotCompute(); |
5866 | const SCEV *MaxBECount = getCouldNotCompute(); |
5867 | if (EitherMayExit) { |
5868 | // Both conditions must be false for the loop to continue executing. |
5869 | // Choose the less conservative count. |
5870 | if (EL0.Exact == getCouldNotCompute() || |
5871 | EL1.Exact == getCouldNotCompute()) |
5872 | BECount = getCouldNotCompute(); |
5873 | else |
5874 | BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); |
5875 | if (EL0.Max == getCouldNotCompute()) |
5876 | MaxBECount = EL1.Max; |
5877 | else if (EL1.Max == getCouldNotCompute()) |
5878 | MaxBECount = EL0.Max; |
5879 | else |
5880 | MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); |
5881 | } else { |
5882 | // Both conditions must be false at the same time for the loop to exit. |
5883 | // For now, be conservative. |
5884 | assert(L->contains(TBB) && "Loop block has no successor in loop!")((L->contains(TBB) && "Loop block has no successor in loop!" ) ? static_cast<void> (0) : __assert_fail ("L->contains(TBB) && \"Loop block has no successor in loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 5884, __PRETTY_FUNCTION__)); |
5885 | if (EL0.Max == EL1.Max) |
5886 | MaxBECount = EL0.Max; |
5887 | if (EL0.Exact == EL1.Exact) |
5888 | BECount = EL0.Exact; |
5889 | } |
5890 | |
5891 | SCEVUnionPredicate NP; |
5892 | NP.add(&EL0.Pred); |
5893 | NP.add(&EL1.Pred); |
5894 | return ExitLimit(BECount, MaxBECount, NP); |
5895 | } |
5896 | } |
5897 | |
5898 | // With an icmp, it may be feasible to compute an exact backedge-taken count. |
5899 | // Proceed to the next level to examine the icmp. |
5900 | if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) { |
5901 | ExitLimit EL = |
5902 | computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); |
5903 | if (EL.hasFullInfo() || !AllowPredicates) |
5904 | return EL; |
5905 | |
5906 | // Try again, but use SCEV predicates this time. |
5907 | return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit, |
5908 | /*AllowPredicates=*/true); |
5909 | } |
5910 | |
5911 | // Check for a constant condition. These are normally stripped out by |
5912 | // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to |
5913 | // preserve the CFG and is temporarily leaving constant conditions |
5914 | // in place. |
5915 | if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) { |
5916 | if (L->contains(FBB) == !CI->getZExtValue()) |
5917 | // The backedge is always taken. |
5918 | return getCouldNotCompute(); |
5919 | else |
5920 | // The backedge is never taken. |
5921 | return getZero(CI->getType()); |
5922 | } |
5923 | |
5924 | // If it's not an integer or pointer comparison then compute it the hard way. |
5925 | return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); |
5926 | } |
5927 | |
/// Compute the ExitLimit for a loop whose exit condition is the icmp
/// \p ExitCond branching to \p TBB on true and \p FBB on false.  The
/// predicate is first canonicalized to "exit when false", special idioms
/// (load-of-constant-global compare, shift recurrences) are tried, and then
/// the comparison is dispatched to the appropriate trip-count helper.
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
                                          ICmpInst *ExitCond,
                                          BasicBlock *TBB,
                                          BasicBlock *FBB,
                                          bool ControlsExit,
                                          bool AllowPredicates) {

  // If the condition was exit on true, convert the condition to exit on false
  ICmpInst::Predicate Cond;
  if (!L->contains(FBB))
    Cond = ExitCond->getPredicate();
  else
    Cond = ExitCond->getInversePredicate();

  // Handle common loops like: for (X = "string"; *X; ++X)
  if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
    if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
      ExitLimit ItCnt =
        computeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
      if (ItCnt.hasAnyInfo())
        return ItCnt;
    }

  // Try the "shift recurrence stabilizes to 0/-1" idiom.
  ExitLimit ShiftEL = computeShiftCompareExitLimit(
      ExitCond->getOperand(0), ExitCond->getOperand(1), L, Cond);
  if (ShiftEL.hasAnyInfo())
    return ShiftEL;

  const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
  const SCEV *RHS = getSCEV(ExitCond->getOperand(1));

  // Try to evaluate any dependencies out of the loop.
  LHS = getSCEVAtScope(LHS, L);
  RHS = getSCEVAtScope(RHS, L);

  // At this point, we would like to compute how many iterations of the
  // loop the predicate will return true for these inputs.
  if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
    // If there is a loop-invariant, force it into the RHS.
    std::swap(LHS, RHS);
    Cond = ICmpInst::getSwappedPredicate(Cond);
  }

  // Simplify the operands before analyzing them.
  (void)SimplifyICmpOperands(Cond, LHS, RHS);

  // If we have a comparison of a chrec against a constant, try to use value
  // ranges to answer this query.
  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
    if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
      if (AddRec->getLoop() == L) {
        // Form the constant range.
        ConstantRange CompRange(
            ICmpInst::makeConstantRange(Cond, RHSC->getAPInt()));

        const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
        if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
      }

  // Dispatch on the (canonicalized, exit-on-false) predicate.
  switch (Cond) {
  case ICmpInst::ICMP_NE: {                     // while (X != Y)
    // Convert to: while (X-Y != 0)
    ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
                                AllowPredicates);
    if (EL.hasAnyInfo()) return EL;
    break;
  }
  case ICmpInst::ICMP_EQ: {                     // while (X == Y)
    // Convert to: while (X-Y == 0)
    ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L);
    if (EL.hasAnyInfo()) return EL;
    break;
  }
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_ULT: {                    // while (X < Y)
    bool IsSigned = Cond == ICmpInst::ICMP_SLT;
    ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
                                    AllowPredicates);
    if (EL.hasAnyInfo()) return EL;
    break;
  }
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_UGT: {                    // while (X > Y)
    bool IsSigned = Cond == ICmpInst::ICMP_SGT;
    ExitLimit EL =
        howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
                            AllowPredicates);
    if (EL.hasAnyInfo()) return EL;
    break;
  }
  default:
    break;
  }
  // No analyzable form matched; fall back to brute-force evaluation.
  return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
6024 | |
/// Compute the ExitLimit for a loop exited by a switch with exactly one
/// out-of-loop successor.  NOTE: despite the parameter name, \p ExitingBlock
/// is the *exit* successor outside the loop (enforced by the assert below),
/// i.e. the `Exit` block discovered by computeExitLimit.
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
                                                      SwitchInst *Switch,
                                                      BasicBlock *ExitingBlock,
                                                      bool ControlsExit) {
  assert(!L->contains(ExitingBlock) && "Not an exiting block!")((!L->contains(ExitingBlock) && "Not an exiting block!" ) ? static_cast<void> (0) : __assert_fail ("!L->contains(ExitingBlock) && \"Not an exiting block!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6030, __PRETTY_FUNCTION__));

  // Give up if the exit is the default dest of a switch.
  if (Switch->getDefaultDest() == ExitingBlock)
    return getCouldNotCompute();

  assert(L->contains(Switch->getDefaultDest()) &&((L->contains(Switch->getDefaultDest()) && "Default case must not exit the loop!" ) ? static_cast<void> (0) : __assert_fail ("L->contains(Switch->getDefaultDest()) && \"Default case must not exit the loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6037, __PRETTY_FUNCTION__))
         "Default case must not exit the loop!")((L->contains(Switch->getDefaultDest()) && "Default case must not exit the loop!" ) ? static_cast<void> (0) : __assert_fail ("L->contains(Switch->getDefaultDest()) && \"Default case must not exit the loop!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6037, __PRETTY_FUNCTION__));
  // The exit is taken when the switch condition equals the (unique) case value
  // routed to the exit block.
  const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
  const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));

  // while (X != Y) --> while (X-Y != 0)
  ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
  if (EL.hasAnyInfo())
    return EL;

  return getCouldNotCompute();
}
6048 | |
6049 | static ConstantInt * |
6050 | EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, |
6051 | ScalarEvolution &SE) { |
6052 | const SCEV *InVal = SE.getConstant(C); |
6053 | const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); |
6054 | assert(isa<SCEVConstant>(Val) &&((isa<SCEVConstant>(Val) && "Evaluation of SCEV at constant didn't fold correctly?" ) ? static_cast<void> (0) : __assert_fail ("isa<SCEVConstant>(Val) && \"Evaluation of SCEV at constant didn't fold correctly?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6055, __PRETTY_FUNCTION__)) |
6055 | "Evaluation of SCEV at constant didn't fold correctly?")((isa<SCEVConstant>(Val) && "Evaluation of SCEV at constant didn't fold correctly?" ) ? static_cast<void> (0) : __assert_fail ("isa<SCEVConstant>(Val) && \"Evaluation of SCEV at constant didn't fold correctly?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6055, __PRETTY_FUNCTION__)); |
6056 | return cast<SCEVConstant>(Val)->getValue(); |
6057 | } |
6058 | |
/// Given an exit condition of 'icmp op load X, cst', try to see if we can
/// compute the backedge execution count.  Handles the common idiom of
/// indexing into a constant global array, e.g. for (X = "string"; *X; ++X),
/// by brute-forcing up to MaxBruteForceIterations iterations.
ScalarEvolution::ExitLimit
ScalarEvolution::computeLoadConstantCompareExitLimit(
  LoadInst *LI,
  Constant *RHS,
  const Loop *L,
  ICmpInst::Predicate predicate) {

  // Volatile loads cannot be folded away.
  if (LI->isVolatile()) return getCouldNotCompute();

  // Check to see if the loaded pointer is a getelementptr of a global.
  // TODO: Use SCEV instead of manually grubbing with GEPs.
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
  if (!GEP) return getCouldNotCompute();

  // Make sure that it is really a constant global we are gepping, with an
  // initializer, and make sure the first IDX is really 0.
  GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
      GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
      !cast<Constant>(GEP->getOperand(1))->isNullValue())
    return getCouldNotCompute();

  // Okay, we allow one non-constant index into the GEP instruction.
  Value *VarIdx = nullptr;
  std::vector<Constant*> Indexes;
  unsigned VarIdxNum = 0;
  for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
      Indexes.push_back(CI);
    } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
      if (VarIdx) return getCouldNotCompute();  // Multiple non-constant idx's.
      VarIdx = GEP->getOperand(i);
      VarIdxNum = i-2;
      Indexes.push_back(nullptr);  // Placeholder filled in per iteration below.
    }

  // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
  if (!VarIdx)
    return getCouldNotCompute();

  // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
  // Check to see if X is a loop variant variable value now.
  const SCEV *Idx = getSCEV(VarIdx);
  Idx = getSCEVAtScope(Idx, L);

  // We can only recognize very limited forms of loop index expressions, in
  // particular, only affine AddRec's like {C1,+,C2}.
  const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
  if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
      !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
      !isa<SCEVConstant>(IdxExpr->getOperand(1)))
    return getCouldNotCompute();

  // Brute-force: evaluate the index and the load at each iteration until the
  // exit condition becomes true or we give up.
  unsigned MaxSteps = MaxBruteForceIterations;
  for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
    ConstantInt *ItCst = ConstantInt::get(
        cast<IntegerType>(IdxExpr->getType()), IterationNum);
    ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);

    // Form the GEP offset.
    Indexes[VarIdxNum] = Val;

    Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
                                                         Indexes);
    if (!Result) break;  // Cannot compute!

    // Evaluate the condition for this iteration.
    Result = ConstantExpr::getICmp(predicate, Result, RHS);
    if (!isa<ConstantInt>(Result)) break;  // Couldn't decide for sure
    if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
      // Condition is false (exit taken): the backedge-taken count is ItCst.
      ++NumArrayLenItCounts;
      return getConstant(ItCst);   // Found terminating iteration!
    }
  }
  return getCouldNotCompute();
}
6137 | |
/// Compute an upper bound on the backedge-taken count for exit conditions of
/// the form "icmp pred (shift-recurrence), constant".  Shift recurrences
/// (repeated lshr/ashr/shl by a positive constant) stabilize to 0 or -1
/// within bitwidth(K) iterations, so if the exit predicate is false at the
/// stable value the loop must terminate within that bound.
ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
    Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
  ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
  if (!RHS)
    return getCouldNotCompute();

  const BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return getCouldNotCompute();

  const BasicBlock *Predecessor = L->getLoopPredecessor();
  if (!Predecessor)
    return getCouldNotCompute();

  // Return true if V is of the form "LHS `shift_op` <positive constant>".
  // Return LHS in OutLHS and shift_opt in OutOpCode.
  auto MatchPositiveShift =
      [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) {

    using namespace PatternMatch;

    ConstantInt *ShiftAmt;
    if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
      OutOpCode = Instruction::LShr;
    else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
      OutOpCode = Instruction::AShr;
    else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
      OutOpCode = Instruction::Shl;
    else
      return false;

    return ShiftAmt->getValue().isStrictlyPositive();
  };

  // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in
  //
  // loop:
  //   %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
  //   %iv.shifted = lshr i32 %iv, <positive constant>
  //
  // Return true on a succesful match.  Return the corresponding PHI node (%iv
  // above) in PNOut and the opcode of the shift operation in OpCodeOut.
  auto MatchShiftRecurrence =
      [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
    Optional<Instruction::BinaryOps> PostShiftOpCode;

    {
      Instruction::BinaryOps OpC;
      Value *V;

      // If we encounter a shift instruction, "peel off" the shift operation,
      // and remember that we did so.  Later when we inspect %iv's backedge
      // value, we will make sure that the backedge value uses the same
      // operation.
      //
      // Note: the peeled shift operation does not have to be the same
      // instruction as the one feeding into the PHI's backedge value.  We only
      // really care about it being the same *kind* of shift instruction --
      // that's all that is required for our later inferences to hold.
      if (MatchPositiveShift(LHS, V, OpC)) {
        PostShiftOpCode = OpC;
        LHS = V;
      }
    }

    PNOut = dyn_cast<PHINode>(LHS);
    if (!PNOut || PNOut->getParent() != L->getHeader())
      return false;

    Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
    Value *OpLHS;

    return
        // The backedge value for the PHI node must be a shift by a positive
        // amount
        MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&

        // of the PHI node itself
        OpLHS == PNOut &&

        // and the kind of shift should be match the kind of shift we peeled
        // off, if any.
        (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
  };

  PHINode *PN;
  Instruction::BinaryOps OpCode;
  if (!MatchShiftRecurrence(LHS, PN, OpCode))
    return getCouldNotCompute();

  const DataLayout &DL = getDataLayout();

  // The key rationale for this optimization is that for some kinds of shift
  // recurrences, the value of the recurrence "stabilizes" to either 0 or -1
  // within a finite number of iterations.  If the condition guarding the
  // backedge (in the sense that the backedge is taken if the condition is true)
  // is false for the value the shift recurrence stabilizes to, then we know
  // that the backedge is taken only a finite number of times.

  ConstantInt *StableValue = nullptr;
  switch (OpCode) {
  default:
    llvm_unreachable("Impossible case!")::llvm::llvm_unreachable_internal("Impossible case!", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6240);

  case Instruction::AShr: {
    // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
    // bitwidth(K) iterations.
    Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
    bool KnownZero, KnownOne;
    ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr,
                   Predecessor->getTerminator(), &DT);
    auto *Ty = cast<IntegerType>(RHS->getType());
    if (KnownZero)
      StableValue = ConstantInt::get(Ty, 0);
    else if (KnownOne)
      StableValue = ConstantInt::get(Ty, -1, true);
    else
      return getCouldNotCompute();

    break;
  }
  case Instruction::LShr:
  case Instruction::Shl:
    // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
    // stabilize to 0 in at most bitwidth(K) iterations.
    StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
    break;
  }

  auto *Result =
      ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
  assert(Result->getType()->isIntegerTy(1) &&((Result->getType()->isIntegerTy(1) && "Otherwise cannot be an operand to a branch instruction" ) ? static_cast<void> (0) : __assert_fail ("Result->getType()->isIntegerTy(1) && \"Otherwise cannot be an operand to a branch instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6270, __PRETTY_FUNCTION__))
         "Otherwise cannot be an operand to a branch instruction")((Result->getType()->isIntegerTy(1) && "Otherwise cannot be an operand to a branch instruction" ) ? static_cast<void> (0) : __assert_fail ("Result->getType()->isIntegerTy(1) && \"Otherwise cannot be an operand to a branch instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6270, __PRETTY_FUNCTION__));

  if (Result->isZeroValue()) {
    // The predicate is false at the stable value, so the loop takes at most
    // bitwidth(RHS) backedges.  Only a max count is known, not an exact one.
    unsigned BitWidth = getTypeSizeInBits(RHS->getType());
    const SCEV *UpperBound =
        getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
    SCEVUnionPredicate P;
    return ExitLimit(getCouldNotCompute(), UpperBound, P);
  }

  return getCouldNotCompute();
}
6282 | |
6283 | /// Return true if we can constant fold an instruction of the specified type, |
6284 | /// assuming that all operands were constants. |
6285 | static bool CanConstantFold(const Instruction *I) { |
6286 | if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || |
6287 | isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) || |
6288 | isa<LoadInst>(I)) |
6289 | return true; |
6290 | |
6291 | if (const CallInst *CI = dyn_cast<CallInst>(I)) |
6292 | if (const Function *F = CI->getCalledFunction()) |
6293 | return canConstantFoldCallTo(F); |
6294 | return false; |
6295 | } |
6296 | |
6297 | /// Determine whether this instruction can constant evolve within this loop |
6298 | /// assuming its operands can all constant evolve. |
6299 | static bool canConstantEvolve(Instruction *I, const Loop *L) { |
6300 | // An instruction outside of the loop can't be derived from a loop PHI. |
6301 | if (!L->contains(I)) return false; |
6302 | |
6303 | if (isa<PHINode>(I)) { |
6304 | // We don't currently keep track of the control flow needed to evaluate |
6305 | // PHIs, so we cannot handle PHIs inside of loops. |
6306 | return L->getHeader() == I->getParent(); |
6307 | } |
6308 | |
6309 | // If we won't be able to constant fold this expression even if the operands |
6310 | // are constants, bail early. |
6311 | return CanConstantFold(I); |
6312 | } |
6313 | |
6314 | /// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by |
6315 | /// recursing through each instruction operand until reaching a loop header phi. |
6316 | static PHINode * |
6317 | getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, |
6318 | DenseMap<Instruction *, PHINode *> &PHIMap) { |
6319 | |
6320 | // Otherwise, we can evaluate this instruction if all of its operands are |
6321 | // constant or derived from a PHI node themselves. |
6322 | PHINode *PHI = nullptr; |
6323 | for (Value *Op : UseInst->operands()) { |
6324 | if (isa<Constant>(Op)) continue; |
6325 | |
6326 | Instruction *OpInst = dyn_cast<Instruction>(Op); |
6327 | if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr; |
6328 | |
6329 | PHINode *P = dyn_cast<PHINode>(OpInst); |
6330 | if (!P) |
6331 | // If this operand is already visited, reuse the prior result. |
6332 | // We may have P != PHI if this is the deepest point at which the |
6333 | // inconsistent paths meet. |
6334 | P = PHIMap.lookup(OpInst); |
6335 | if (!P) { |
6336 | // Recurse and memoize the results, whether a phi is found or not. |
6337 | // This recursive call invalidates pointers into PHIMap. |
6338 | P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap); |
6339 | PHIMap[OpInst] = P; |
6340 | } |
6341 | if (!P) |
6342 | return nullptr; // Not evolving from PHI |
6343 | if (PHI && PHI != P) |
6344 | return nullptr; // Evolving from multiple different PHIs. |
6345 | PHI = P; |
6346 | } |
6347 | // This is a expression evolving from a constant PHI! |
6348 | return PHI; |
6349 | } |
6350 | |
6351 | /// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node |
6352 | /// in the loop that V is derived from. We allow arbitrary operations along the |
6353 | /// way, but the operands of an operation must either be constants or a value |
6354 | /// derived from a constant PHI. If this expression does not fit with these |
6355 | /// constraints, return null. |
6356 | static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { |
6357 | Instruction *I = dyn_cast<Instruction>(V); |
6358 | if (!I || !canConstantEvolve(I, L)) return nullptr; |
6359 | |
6360 | if (PHINode *PN = dyn_cast<PHINode>(I)) |
6361 | return PN; |
6362 | |
6363 | // Record non-constant instructions contained by the loop. |
6364 | DenseMap<Instruction *, PHINode *> PHIMap; |
6365 | return getConstantEvolvingPHIOperands(I, L, PHIMap); |
6366 | } |
6367 | |
6368 | /// EvaluateExpression - Given an expression that passes the |
6369 | /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node |
6370 | /// in the loop has the value PHIVal. If we can't fold this expression for some |
6371 | /// reason, return null. |
6372 | static Constant *EvaluateExpression(Value *V, const Loop *L, |
6373 | DenseMap<Instruction *, Constant *> &Vals, |
6374 | const DataLayout &DL, |
6375 | const TargetLibraryInfo *TLI) { |
6376 | // Convenient constant check, but redundant for recursive calls. |
6377 | if (Constant *C = dyn_cast<Constant>(V)) return C; |
6378 | Instruction *I = dyn_cast<Instruction>(V); |
6379 | if (!I) return nullptr; |
6380 | |
6381 | if (Constant *C = Vals.lookup(I)) return C; |
6382 | |
6383 | // An instruction inside the loop depends on a value outside the loop that we |
6384 | // weren't given a mapping for, or a value such as a call inside the loop. |
6385 | if (!canConstantEvolve(I, L)) return nullptr; |
6386 | |
6387 | // An unmapped PHI can be due to a branch or another loop inside this loop, |
6388 | // or due to this not being the initial iteration through a loop where we |
6389 | // couldn't compute the evolution of this particular PHI last time. |
6390 | if (isa<PHINode>(I)) return nullptr; |
6391 | |
6392 | std::vector<Constant*> Operands(I->getNumOperands()); |
6393 | |
6394 | for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { |
6395 | Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i)); |
6396 | if (!Operand) { |
6397 | Operands[i] = dyn_cast<Constant>(I->getOperand(i)); |
6398 | if (!Operands[i]) return nullptr; |
6399 | continue; |
6400 | } |
6401 | Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI); |
6402 | Vals[Operand] = C; |
6403 | if (!C) return nullptr; |
6404 | Operands[i] = C; |
6405 | } |
6406 | |
6407 | if (CmpInst *CI = dyn_cast<CmpInst>(I)) |
6408 | return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], |
6409 | Operands[1], DL, TLI); |
6410 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { |
6411 | if (!LI->isVolatile()) |
6412 | return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL); |
6413 | } |
6414 | return ConstantFoldInstOperands(I, Operands, DL, TLI); |
6415 | } |
6416 | |
6417 | |
6418 | // If every incoming value to PN except the one for BB is a specific Constant, |
6419 | // return that, else return nullptr. |
6420 | static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) { |
6421 | Constant *IncomingVal = nullptr; |
6422 | |
6423 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { |
6424 | if (PN->getIncomingBlock(i) == BB) |
6425 | continue; |
6426 | |
6427 | auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i)); |
6428 | if (!CurrentVal) |
6429 | return nullptr; |
6430 | |
6431 | if (IncomingVal != CurrentVal) { |
6432 | if (IncomingVal) |
6433 | return nullptr; |
6434 | IncomingVal = CurrentVal; |
6435 | } |
6436 | } |
6437 | |
6438 | return IncomingVal; |
6439 | } |
6440 | |
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
/// involving constants, fold it.
///
/// \p BEs is the loop's backedge-taken count; results (including failures,
/// recorded as null) are memoized in ConstantEvolutionLoopExitValue.
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
                                                   const APInt &BEs,
                                                   const Loop *L) {
  // Return a memoized result if we have one.
  auto I = ConstantEvolutionLoopExitValue.find(PN);
  if (I != ConstantEvolutionLoopExitValue.end())
    return I->second;

  if (BEs.ugt(MaxBruteForceIterations))
    return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.

  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return nullptr;

  // Seed CurrentIterVals with the constant start value of every header PHI
  // whose non-latch incoming values all agree on one constant.
  for (auto &I : *Header) {
    PHINode *PHI = dyn_cast<PHINode>(&I);
    if (!PHI) break;
    auto *StartCST = getOtherIncomingValue(PHI, Latch);
    if (!StartCST) continue;
    CurrentIterVals[PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return RetVal = nullptr;

  // The value of PN on the next iteration is its incoming value from the
  // latch, evaluated in terms of the current iteration's values.
  Value *BEValue = PN->getIncomingValueForBlock(Latch);

  // Execute the loop symbolically to determine the exit value.
  if (BEs.getActiveBits() >= 32)
    return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!

  unsigned NumIterations = BEs.getZExtValue(); // must be in range
  unsigned IterationNum = 0;
  const DataLayout &DL = getDataLayout();
  for (; ; ++IterationNum) {
    if (IterationNum == NumIterations)
      return RetVal = CurrentIterVals[PN];  // Got exit value!

    // Compute the value of the PHIs for the next iteration.
    // EvaluateExpression adds non-phi values to the CurrentIterVals map.
    DenseMap<Instruction *, Constant *> NextIterVals;
    Constant *NextPHI =
        EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    if (!NextPHI)
      return nullptr;        // Couldn't evaluate!
    NextIterVals[PN] = NextPHI;

    bool StoppedEvolving = NextPHI == CurrentIterVals[PN];

    // Also evaluate the other PHI nodes.  However, we don't get to stop if we
    // cease to be able to evaluate one of them or if they stop evolving,
    // because that doesn't necessarily prevent us from computing PN.
    SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
      PHIsToCompute.emplace_back(PHI, I.second);
    }
    // We use two distinct loops because EvaluateExpression may invalidate any
    // iterators into CurrentIterVals.
    for (const auto &I : PHIsToCompute) {
      PHINode *PHI = I.first;
      Constant *&NextPHI = NextIterVals[PHI];
      if (!NextPHI) {   // Not already computed.
        Value *BEValue = PHI->getIncomingValueForBlock(Latch);
        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
      }
      if (NextPHI != I.second)
        StoppedEvolving = false;
    }

    // If all entries in CurrentIterVals == NextIterVals then we can stop
    // iterating, the loop can't continue to change.
    if (StoppedEvolving)
      return RetVal = CurrentIterVals[PN];

    CurrentIterVals.swap(NextIterVals);
  }
}
6530 | |
/// Compute a loop's exit count by brute-force symbolic execution.
///
/// \p Cond is the loop's exit condition, which must be an expression rooted
/// at a constant-evolving PHI in the header of \p L; \p ExitWhen is the
/// boolean value of the condition that causes the loop to exit.  The loop is
/// evaluated iteration by iteration (up to MaxBruteForceIterations) and the
/// iteration number at which the condition first equals ExitWhen is returned,
/// or SCEVCouldNotCompute if that never happens within the budget.
const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
                                                          Value *Cond,
                                                          bool ExitWhen) {
  PHINode *PN = getConstantEvolvingPHI(Cond, L);
  if (!PN) return getCouldNotCompute();

  // If the loop is canonicalized, the PHI will have exactly two entries.
  // That's the only form we support here.
  if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Should follow from NumIncomingValues == 2!");

  // Seed the iteration map with the constant start value of every header PHI
  // whose non-latch incoming values all agree on one constant.
  for (auto &I : *Header) {
    PHINode *PHI = dyn_cast<PHINode>(&I);
    if (!PHI)
      break;
    auto *StartCST = getOtherIncomingValue(PHI, Latch);
    if (!StartCST) continue;
    CurrentIterVals[PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return getCouldNotCompute();

  // Okay, we found a PHI node that defines the trip count of this loop.
  // Execute the loop symbolically to determine when the condition gets a
  // value of "ExitWhen".
  unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
  const DataLayout &DL = getDataLayout();
  for (unsigned IterationNum = 0; IterationNum != MaxIterations; ++IterationNum) {
    // The exit test must fold to a ConstantInt for us to make progress.
    auto *CondVal = dyn_cast_or_null<ConstantInt>(
        EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));

    // Couldn't symbolically evaluate.
    if (!CondVal) return getCouldNotCompute();

    if (CondVal->getValue() == uint64_t(ExitWhen)) {
      ++NumBruteForceTripCountsComputed;
      return getConstant(Type::getInt32Ty(getContext()), IterationNum);
    }

    // Update all the PHI nodes for the next iteration.
    DenseMap<Instruction *, Constant *> NextIterVals;

    // Create a list of which PHIs we need to compute. We want to do this before
    // calling EvaluateExpression on them because that may invalidate iterators
    // into CurrentIterVals.
    SmallVector<PHINode *, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI->getParent() != Header) continue;
      PHIsToCompute.push_back(PHI);
    }
    for (PHINode *PHI : PHIsToCompute) {
      Constant *&NextPHI = NextIterVals[PHI];
      if (NextPHI) continue;    // Already computed!

      Value *BEValue = PHI->getIncomingValueForBlock(Latch);
      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    }
    CurrentIterVals.swap(NextIterVals);
  }

  // Too many iterations were needed to evaluate.
  return getCouldNotCompute();
}
6601 | |
6602 | const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { |
6603 | SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = |
6604 | ValuesAtScopes[V]; |
6605 | // Check to see if we've folded this expression at this loop before. |
6606 | for (auto &LS : Values) |
6607 | if (LS.first == L) |
6608 | return LS.second ? LS.second : V; |
6609 | |
6610 | Values.emplace_back(L, nullptr); |
6611 | |
6612 | // Otherwise compute it. |
6613 | const SCEV *C = computeSCEVAtScope(V, L); |
6614 | for (auto &LS : reverse(ValuesAtScopes[V])) |
6615 | if (LS.first == L) { |
6616 | LS.second = C; |
6617 | break; |
6618 | } |
6619 | return C; |
6620 | } |
6621 | |
/// This builds up a Constant using the ConstantExpr interface.  That way, we
/// will return Constants for objects which aren't represented by a
/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
/// Returns NULL if the SCEV isn't representable as a Constant.
static Constant *BuildConstantFromSCEV(const SCEV *V) {
  switch (static_cast<SCEVTypes>(V->getSCEVType())) {
  case scCouldNotCompute:
  case scAddRecExpr:
    // Recurrences have no single constant representation.
    break;
  case scConstant:
    return cast<SCEVConstant>(V)->getValue();
  case scUnknown:
    // An unknown SCEV is representable only if it already wraps a Constant.
    return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
  case scSignExtend: {
    const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
      return ConstantExpr::getSExt(CastOp, SS->getType());
    break;
  }
  case scZeroExtend: {
    const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
      return ConstantExpr::getZExt(CastOp, SZ->getType());
    break;
  }
  case scTruncate: {
    const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
      return ConstantExpr::getTrunc(CastOp, ST->getType());
    break;
  }
  case scAddExpr: {
    const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
    if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
      if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
        // Normalize a leading pointer operand to i8* so the remaining
        // (byte-offset) operands can be folded in via GEP below.
        unsigned AS = PTy->getAddressSpace();
        Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
        C = ConstantExpr::getBitCast(C, DestPtrTy);
      }
      for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
        Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
        if (!C2) return nullptr;

        // First pointer!
        if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
          unsigned AS = C2->getType()->getPointerAddressSpace();
          std::swap(C, C2);
          Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
          // The offsets have been converted to bytes.  We can add bytes to an
          // i8* by GEP with the byte count in the first index.
          C = ConstantExpr::getBitCast(C, DestPtrTy);
        }

        // Don't bother trying to sum two pointers. We probably can't
        // statically compute a load that results from it anyway.
        if (C2->getType()->isPointerTy())
          return nullptr;

        if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
          // Struct GEP indices must be i32; cast the offset accordingly.
          if (PTy->getElementType()->isStructTy())
            C2 = ConstantExpr::getIntegerCast(
                C2, Type::getInt32Ty(C->getContext()), true);
          C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
        } else
          C = ConstantExpr::getAdd(C, C2);
      }
      return C;
    }
    break;
  }
  case scMulExpr: {
    const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
    if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
      // Don't bother with pointers at all.
      if (C->getType()->isPointerTy()) return nullptr;
      for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
        Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
        if (!C2 || C2->getType()->isPointerTy()) return nullptr;
        C = ConstantExpr::getMul(C, C2);
      }
      return C;
    }
    break;
  }
  case scUDivExpr: {
    const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
    if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
      if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
        // Division requires matching operand types.
        if (LHS->getType() == RHS->getType())
          return ConstantExpr::getUDiv(LHS, RHS);
    break;
  }
  case scSMaxExpr:
  case scUMaxExpr:
    break; // TODO: smax, umax.
  }
  return nullptr;
}
6720 | |
/// Compute the value of \p V when evaluated at scope \p L.  This is the
/// uncached worker for getSCEVAtScope, which memoizes the results.
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  // Constants are invariant at every scope.
  if (isa<SCEVConstant>(V)) return V;

  // If this instruction is evolved from a constant-evolving PHI, compute the
  // exit value from the loop without using SCEVs.
  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
    if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
      const Loop *LI = this->LI[I->getParent()];
      if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
        if (PHINode *PN = dyn_cast<PHINode>(I))
          if (PN->getParent() == LI->getHeader()) {
            // Okay, there is no closed form solution for the PHI node.  Check
            // to see if the loop that contains it has a known backedge-taken
            // count.  If so, we may be able to force computation of the exit
            // value.
            const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
            if (const SCEVConstant *BTCC =
                    dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
              // Okay, we know how many times the containing loop executes.  If
              // this is a constant evolving PHI node, get the final value at
              // the specified iteration number.
              Constant *RV =
                  getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
              if (RV) return getSCEV(RV);
            }
          }

      // Okay, this is an expression that we cannot symbolically evaluate
      // into a SCEV.  Check to see if it's possible to symbolically evaluate
      // the arguments into constants, and if so, try to constant propagate the
      // result.  This is particularly useful for computing loop exit values.
      if (CanConstantFold(I)) {
        SmallVector<Constant *, 4> Operands;
        bool MadeImprovement = false;
        for (Value *Op : I->operands()) {
          if (Constant *C = dyn_cast<Constant>(Op)) {
            Operands.push_back(C);
            continue;
          }

          // If any of the operands is non-constant and if they are
          // non-integer and non-pointer, don't even try to analyze them
          // with scev techniques.
          if (!isSCEVable(Op->getType()))
            return V;

          const SCEV *OrigV = getSCEV(Op);
          const SCEV *OpV = getSCEVAtScope(OrigV, L);
          MadeImprovement |= OrigV != OpV;

          Constant *C = BuildConstantFromSCEV(OpV);
          if (!C) return V;
          // SCEV arithmetic may have changed the type; cast back to what the
          // instruction expects.
          if (C->getType() != Op->getType())
            C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                              Op->getType(),
                                                              false),
                                      C, Op->getType());
          Operands.push_back(C);
        }

        // Check to see if getSCEVAtScope actually made an improvement.
        if (MadeImprovement) {
          Constant *C = nullptr;
          const DataLayout &DL = getDataLayout();
          if (const CmpInst *CI = dyn_cast<CmpInst>(I))
            C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
                                                Operands[1], DL, &TLI);
          else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
            if (!LI->isVolatile())
              C = ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
          } else
            C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
          if (!C) return V;
          return getSCEV(C);
        }
      }
    }

    // This is some other type of SCEVUnknown, just return it.
    return V;
  }

  if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
      if (OpAtScope != Comm->getOperand(i)) {
        // Okay, at least one of these operands is loop variant but might be
        // foldable.  Build a new instance of the folded commutative expression.
        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
                                            Comm->op_begin()+i);
        NewOps.push_back(OpAtScope);

        for (++i; i != e; ++i) {
          OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
          NewOps.push_back(OpAtScope);
        }
        if (isa<SCEVAddExpr>(Comm))
          return getAddExpr(NewOps);
        if (isa<SCEVMulExpr>(Comm))
          return getMulExpr(NewOps);
        if (isa<SCEVSMaxExpr>(Comm))
          return getSMaxExpr(NewOps);
        if (isa<SCEVUMaxExpr>(Comm))
          return getUMaxExpr(NewOps);
        llvm_unreachable("Unknown commutative SCEV type!");
      }
    }
    // If we got here, all operands are loop invariant.
    return Comm;
  }

  if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
    const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
    const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
    if (LHS == Div->getLHS() && RHS == Div->getRHS())
      return Div; // must be loop invariant
    return getUDivExpr(LHS, RHS);
  }

  // If this is a loop recurrence for a loop that does not contain L, then we
  // are dealing with the final value computed by the loop.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
    // First, attempt to evaluate each operand.
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
      if (OpAtScope == AddRec->getOperand(i))
        continue;

      // Okay, at least one of these operands is loop variant but might be
      // foldable.  Build a new instance of the folded commutative expression.
      SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
                                          AddRec->op_begin()+i);
      NewOps.push_back(OpAtScope);
      for (++i; i != e; ++i)
        NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));

      const SCEV *FoldedRec =
          getAddRecExpr(NewOps, AddRec->getLoop(),
                        AddRec->getNoWrapFlags(SCEV::FlagNW));
      AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
      // The addrec may be folded to a nonrecurrence, for example, if the
      // induction variable is multiplied by zero after constant folding. Go
      // ahead and return the folded value.
      if (!AddRec)
        return FoldedRec;
      break;
    }

    // If the scope is outside the addrec's loop, evaluate it by using the
    // loop exit value of the addrec.
    if (!AddRec->getLoop()->contains(L)) {
      // To evaluate this recurrence, we need to know how many times the AddRec
      // loop iterates.  Compute this now.
      const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
      if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;

      // Then, evaluate the AddRec.
      return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
    }

    return AddRec;
  }

  if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getZeroExtendExpr(Op, Cast->getType());
  }

  if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getSignExtendExpr(Op, Cast->getType());
  }

  if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getTruncateExpr(Op, Cast->getType());
  }

  llvm_unreachable("Unknown SCEV type!");
}
6911 | |
6912 | const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { |
6913 | return getSCEVAtScope(getSCEV(V), L); |
6914 | } |
6915 | |
6916 | /// Finds the minimum unsigned root of the following equation: |
6917 | /// |
6918 | /// A * X = B (mod N) |
6919 | /// |
6920 | /// where N = 2^BW and BW is the common bit width of A and B. The signedness of |
6921 | /// A and B isn't important. |
6922 | /// |
6923 | /// If the equation does not have a solution, SCEVCouldNotCompute is returned. |
6924 | static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, |
6925 | ScalarEvolution &SE) { |
6926 | uint32_t BW = A.getBitWidth(); |
6927 | assert(BW == B.getBitWidth() && "Bit widths must be the same.")((BW == B.getBitWidth() && "Bit widths must be the same." ) ? static_cast<void> (0) : __assert_fail ("BW == B.getBitWidth() && \"Bit widths must be the same.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6927, __PRETTY_FUNCTION__)); |
6928 | assert(A != 0 && "A must be non-zero.")((A != 0 && "A must be non-zero.") ? static_cast<void > (0) : __assert_fail ("A != 0 && \"A must be non-zero.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 6928, __PRETTY_FUNCTION__)); |
6929 | |
6930 | // 1. D = gcd(A, N) |
6931 | // |
6932 | // The gcd of A and N may have only one prime factor: 2. The number of |
6933 | // trailing zeros in A is its multiplicity |
6934 | uint32_t Mult2 = A.countTrailingZeros(); |
6935 | // D = 2^Mult2 |
6936 | |
6937 | // 2. Check if B is divisible by D. |
6938 | // |
6939 | // B is divisible by D if and only if the multiplicity of prime factor 2 for B |
6940 | // is not less than multiplicity of this prime factor for D. |
6941 | if (B.countTrailingZeros() < Mult2) |
6942 | return SE.getCouldNotCompute(); |
6943 | |
6944 | // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic |
6945 | // modulo (N / D). |
6946 | // |
6947 | // (N / D) may need BW+1 bits in its representation. Hence, we'll use this |
6948 | // bit width during computations. |
6949 | APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D |
6950 | APInt Mod(BW + 1, 0); |
6951 | Mod.setBit(BW - Mult2); // Mod = N / D |
6952 | APInt I = AD.multiplicativeInverse(Mod); |
6953 | |
6954 | // 4. Compute the minimum unsigned root of the equation: |
6955 | // I * (B / D) mod (N / D) |
6956 | APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod); |
6957 | |
6958 | // The result is guaranteed to be less than 2^BW so we may truncate it to BW |
6959 | // bits. |
6960 | return SE.getConstant(Result.trunc(BW)); |
6961 | } |
6962 | |
/// Find the roots of the quadratic equation for the given quadratic chrec
/// {L,+,M,+,N}. This returns either the two roots (which might be the same) or
/// two SCEVCouldNotCompute objects.
///
static std::pair<const SCEV *,const SCEV *>
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
  const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
  const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
  const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));

  // We currently can only solve this if the coefficients are constants.
  if (!LC || !MC || !NC) {
    const SCEV *CNC = SE.getCouldNotCompute();
    return {CNC, CNC};
  }

  uint32_t BitWidth = LC->getAPInt().getBitWidth();
  const APInt &L = LC->getAPInt();
  const APInt &M = MC->getAPInt();
  const APInt &N = NC->getAPInt();
  APInt Two(BitWidth, 2);
  APInt Four(BitWidth, 4);

  {
    using namespace APIntOps;
    // The chrec's start value L is the polynomial's constant term.
    const APInt& C = L;
    // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
    // The B coefficient is M-N/2
    APInt B(M);
    B -= sdiv(N,Two);

    // The A coefficient is N/2
    APInt A(N.sdiv(Two));

    // Compute the B^2-4ac term (the discriminant).
    APInt SqrtTerm(B);
    SqrtTerm *= B;
    SqrtTerm -= Four * (A * C);

    if (SqrtTerm.isNegative()) {
      // The loop is provably infinite.
      const SCEV *CNC = SE.getCouldNotCompute();
      return {CNC, CNC};
    }

    // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
    // integer value or else APInt::sqrt() will assert.
    APInt SqrtVal(SqrtTerm.sqrt());

    // Compute the two solutions for the quadratic formula.
    // The divisions must be performed as signed divisions.
    APInt NegB(-B);
    APInt TwoA(A << 1);
    if (TwoA.isMinValue()) {
      // 2A == 0: the chrec was not actually quadratic; bail out rather than
      // divide by zero below.
      const SCEV *CNC = SE.getCouldNotCompute();
      return {CNC, CNC};
    }

    LLVMContext &Context = SE.getContext();

    // Roots are (-B +/- sqrt(B^2-4AC)) / 2A.
    ConstantInt *Solution1 =
        ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
    ConstantInt *Solution2 =
        ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));

    return {SE.getConstant(Solution1), SE.getConstant(Solution2)};
  } // end APIntOps namespace
}
7032 | |
7033 | ScalarEvolution::ExitLimit |
7034 | ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, |
7035 | bool AllowPredicates) { |
7036 | |
7037 | // This is only used for loops with a "x != y" exit test. The exit condition |
7038 | // is now expressed as a single expression, V = x-y. So the exit test is |
7039 | // effectively V != 0. We know and take advantage of the fact that this |
7040 | // expression only being used in a comparison by zero context. |
7041 | |
7042 | SCEVUnionPredicate P; |
7043 | // If the value is a constant |
7044 | if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { |
7045 | // If the value is already zero, the branch will execute zero times. |
7046 | if (C->getValue()->isZero()) return C; |
7047 | return getCouldNotCompute(); // Otherwise it will loop infinitely. |
7048 | } |
7049 | |
7050 | const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V); |
7051 | if (!AddRec && AllowPredicates) |
7052 | // Try to make this an AddRec using runtime tests, in the first X |
7053 | // iterations of this loop, where X is the SCEV expression found by the |
7054 | // algorithm below. |
7055 | AddRec = convertSCEVToAddRecWithPredicates(V, L, P); |
7056 | |
7057 | if (!AddRec || AddRec->getLoop() != L) |
7058 | return getCouldNotCompute(); |
7059 | |
7060 | // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of |
7061 | // the quadratic equation to solve it. |
7062 | if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { |
7063 | std::pair<const SCEV *,const SCEV *> Roots = |
7064 | SolveQuadraticEquation(AddRec, *this); |
7065 | const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); |
7066 | const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); |
7067 | if (R1 && R2) { |
7068 | // Pick the smallest positive root value. |
7069 | if (ConstantInt *CB = |
7070 | dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT, |
7071 | R1->getValue(), |
7072 | R2->getValue()))) { |
7073 | if (!CB->getZExtValue()) |
7074 | std::swap(R1, R2); // R1 is the minimum root now. |
7075 | |
7076 | // We can only use this value if the chrec ends up with an exact zero |
7077 | // value at this index. When solving for "X*X != 5", for example, we |
7078 | // should not accept a root of 2. |
7079 | const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); |
7080 | if (Val->isZero()) |
7081 | return ExitLimit(R1, R1, P); // We found a quadratic root! |
7082 | } |
7083 | } |
7084 | return getCouldNotCompute(); |
7085 | } |
7086 | |
7087 | // Otherwise we can only handle this if it is affine. |
7088 | if (!AddRec->isAffine()) |
7089 | return getCouldNotCompute(); |
7090 | |
7091 | // If this is an affine expression, the execution count of this branch is |
7092 | // the minimum unsigned root of the following equation: |
7093 | // |
7094 | // Start + Step*N = 0 (mod 2^BW) |
7095 | // |
7096 | // equivalent to: |
7097 | // |
7098 | // Step*N = -Start (mod 2^BW) |
7099 | // |
7100 | // where BW is the common bit width of Start and Step. |
7101 | |
7102 | // Get the initial value for the loop. |
7103 | const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); |
7104 | const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); |
7105 | |
7106 | // For now we handle only constant steps. |
7107 | // |
7108 | // TODO: Handle a nonconstant Step given AddRec<NUW>. If the |
7109 | // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap |
7110 | // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. |
7111 | // We have not yet seen any such cases. |
7112 | const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); |
7113 | if (!StepC || StepC->getValue()->equalsInt(0)) |
7114 | return getCouldNotCompute(); |
7115 | |
7116 | // For positive steps (counting up until unsigned overflow): |
7117 | // N = -Start/Step (as unsigned) |
7118 | // For negative steps (counting down to zero): |
7119 | // N = Start/-Step |
7120 | // First compute the unsigned distance from zero in the direction of Step. |
7121 | bool CountDown = StepC->getAPInt().isNegative(); |
7122 | const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start); |
7123 | |
7124 | // Handle unitary steps, which cannot wraparound. |
7125 | // 1*N = -Start; -1*N = Start (mod 2^BW), so: |
7126 | // N = Distance (as unsigned) |
7127 | if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) { |
7128 | ConstantRange CR = getUnsignedRange(Start); |
7129 | const SCEV *MaxBECount; |
7130 | if (!CountDown && CR.getUnsignedMin().isMinValue()) |
7131 | // When counting up, the worst starting value is 1, not 0. |
7132 | MaxBECount = CR.getUnsignedMax().isMinValue() |
7133 | ? getConstant(APInt::getMinValue(CR.getBitWidth())) |
7134 | : getConstant(APInt::getMaxValue(CR.getBitWidth())); |
7135 | else |
7136 | MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() |
7137 | : -CR.getUnsignedMin()); |
7138 | return ExitLimit(Distance, MaxBECount, P); |
7139 | } |
7140 | |
7141 | // As a special case, handle the instance where Step is a positive power of |
7142 | // two. In this case, determining whether Step divides Distance evenly can be |
7143 | // done by counting and comparing the number of trailing zeros of Step and |
7144 | // Distance. |
7145 | if (!CountDown) { |
7146 | const APInt &StepV = StepC->getAPInt(); |
7147 | // StepV.isPowerOf2() returns true if StepV is an positive power of two. It |
7148 | // also returns true if StepV is maximally negative (eg, INT_MIN), but that |
7149 | // case is not handled as this code is guarded by !CountDown. |
7150 | if (StepV.isPowerOf2() && |
7151 | GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) { |
7152 | // Here we've constrained the equation to be of the form |
7153 | // |
7154 | // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0) |
7155 | // |
7156 | // where we're operating on a W bit wide integer domain and k is |
7157 | // non-negative. The smallest unsigned solution for X is the trip count. |
7158 | // |
7159 | // (0) is equivalent to: |
7160 | // |
7161 | // 2^(N + k) * Distance' - 2^N * X = L * 2^W |
7162 | // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N |
7163 | // <=> 2^k * Distance' - X = L * 2^(W - N) |
7164 | // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1) |
7165 | // |
7166 | // The smallest X satisfying (1) is unsigned remainder of dividing the LHS |
7167 | // by 2^(W - N). |
7168 | // |
7169 | // <=> X = 2^k * Distance' URem 2^(W - N) ... (2) |
7170 | // |
7171 | // E.g. say we're solving |
7172 | // |
7173 | // 2 * Val = 2 * X (in i8) ... (3) |
7174 | // |
7175 | // then from (2), we get X = Val URem i8 128 (k = 0 in this case). |
7176 | // |
7177 | // Note: It is tempting to solve (3) by setting X = Val, but Val is not |
7178 | // necessarily the smallest unsigned value of X that satisfies (3). |
7179 | // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3) |
7180 | // is i8 1, not i8 -127 |
7181 | |
7182 | const auto *ModuloResult = getUDivExactExpr(Distance, Step); |
7183 | |
7184 | // Since SCEV does not have a URem node, we construct one using a truncate |
7185 | // and a zero extend. |
7186 | |
7187 | unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros(); |
7188 | auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); |
7189 | auto *WideTy = Distance->getType(); |
7190 | |
7191 | const SCEV *Limit = |
7192 | getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); |
7193 | return ExitLimit(Limit, Limit, P); |
7194 | } |
7195 | } |
7196 | |
7197 | // If the condition controls loop exit (the loop exits only if the expression |
7198 | // is true) and the addition is no-wrap we can use unsigned divide to |
7199 | // compute the backedge count. In this case, the step may not divide the |
7200 | // distance, but we don't care because if the condition is "missed" the loop |
7201 | // will have undefined behavior due to wrapping. |
7202 | if (ControlsExit && AddRec->hasNoSelfWrap()) { |
7203 | const SCEV *Exact = |
7204 | getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); |
7205 | return ExitLimit(Exact, Exact, P); |
7206 | } |
7207 | |
7208 | // Then, try to solve the above equation provided that Start is constant. |
7209 | if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) { |
7210 | const SCEV *E = SolveLinEquationWithOverflow( |
7211 | StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this); |
7212 | return ExitLimit(E, E, P); |
7213 | } |
7214 | return getCouldNotCompute(); |
7215 | } |
7216 | |
7217 | ScalarEvolution::ExitLimit |
7218 | ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) { |
7219 | // Loops that look like: while (X == 0) are very strange indeed. We don't |
7220 | // handle them yet except for the trivial case. This could be expanded in the |
7221 | // future as needed. |
7222 | |
7223 | // If the value is a constant, check to see if it is known to be non-zero |
7224 | // already. If so, the backedge will execute zero times. |
7225 | if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { |
7226 | if (!C->getValue()->isNullValue()) |
7227 | return getZero(C->getType()); |
7228 | return getCouldNotCompute(); // Otherwise it will loop infinitely. |
7229 | } |
7230 | |
7231 | // We could implement others, but I really doubt anyone writes loops like |
7232 | // this, and if they did, they would already be constant folded. |
7233 | return getCouldNotCompute(); |
7234 | } |
7235 | |
7236 | std::pair<BasicBlock *, BasicBlock *> |
7237 | ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { |
7238 | // If the block has a unique predecessor, then there is no path from the |
7239 | // predecessor to the block that does not go through the direct edge |
7240 | // from the predecessor to the block. |
7241 | if (BasicBlock *Pred = BB->getSinglePredecessor()) |
7242 | return {Pred, BB}; |
7243 | |
7244 | // A loop's header is defined to be a block that dominates the loop. |
7245 | // If the header has a unique predecessor outside the loop, it must be |
7246 | // a block that has exactly one successor that can reach the loop. |
7247 | if (Loop *L = LI.getLoopFor(BB)) |
7248 | return {L->getLoopPredecessor(), L->getHeader()}; |
7249 | |
7250 | return {nullptr, nullptr}; |
7251 | } |
7252 | |
7253 | /// SCEV structural equivalence is usually sufficient for testing whether two |
7254 | /// expressions are equal, however for the purposes of looking for a condition |
7255 | /// guarding a loop, it can be useful to be a little more general, since a |
7256 | /// front-end may have replicated the controlling expression. |
7257 | /// |
7258 | static bool HasSameValue(const SCEV *A, const SCEV *B) { |
7259 | // Quick check to see if they are the same SCEV. |
7260 | if (A == B) return true; |
7261 | |
7262 | auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) { |
7263 | // Not all instructions that are "identical" compute the same value. For |
7264 | // instance, two distinct alloca instructions allocating the same type are |
7265 | // identical and do not read memory; but compute distinct values. |
7266 | return A->isIdenticalTo(B) && (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A)); |
7267 | }; |
7268 | |
7269 | // Otherwise, if they're both SCEVUnknown, it's possible that they hold |
7270 | // two different instructions with the same value. Check for this case. |
7271 | if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A)) |
7272 | if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B)) |
7273 | if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue())) |
7274 | if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) |
7275 | if (ComputesEqualValues(AI, BI)) |
7276 | return true; |
7277 | |
7278 | // Otherwise assume they may have a different value. |
7279 | return false; |
7280 | } |
7281 | |
/// Simplify "LHS `Pred` RHS" into an equivalent, more canonical comparison:
/// constants are moved to the RHS, addrecs to the LHS, boundary comparisons
/// against constants are folded, and *-or-equal predicates are rewritten as
/// strict ones.  Trivially true/false comparisons become "0 == 0"/"0 != 0".
/// Returns true if the predicate or either operand was changed.  Recurses
/// (bounded by Depth) until the comparison reaches a fixed point.
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
                                           const SCEV *&LHS, const SCEV *&RHS,
                                           unsigned Depth) {
  bool Changed = false;

  // If we hit the max recursion limit bail out.
  if (Depth >= 3)
    return false;

  // Canonicalize a constant to the right side.
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
    // Check for both operands constant: the comparison folds to a known
    // truth value.
    if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
      if (ConstantExpr::getICmp(Pred,
                                LHSC->getValue(),
                                RHSC->getValue())->isNullValue())
        goto trivially_false;
      else
        goto trivially_true;
    }
    // Otherwise swap the operands to put the constant on the right.
    std::swap(LHS, RHS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
    Changed = true;
  }

  // If we're comparing an addrec with a value which is loop-invariant in the
  // addrec's loop, put the addrec on the left. Also make a dominance check,
  // as both operands could be addrecs loop-invariant in each other's loop.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
    const Loop *L = AR->getLoop();
    if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
      Changed = true;
    }
  }

  // If there's a constant operand, canonicalize comparisons with boundary
  // cases, and canonicalize *-or-equal comparisons to regular comparisons.
  if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
    const APInt &RA = RC->getAPInt();
    switch (Pred) {
    default: llvm_unreachable("Unexpected ICmpInst::Predicate value!")::llvm::llvm_unreachable_internal("Unexpected ICmpInst::Predicate value!" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 7325);
    case ICmpInst::ICMP_EQ:
    case ICmpInst::ICMP_NE:
      // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
      if (!RA)
        if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
          if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
            if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
                ME->getOperand(0)->isAllOnesValue()) {
              RHS = AE->getOperand(1);
              LHS = ME->getOperand(1);
              Changed = true;
            }
      break;
    case ICmpInst::ICMP_UGE:
      // x u>= 1  <=>  x != 0.
      if ((RA - 1).isMinValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      // x u>= UINT_MAX  <=>  x == UINT_MAX.
      if (RA.isMaxValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      // x u>= 0 is always true.
      if (RA.isMinValue()) goto trivially_true;

      // Otherwise, x u>= C  <=>  x u> C-1.
      Pred = ICmpInst::ICMP_UGT;
      RHS = getConstant(RA - 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_ULE:
      // x u<= UINT_MAX-1  <=>  x != UINT_MAX.
      if ((RA + 1).isMaxValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      // x u<= 0  <=>  x == 0.
      if (RA.isMinValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      // x u<= UINT_MAX is always true.
      if (RA.isMaxValue()) goto trivially_true;

      // Otherwise, x u<= C  <=>  x u< C+1.
      Pred = ICmpInst::ICMP_ULT;
      RHS = getConstant(RA + 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_SGE:
      // x s>= INT_MIN+1  <=>  x != INT_MIN.
      if ((RA - 1).isMinSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      // x s>= INT_MAX  <=>  x == INT_MAX.
      if (RA.isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      // x s>= INT_MIN is always true.
      if (RA.isMinSignedValue()) goto trivially_true;

      // Otherwise, x s>= C  <=>  x s> C-1.
      Pred = ICmpInst::ICMP_SGT;
      RHS = getConstant(RA - 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_SLE:
      // x s<= INT_MAX-1  <=>  x != INT_MAX.
      if ((RA + 1).isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      // x s<= INT_MIN  <=>  x == INT_MIN.
      if (RA.isMinSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      // x s<= INT_MAX is always true.
      if (RA.isMaxSignedValue()) goto trivially_true;

      // Otherwise, x s<= C  <=>  x s< C+1.
      Pred = ICmpInst::ICMP_SLT;
      RHS = getConstant(RA + 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_UGT:
      // x u> 0  <=>  x != 0.
      if (RA.isMinValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      // x u> UINT_MAX-1  <=>  x == UINT_MAX.
      if ((RA + 1).isMaxValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      // x u> UINT_MAX is always false.
      if (RA.isMaxValue()) goto trivially_false;
      break;
    case ICmpInst::ICMP_ULT:
      // x u< UINT_MAX  <=>  x != UINT_MAX.
      if (RA.isMaxValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      // x u< 1  <=>  x == 0.
      if ((RA - 1).isMinValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      // x u< 0 is always false.
      if (RA.isMinValue()) goto trivially_false;
      break;
    case ICmpInst::ICMP_SGT:
      // x s> INT_MIN  <=>  x != INT_MIN.
      if (RA.isMinSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      // x s> INT_MAX-1  <=>  x == INT_MAX.
      if ((RA + 1).isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      // x s> INT_MAX is always false.
      if (RA.isMaxSignedValue()) goto trivially_false;
      break;
    case ICmpInst::ICMP_SLT:
      // x s< INT_MAX  <=>  x != INT_MAX.
      if (RA.isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      // x s< INT_MIN+1  <=>  x == INT_MIN.
      if ((RA - 1).isMinSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      // x s< INT_MIN is always false.
      if (RA.isMinSignedValue()) goto trivially_false;
      break;
    }
  }

  // Check for obvious equality.
  if (HasSameValue(LHS, RHS)) {
    if (ICmpInst::isTrueWhenEqual(Pred))
      goto trivially_true;
    if (ICmpInst::isFalseWhenEqual(Pred))
      goto trivially_false;
  }

  // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
  // adding or subtracting 1 from one of the operands.  The range checks
  // guarantee the +1/-1 cannot wrap, which is what justifies the NSW/NUW
  // flags on the resulting add expressions.
  switch (Pred) {
  case ICmpInst::ICMP_SLE:
    if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_SGE:
    if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       SCEV::FlagNSW);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_ULE:
    if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       SCEV::FlagNUW);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_UGE:
    if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       SCEV::FlagNUW);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    }
    break;
  default:
    break;
  }

  // TODO: More simplifications are possible here.

  // Recursively simplify until we either hit a recursion limit or nothing
  // changes.
  if (Changed)
    return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);

  return Changed;

trivially_true:
  // Return 0 == 0.
  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
  Pred = ICmpInst::ICMP_EQ;
  return true;

trivially_false:
  // Return 0 != 0.
  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
  Pred = ICmpInst::ICMP_NE;
  return true;
}
7556 | |
7557 | bool ScalarEvolution::isKnownNegative(const SCEV *S) { |
7558 | return getSignedRange(S).getSignedMax().isNegative(); |
7559 | } |
7560 | |
7561 | bool ScalarEvolution::isKnownPositive(const SCEV *S) { |
7562 | return getSignedRange(S).getSignedMin().isStrictlyPositive(); |
7563 | } |
7564 | |
7565 | bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { |
7566 | return !getSignedRange(S).getSignedMin().isNegative(); |
7567 | } |
7568 | |
7569 | bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { |
7570 | return !getSignedRange(S).getSignedMax().isStrictlyPositive(); |
7571 | } |
7572 | |
7573 | bool ScalarEvolution::isKnownNonZero(const SCEV *S) { |
7574 | return isKnownNegative(S) || isKnownPositive(S); |
7575 | } |
7576 | |
7577 | bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, |
7578 | const SCEV *LHS, const SCEV *RHS) { |
7579 | // Canonicalize the inputs first. |
7580 | (void)SimplifyICmpOperands(Pred, LHS, RHS); |
7581 | |
7582 | // If LHS or RHS is an addrec, check to see if the condition is true in |
7583 | // every iteration of the loop. |
7584 | // If LHS and RHS are both addrec, both conditions must be true in |
7585 | // every iteration of the loop. |
7586 | const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS); |
7587 | const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); |
7588 | bool LeftGuarded = false; |
7589 | bool RightGuarded = false; |
7590 | if (LAR) { |
7591 | const Loop *L = LAR->getLoop(); |
7592 | if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) && |
7593 | isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) { |
7594 | if (!RAR) return true; |
7595 | LeftGuarded = true; |
7596 | } |
7597 | } |
7598 | if (RAR) { |
7599 | const Loop *L = RAR->getLoop(); |
7600 | if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) && |
7601 | isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) { |
7602 | if (!LAR) return true; |
7603 | RightGuarded = true; |
7604 | } |
7605 | } |
7606 | if (LeftGuarded && RightGuarded) |
7607 | return true; |
7608 | |
7609 | if (isKnownPredicateViaSplitting(Pred, LHS, RHS)) |
7610 | return true; |
7611 | |
7612 | // Otherwise see what can be done with known constant ranges. |
7613 | return isKnownPredicateViaConstantRanges(Pred, LHS, RHS); |
7614 | } |
7615 | |
/// Determine whether the truth value of "LHS `Pred` X" changes monotonically
/// over the iteration space of the addrec LHS.  On success, Increasing is set
/// to true when the predicate can only flip from false to true as the loop
/// iterates (and to false when it can only flip from true to false).
bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
                                           ICmpInst::Predicate Pred,
                                           bool &Increasing) {
  bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);

#ifndef NDEBUG
  // Verify an invariant: inverting the predicate should turn a monotonically
  // increasing change to a monotonically decreasing one, and vice versa.
  bool IncreasingSwapped;
  bool ResultSwapped = isMonotonicPredicateImpl(
      LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);

  assert(Result == ResultSwapped && "should be able to analyze both!")((Result == ResultSwapped && "should be able to analyze both!" ) ? static_cast<void> (0) : __assert_fail ("Result == ResultSwapped && \"should be able to analyze both!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 7628, __PRETTY_FUNCTION__));
  if (ResultSwapped)
    assert(Increasing == !IncreasingSwapped &&((Increasing == !IncreasingSwapped && "monotonicity should flip as we flip the predicate" ) ? static_cast<void> (0) : __assert_fail ("Increasing == !IncreasingSwapped && \"monotonicity should flip as we flip the predicate\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 7631, __PRETTY_FUNCTION__))
           "monotonicity should flip as we flip the predicate")((Increasing == !IncreasingSwapped && "monotonicity should flip as we flip the predicate" ) ? static_cast<void> (0) : __assert_fail ("Increasing == !IncreasingSwapped && \"monotonicity should flip as we flip the predicate\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 7631, __PRETTY_FUNCTION__));
#endif

  return Result;
}
7636 | |
/// Implementation helper for isMonotonicPredicate: classifies how the truth
/// value of "LHS `Pred` X" evolves over the addrec's iteration space, based
/// on the addrec's no-wrap flags and the sign of its step.
bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
                                               ICmpInst::Predicate Pred,
                                               bool &Increasing) {

  // A zero step value for LHS means the induction variable is essentially a
  // loop invariant value. We don't really depend on the predicate actually
  // flipping from false to true (for increasing predicates, and the other way
  // around for decreasing predicates), all we care about is that *if* the
  // predicate changes then it only changes from false to true.
  //
  // A zero step value in itself is not very useful, but there may be places
  // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
  // as general as possible.

  switch (Pred) {
  default:
    return false; // Conservative answer

  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    // An NUW addrec never wraps, so in the unsigned ordering it moves in a
    // single direction for its whole lifetime.
    if (!LHS->hasNoUnsignedWrap())
      return false;

    Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
    return true;

  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE: {
    // Signed predicates additionally need a step whose sign is known, since
    // NSW alone does not tell us which direction the addrec moves.
    if (!LHS->hasNoSignedWrap())
      return false;

    const SCEV *Step = LHS->getStepRecurrence(*this);

    if (isKnownNonNegative(Step)) {
      Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
      return true;
    }

    if (isKnownNonPositive(Step)) {
      Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
      return true;
    }

    return false;
  }

  }

  llvm_unreachable("switch has default clause!")::llvm::llvm_unreachable_internal("switch has default clause!" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 7689);
}
7691 | |
7692 | bool ScalarEvolution::isLoopInvariantPredicate( |
7693 | ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, |
7694 | ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS, |
7695 | const SCEV *&InvariantRHS) { |
7696 | |
7697 | // If there is a loop-invariant, force it into the RHS, otherwise bail out. |
7698 | if (!isLoopInvariant(RHS, L)) { |
7699 | if (!isLoopInvariant(LHS, L)) |
7700 | return false; |
7701 | |
7702 | std::swap(LHS, RHS); |
7703 | Pred = ICmpInst::getSwappedPredicate(Pred); |
7704 | } |
7705 | |
7706 | const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS); |
7707 | if (!ArLHS || ArLHS->getLoop() != L) |
7708 | return false; |
7709 | |
7710 | bool Increasing; |
7711 | if (!isMonotonicPredicate(ArLHS, Pred, Increasing)) |
7712 | return false; |
7713 | |
7714 | // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to |
7715 | // true as the loop iterates, and the backedge is control dependent on |
7716 | // "ArLHS `Pred` RHS" == true then we can reason as follows: |
7717 | // |
7718 | // * if the predicate was false in the first iteration then the predicate |
7719 | // is never evaluated again, since the loop exits without taking the |
7720 | // backedge. |
7721 | // * if the predicate was true in the first iteration then it will |
7722 | // continue to be true for all future iterations since it is |
7723 | // monotonically increasing. |
7724 | // |
7725 | // For both the above possibilities, we can replace the loop varying |
7726 | // predicate with its value on the first iteration of the loop (which is |
7727 | // loop invariant). |
7728 | // |
7729 | // A similar reasoning applies for a monotonically decreasing predicate, by |
7730 | // replacing true with false and false with true in the above two bullets. |
7731 | |
7732 | auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred); |
7733 | |
7734 | if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) |
7735 | return false; |
7736 | |
7737 | InvariantPred = Pred; |
7738 | InvariantLHS = ArLHS->getStart(); |
7739 | InvariantRHS = RHS; |
7740 | return true; |
7741 | } |
7742 | |
7743 | bool ScalarEvolution::isKnownPredicateViaConstantRanges( |
7744 | ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { |
7745 | if (HasSameValue(LHS, RHS)) |
7746 | return ICmpInst::isTrueWhenEqual(Pred); |
7747 | |
7748 | // This code is split out from isKnownPredicate because it is called from |
7749 | // within isLoopEntryGuardedByCond. |
7750 | |
7751 | auto CheckRanges = |
7752 | [&](const ConstantRange &RangeLHS, const ConstantRange &RangeRHS) { |
7753 | return ConstantRange::makeSatisfyingICmpRegion(Pred, RangeRHS) |
7754 | .contains(RangeLHS); |
7755 | }; |
7756 | |
7757 | // The check at the top of the function catches the case where the values are |
7758 | // known to be equal. |
7759 | if (Pred == CmpInst::ICMP_EQ) |
7760 | return false; |
7761 | |
7762 | if (Pred == CmpInst::ICMP_NE) |
7763 | return CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) || |
7764 | CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)) || |
7765 | isKnownNonZero(getMinusSCEV(LHS, RHS)); |
7766 | |
7767 | if (CmpInst::isSigned(Pred)) |
7768 | return CheckRanges(getSignedRange(LHS), getSignedRange(RHS)); |
7769 | |
7770 | return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)); |
7771 | } |
7772 | |
/// Try to prove "LHS `Pred` RHS" from no-signed-wrap addition facts alone,
/// by recognizing one side as (Other + C)<nsw> for a constant C whose sign
/// decides the comparison.
bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
                                                    const SCEV *LHS,
                                                    const SCEV *RHS) {

  // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer.
  // Return Y via OutY.
  auto MatchBinaryAddToConst =
      [this](const SCEV *Result, const SCEV *X, APInt &OutY,
             SCEV::NoWrapFlags ExpectedFlags) {
        const SCEV *NonConstOp, *ConstOp;
        SCEV::NoWrapFlags FlagsPresent;

        if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) ||
            !isa<SCEVConstant>(ConstOp) || NonConstOp != X)
          return false;

        OutY = cast<SCEVConstant>(ConstOp)->getAPInt();
        // All of the expected no-wrap flags must actually be present.
        return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
      };

  APInt C;

  switch (Pred) {
  default:
    break;

  case ICmpInst::ICMP_SGE:
    // Reduce SGE to SLE by swapping operands; intentional fallthrough.
    std::swap(LHS, RHS);
  case ICmpInst::ICMP_SLE:
    // X s<= (X + C)<nsw> if C >= 0
    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative())
      return true;

    // (X + C)<nsw> s<= X if C <= 0
    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
        !C.isStrictlyPositive())
      return true;
    break;

  case ICmpInst::ICMP_SGT:
    // Reduce SGT to SLT by swapping operands; intentional fallthrough.
    std::swap(LHS, RHS);
  case ICmpInst::ICMP_SLT:
    // X s< (X + C)<nsw> if C > 0
    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) &&
        C.isStrictlyPositive())
      return true;

    // (X + C)<nsw> s< X if C < 0
    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
      return true;
    break;
  }

  return false;
}
7828 | |
7829 | bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, |
7830 | const SCEV *LHS, |
7831 | const SCEV *RHS) { |
7832 | if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate) |
7833 | return false; |
7834 | |
7835 | // Allowing arbitrary number of activations of isKnownPredicateViaSplitting on |
7836 | // the stack can result in exponential time complexity. |
7837 | SaveAndRestore<bool> Restore(ProvingSplitPredicate, true); |
7838 | |
7839 | // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L |
7840 | // |
7841 | // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use |
7842 | // isKnownPredicate. isKnownPredicate is more powerful, but also more |
7843 | // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the |
7844 | // interesting cases seen in practice. We can consider "upgrading" L >= 0 to |
7845 | // use isKnownPredicate later if needed. |
7846 | return isKnownNonNegative(RHS) && |
7847 | isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) && |
7848 | isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS); |
7849 | } |
7850 | |
7851 | bool ScalarEvolution::isImpliedViaGuard(BasicBlock *BB, |
7852 | ICmpInst::Predicate Pred, |
7853 | const SCEV *LHS, const SCEV *RHS) { |
7854 | // No need to even try if we know the module has no guards. |
7855 | if (!HasGuards) |
7856 | return false; |
7857 | |
7858 | return any_of(*BB, [&](Instruction &I) { |
7859 | using namespace llvm::PatternMatch; |
7860 | |
7861 | Value *Condition; |
7862 | return match(&I, m_Intrinsic<Intrinsic::experimental_guard>( |
7863 | m_Value(Condition))) && |
7864 | isImpliedCond(Pred, LHS, RHS, Condition, false); |
7865 | }); |
7866 | } |
7867 | |
/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS.  This is used to
/// eliminate casts.  Tries, in order: constant-range reasoning, the latch's
/// own branch condition, the loop's exact trip count, @llvm.assume calls,
/// guard intrinsics, and finally conditions on edges dominating the latch.
bool
ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
                                             ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS) {
  // Interpret a null as meaning no loop, where there is obviously no guard
  // (interprocedural conditions notwithstanding).
  if (!L) return true;

  // Cheap check first: the predicate may already follow from the operands'
  // known constant ranges.
  if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS))
    return true;

  // Everything below reasons about the (single) latch; give up without one.
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;

  // The branch that takes the backedge: if its condition implies the
  // predicate, the backedge is guarded.  The bool argument tells
  // isImpliedCond whether the header is reached on the false edge.
  BranchInst *LoopContinuePredicate =
    dyn_cast<BranchInst>(Latch->getTerminator());
  if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
      isImpliedCond(Pred, LHS, RHS,
                    LoopContinuePredicate->getCondition(),
                    LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
    return true;

  // We don't want more than one activation of the following loops on the stack
  // -- that can lead to O(n!) time complexity.
  if (WalkingBEDominatingConds)
    return false;

  SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);

  // See if we can exploit a trip count to prove the predicate.
  const auto &BETakenInfo = getBackedgeTakenInfo(L);
  const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
  if (LatchBECount != getCouldNotCompute()) {
    // We know that Latch branches back to the loop header exactly
    // LatchBECount times.  This means the backedge condition at Latch is
    // equivalent to  "{0,+,1} u< LatchBECount".
    Type *Ty = LatchBECount->getType();
    auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
    const SCEV *LoopCounter =
      getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
    if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
                      LatchBECount))
      return true;
  }

  // Check conditions due to any @llvm.assume intrinsics.
  for (auto &AssumeVH : AC.assumptions()) {
    if (!AssumeVH)
      continue;
    auto *CI = cast<CallInst>(AssumeVH);
    // Only assumes that dominate the latch are guaranteed to hold on the
    // backedge.
    if (!DT.dominates(CI, Latch->getTerminator()))
      continue;

    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
      return true;
  }

  // If the loop is not reachable from the entry block, we risk running into an
  // infinite loop as we walk up into the dom tree.  These loops do not matter
  // anyway, so we just return a conservative answer when we see them.
  if (!DT.isReachableFromEntry(L->getHeader()))
    return false;

  if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
    return true;

  // Walk the dominator tree upward from the latch to the header: every block
  // on this path dominates the latch, so a condition controlling entry to
  // such a block also controls whether the backedge can be taken.
  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
       DTN != HeaderDTN; DTN = DTN->getIDom()) {

    assert(DTN && "should reach the loop header before reaching the root!")((DTN && "should reach the loop header before reaching the root!" ) ? static_cast<void> (0) : __assert_fail ("DTN && \"should reach the loop header before reaching the root!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 7941, __PRETTY_FUNCTION__));

    BasicBlock *BB = DTN->getBlock();
    if (isImpliedViaGuard(BB, Pred, LHS, RHS))
      return true;

    // Only a unique predecessor gives us a single controlling branch.
    BasicBlock *PBB = BB->getSinglePredecessor();
    if (!PBB)
      continue;

    BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
    if (!ContinuePredicate || !ContinuePredicate->isConditional())
      continue;

    Value *Condition = ContinuePredicate->getCondition();

    // If we have an edge `E` within the loop body that dominates the only
    // latch, the condition guarding `E` also guards the backedge.  This
    // reasoning works only for loops with a single latch.

    BasicBlockEdge DominatingEdge(PBB, BB);
    if (DominatingEdge.isSingleEdge()) {
      // We're constructively (and conservatively) enumerating edges within the
      // loop body that dominate the latch.  The dominator tree better agree
      // with us on this:
      assert(DT.dominates(DominatingEdge, Latch) && "should be!")((DT.dominates(DominatingEdge, Latch) && "should be!" ) ? static_cast<void> (0) : __assert_fail ("DT.dominates(DominatingEdge, Latch) && \"should be!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 7966, __PRETTY_FUNCTION__));

      if (isImpliedCond(Pred, LHS, RHS, Condition,
                        BB != ContinuePredicate->getSuccessor(0)))
        return true;
    }
  }

  return false;
}
7976 | |
7977 | bool |
7978 | ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, |
7979 | ICmpInst::Predicate Pred, |
7980 | const SCEV *LHS, const SCEV *RHS) { |
7981 | // Interpret a null as meaning no loop, where there is obviously no guard |
7982 | // (interprocedural conditions notwithstanding). |
7983 | if (!L) return false; |
7984 | |
7985 | if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS)) |
7986 | return true; |
7987 | |
7988 | // Starting at the loop predecessor, climb up the predecessor chain, as long |
7989 | // as there are predecessors that can be found that have unique successors |
7990 | // leading to the original header. |
7991 | for (std::pair<BasicBlock *, BasicBlock *> |
7992 | Pair(L->getLoopPredecessor(), L->getHeader()); |
7993 | Pair.first; |
7994 | Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { |
7995 | |
7996 | if (isImpliedViaGuard(Pair.first, Pred, LHS, RHS)) |
7997 | return true; |
7998 | |
7999 | BranchInst *LoopEntryPredicate = |
8000 | dyn_cast<BranchInst>(Pair.first->getTerminator()); |
8001 | if (!LoopEntryPredicate || |
8002 | LoopEntryPredicate->isUnconditional()) |
8003 | continue; |
8004 | |
8005 | if (isImpliedCond(Pred, LHS, RHS, |
8006 | LoopEntryPredicate->getCondition(), |
8007 | LoopEntryPredicate->getSuccessor(0) != Pair.second)) |
8008 | return true; |
8009 | } |
8010 | |
8011 | // Check conditions due to any @llvm.assume intrinsics. |
8012 | for (auto &AssumeVH : AC.assumptions()) { |
8013 | if (!AssumeVH) |
8014 | continue; |
8015 | auto *CI = cast<CallInst>(AssumeVH); |
8016 | if (!DT.dominates(CI, L->getHeader())) |
8017 | continue; |
8018 | |
8019 | if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) |
8020 | return true; |
8021 | } |
8022 | |
8023 | return false; |
8024 | } |
8025 | |
8026 | namespace { |
8027 | /// RAII wrapper to prevent recursive application of isImpliedCond. |
8028 | /// ScalarEvolution's PendingLoopPredicates set must be empty unless we are |
8029 | /// currently evaluating isImpliedCond. |
8030 | struct MarkPendingLoopPredicate { |
8031 | Value *Cond; |
8032 | DenseSet<Value*> &LoopPreds; |
8033 | bool Pending; |
8034 | |
8035 | MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP) |
8036 | : Cond(C), LoopPreds(LP) { |
8037 | Pending = !LoopPreds.insert(Cond).second; |
8038 | } |
8039 | ~MarkPendingLoopPredicate() { |
8040 | if (!Pending) |
8041 | LoopPreds.erase(Cond); |
8042 | } |
8043 | }; |
8044 | } // end anonymous namespace |
8045 | |
8046 | bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, |
8047 | const SCEV *LHS, const SCEV *RHS, |
8048 | Value *FoundCondValue, |
8049 | bool Inverse) { |
8050 | MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates); |
8051 | if (Mark.Pending) |
8052 | return false; |
8053 | |
8054 | // Recursively handle And and Or conditions. |
8055 | if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) { |
8056 | if (BO->getOpcode() == Instruction::And) { |
8057 | if (!Inverse) |
8058 | return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || |
8059 | isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); |
8060 | } else if (BO->getOpcode() == Instruction::Or) { |
8061 | if (Inverse) |
8062 | return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || |
8063 | isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); |
8064 | } |
8065 | } |
8066 | |
8067 | ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); |
8068 | if (!ICI) return false; |
8069 | |
8070 | // Now that we found a conditional branch that dominates the loop or controls |
8071 | // the loop latch. Check to see if it is the comparison we are looking for. |
8072 | ICmpInst::Predicate FoundPred; |
8073 | if (Inverse) |
8074 | FoundPred = ICI->getInversePredicate(); |
8075 | else |
8076 | FoundPred = ICI->getPredicate(); |
8077 | |
8078 | const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); |
8079 | const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); |
8080 | |
8081 | return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS); |
8082 | } |
8083 | |
// Test whether the condition "LHS Pred RHS" is implied by the known-true
// condition "FoundLHS FoundPred FoundRHS", after widening the narrower pair
// of operands so both comparisons are over the same type.
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
                                    const SCEV *RHS,
                                    ICmpInst::Predicate FoundPred,
                                    const SCEV *FoundLHS,
                                    const SCEV *FoundRHS) {
  // Balance the types.  The narrower pair is widened with a sign- or
  // zero-extension chosen by the signedness of its own predicate.
  if (getTypeSizeInBits(LHS->getType()) <
      getTypeSizeInBits(FoundLHS->getType())) {
    if (CmpInst::isSigned(Pred)) {
      LHS = getSignExtendExpr(LHS, FoundLHS->getType());
      RHS = getSignExtendExpr(RHS, FoundLHS->getType());
    } else {
      LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
      RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
    }
  } else if (getTypeSizeInBits(LHS->getType()) >
      getTypeSizeInBits(FoundLHS->getType())) {
    if (CmpInst::isSigned(FoundPred)) {
      FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
    } else {
      FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
    }
  }

  // Canonicalize the query to match the way instcombine will have
  // canonicalized the comparison.  If simplification collapses either
  // comparison to identical operands, the answer is immediate.
  if (SimplifyICmpOperands(Pred, LHS, RHS))
    if (LHS == RHS)
      return CmpInst::isTrueWhenEqual(Pred);
  if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
    if (FoundLHS == FoundRHS)
      return CmpInst::isFalseWhenEqual(FoundPred);

  // Check to see if we can make the LHS or RHS match.  Swapping one of the
  // comparisons lines the shared operand up on the same side.
  if (LHS == FoundRHS || RHS == FoundLHS) {
    if (isa<SCEVConstant>(RHS)) {
      std::swap(FoundLHS, FoundRHS);
      FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
    } else {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
    }
  }

  // Check whether the found predicate is the same as the desired predicate.
  if (FoundPred == Pred)
    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);

  // Check whether swapping the found predicate makes it the same as the
  // desired predicate.
  if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
    if (isa<SCEVConstant>(RHS))
      return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
    else
      return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
                                   RHS, LHS, FoundLHS, FoundRHS);
  }

  // Unsigned comparison is the same as signed comparison when both the operands
  // are non-negative.
  if (CmpInst::isUnsigned(FoundPred) &&
      CmpInst::getSignedPredicate(FoundPred) == Pred &&
      isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);

  // Check if we can make progress by sharpening ranges.
  if (FoundPred == ICmpInst::ICMP_NE &&
      (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {

    // Normalize so C is the constant side and V the variable side of the
    // inequality C != V.
    const SCEVConstant *C = nullptr;
    const SCEV *V = nullptr;

    if (isa<SCEVConstant>(FoundLHS)) {
      C = cast<SCEVConstant>(FoundLHS);
      V = FoundRHS;
    } else {
      C = cast<SCEVConstant>(FoundRHS);
      V = FoundLHS;
    }

    // The guarding predicate tells us that C != V. If the known range
    // of V is [C, t), we can sharpen the range to [C + 1, t).  The
    // range we consider has to correspond to same signedness as the
    // predicate we're interested in folding.

    APInt Min = ICmpInst::isSigned(Pred) ?
        getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();

    if (Min == C->getAPInt()) {
      // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
      // This is true even if (Min + 1) wraps around -- in case of
      // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).

      APInt SharperMin = Min + 1;

      switch (Pred) {
        case ICmpInst::ICMP_SGE:
        case ICmpInst::ICMP_UGE:
          // We know V `Pred` SharperMin.  If this implies LHS `Pred`
          // RHS, we're done.
          if (isImpliedCondOperands(Pred, LHS, RHS, V,
                                    getConstant(SharperMin)))
            return true;
          // Deliberate fall through: the >= cases can also use the weaker
          // "V `Pred` Min" argument tried below.

        case ICmpInst::ICMP_SGT:
        case ICmpInst::ICMP_UGT:
          // We know from the range information that (V `Pred` Min ||
          // V == Min).  We know from the guarding condition that !(V
          // == Min).  This gives us
          //
          //       V `Pred` Min || V == Min && !(V == Min)
          //   =>  V `Pred` Min
          //
          // If V `Pred` Min implies LHS `Pred` RHS, we're done.

          if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
            return true;
          // Deliberate fall through to the default (no change).

        default:
          // No change
          break;
      }
    }
  }

  // Check whether the actual condition is beyond sufficient.  Equality is
  // stronger than any non-strict predicate, and any strict found predicate
  // rules out equality.
  if (FoundPred == ICmpInst::ICMP_EQ)
    if (ICmpInst::isTrueWhenEqual(Pred))
      if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
        return true;
  if (Pred == ICmpInst::ICMP_NE)
    if (!ICmpInst::isTrueWhenEqual(FoundPred))
      if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
        return true;

  // Otherwise assume the worst.
  return false;
}
8224 | |
8225 | bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr, |
8226 | const SCEV *&L, const SCEV *&R, |
8227 | SCEV::NoWrapFlags &Flags) { |
8228 | const auto *AE = dyn_cast<SCEVAddExpr>(Expr); |
8229 | if (!AE || AE->getNumOperands() != 2) |
8230 | return false; |
8231 | |
8232 | L = AE->getOperand(0); |
8233 | R = AE->getOperand(1); |
8234 | Flags = AE->getNoWrapFlags(); |
8235 | return true; |
8236 | } |
8237 | |
8238 | bool ScalarEvolution::computeConstantDifference(const SCEV *Less, |
8239 | const SCEV *More, |
8240 | APInt &C) { |
8241 | // We avoid subtracting expressions here because this function is usually |
8242 | // fairly deep in the call stack (i.e. is called many times). |
8243 | |
8244 | if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) { |
8245 | const auto *LAR = cast<SCEVAddRecExpr>(Less); |
8246 | const auto *MAR = cast<SCEVAddRecExpr>(More); |
8247 | |
8248 | if (LAR->getLoop() != MAR->getLoop()) |
8249 | return false; |
8250 | |
8251 | // We look at affine expressions only; not for correctness but to keep |
8252 | // getStepRecurrence cheap. |
8253 | if (!LAR->isAffine() || !MAR->isAffine()) |
8254 | return false; |
8255 | |
8256 | if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this)) |
8257 | return false; |
8258 | |
8259 | Less = LAR->getStart(); |
8260 | More = MAR->getStart(); |
8261 | |
8262 | // fall through |
8263 | } |
8264 | |
8265 | if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) { |
8266 | const auto &M = cast<SCEVConstant>(More)->getAPInt(); |
8267 | const auto &L = cast<SCEVConstant>(Less)->getAPInt(); |
8268 | C = M - L; |
8269 | return true; |
8270 | } |
8271 | |
8272 | const SCEV *L, *R; |
8273 | SCEV::NoWrapFlags Flags; |
8274 | if (splitBinaryAdd(Less, L, R, Flags)) |
8275 | if (const auto *LC = dyn_cast<SCEVConstant>(L)) |
8276 | if (R == More) { |
8277 | C = -(LC->getAPInt()); |
8278 | return true; |
8279 | } |
8280 | |
8281 | if (splitBinaryAdd(More, L, R, Flags)) |
8282 | if (const auto *LC = dyn_cast<SCEVConstant>(L)) |
8283 | if (R == Less) { |
8284 | C = LC->getAPInt(); |
8285 | return true; |
8286 | } |
8287 | |
8288 | return false; |
8289 | } |
8290 | |
// Prove "LHS Pred RHS" from "FoundLHS Pred FoundRHS" when both sides differ
// from the found operands by the same constant and the addition cannot wrap
// past the relevant limit.  Handles only ULT/SLT over addrecs of one loop.
bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
    const SCEV *FoundLHS, const SCEV *FoundRHS) {
  if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
    return false;

  const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!AddRecLHS)
    return false;

  const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
  if (!AddRecFoundLHS)
    return false;

  // We'd like to let SCEV reason about control dependencies, so we constrain
  // both the inequalities to be about add recurrences on the same loop.  This
  // way we can use isLoopEntryGuardedByCond later.

  const Loop *L = AddRecFoundLHS->getLoop();
  if (L != AddRecLHS->getLoop())
    return false;

  // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1)
  //
  // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
  //                                                                    ... (2)
  //
  // Informal proof for (2), assuming (1) [*]:
  //
  // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
  //
  // Then
  //
  // FoundLHS s< FoundRHS s< INT_MIN - C
  // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C   [ using (3) ]
  // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
  // <=> (FoundLHS + INT_MIN + C + INT_MIN) s<
  //                        (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ]
  // <=> FoundLHS + C s< FoundRHS + C
  //
  // [*]: (1) can be proved by ruling out overflow.
  //
  // [**]: This can be proved by analyzing all the four possibilities:
  //    (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
  //    (A s>= 0, B s>= 0).
  //
  // Note:
  // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
  // will not sign underflow.  For instance, say FoundLHS = (i8 -128), FoundRHS
  // = (i8 -127) and C = (i8 -100).  Then INT_MIN - C = (i8 -28), and FoundRHS
  // s< (INT_MIN - C).  Lack of sign overflow / underflow in "FoundRHS + C" is
  // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
  // C)".

  // Require LHS == FoundLHS + LDiff and RHS == FoundRHS + RDiff with the
  // same constant shift on both sides.
  APInt LDiff, RDiff;
  if (!computeConstantDifference(FoundLHS, LHS, LDiff) ||
      !computeConstantDifference(FoundRHS, RHS, RDiff) ||
      LDiff != RDiff)
    return false;

  // Zero shift: the queried comparison is exactly the found one.
  if (LDiff == 0)
    return true;

  APInt FoundRHSLimit;

  if (Pred == CmpInst::ICMP_ULT) {
    FoundRHSLimit = -RDiff;
  } else {
    assert(Pred == CmpInst::ICMP_SLT && "Checked above!")((Pred == CmpInst::ICMP_SLT && "Checked above!") ? static_cast <void> (0) : __assert_fail ("Pred == CmpInst::ICMP_SLT && \"Checked above!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 8359, __PRETTY_FUNCTION__));
    FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff;
  }

  // Try to prove (1) or (2), as needed.
  return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
                                  getConstant(FoundRHSLimit));
}
8367 | |
8368 | bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, |
8369 | const SCEV *LHS, const SCEV *RHS, |
8370 | const SCEV *FoundLHS, |
8371 | const SCEV *FoundRHS) { |
8372 | if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) |
8373 | return true; |
8374 | |
8375 | if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) |
8376 | return true; |
8377 | |
8378 | return isImpliedCondOperandsHelper(Pred, LHS, RHS, |
8379 | FoundLHS, FoundRHS) || |
8380 | // ~x < ~y --> x > y |
8381 | isImpliedCondOperandsHelper(Pred, LHS, RHS, |
8382 | getNotSCEV(FoundRHS), |
8383 | getNotSCEV(FoundLHS)); |
8384 | } |
8385 | |
8386 | |
8387 | /// If Expr computes ~A, return A else return nullptr |
8388 | static const SCEV *MatchNotExpr(const SCEV *Expr) { |
8389 | const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); |
8390 | if (!Add || Add->getNumOperands() != 2 || |
8391 | !Add->getOperand(0)->isAllOnesValue()) |
8392 | return nullptr; |
8393 | |
8394 | const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1)); |
8395 | if (!AddRHS || AddRHS->getNumOperands() != 2 || |
8396 | !AddRHS->getOperand(0)->isAllOnesValue()) |
8397 | return nullptr; |
8398 | |
8399 | return AddRHS->getOperand(1); |
8400 | } |
8401 | |
8402 | |
8403 | /// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values? |
8404 | template<typename MaxExprType> |
8405 | static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr, |
8406 | const SCEV *Candidate) { |
8407 | const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr); |
8408 | if (!MaxExpr) return false; |
8409 | |
8410 | return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end(); |
8411 | } |
8412 | |
8413 | |
8414 | /// Is MaybeMinExpr an SMin or UMin of Candidate and some other values? |
8415 | template<typename MaxExprType> |
8416 | static bool IsMinConsistingOf(ScalarEvolution &SE, |
8417 | const SCEV *MaybeMinExpr, |
8418 | const SCEV *Candidate) { |
8419 | const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr); |
8420 | if (!MaybeMaxExpr) |
8421 | return false; |
8422 | |
8423 | return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate)); |
8424 | } |
8425 | |
8426 | static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, |
8427 | ICmpInst::Predicate Pred, |
8428 | const SCEV *LHS, const SCEV *RHS) { |
8429 | |
8430 | // If both sides are affine addrecs for the same loop, with equal |
8431 | // steps, and we know the recurrences don't wrap, then we only |
8432 | // need to check the predicate on the starting values. |
8433 | |
8434 | if (!ICmpInst::isRelational(Pred)) |
8435 | return false; |
8436 | |
8437 | const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS); |
8438 | if (!LAR) |
8439 | return false; |
8440 | const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); |
8441 | if (!RAR) |
8442 | return false; |
8443 | if (LAR->getLoop() != RAR->getLoop()) |
8444 | return false; |
8445 | if (!LAR->isAffine() || !RAR->isAffine()) |
8446 | return false; |
8447 | |
8448 | if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE)) |
8449 | return false; |
8450 | |
8451 | SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ? |
8452 | SCEV::FlagNSW : SCEV::FlagNUW; |
8453 | if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW)) |
8454 | return false; |
8455 | |
8456 | return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart()); |
8457 | } |
8458 | |
/// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max
/// expression?  Only the four non-strict relational predicates can be
/// decided this way; everything else conservatively returns false.
static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
                                        ICmpInst::Predicate Pred,
                                        const SCEV *LHS, const SCEV *RHS) {
  switch (Pred) {
  default:
    return false;

  case ICmpInst::ICMP_SGE:
    // Reduce A >= B to B <= A and reuse the SLE logic below.
    std::swap(LHS, RHS);
    // fall through
  case ICmpInst::ICMP_SLE:
    return
        // min(A, ...) <= A
        IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
        // A <= max(A, ...)
        IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);

  case ICmpInst::ICMP_UGE:
    // Same reduction for the unsigned flavor.
    std::swap(LHS, RHS);
    // fall through
  case ICmpInst::ICMP_ULE:
    return
        // min(A, ...) <= A
        IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
        // A <= max(A, ...)
        IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
  }

  llvm_unreachable("covered switch fell through?!")::llvm::llvm_unreachable_internal("covered switch fell through?!" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 8489);
}
8491 | |
// Core implication check: "FoundLHS Pred FoundRHS" implies "LHS Pred RHS"
// if LHS is no larger than FoundLHS and RHS no smaller than FoundRHS (in the
// ordering matching Pred's direction and signedness).
bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
                                             const SCEV *FoundRHS) {
  // Combine all the cheap "known predicate" strategies into one query.
  auto IsKnownPredicateFull =
      [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
    return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
           IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
           IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
           isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
  };

  switch (Pred) {
  default: llvm_unreachable("Unexpected ICmpInst::Predicate value!")::llvm::llvm_unreachable_internal("Unexpected ICmpInst::Predicate value!" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 8506);
  case ICmpInst::ICMP_EQ:
  case ICmpInst::ICMP_NE:
    // For (in)equality it suffices that the operands are pairwise identical.
    if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
        IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
        IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
        IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
        IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS))
      return true;
    break;
  }

  return false;
}
8540 | |
8541 | bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, |
8542 | const SCEV *LHS, |
8543 | const SCEV *RHS, |
8544 | const SCEV *FoundLHS, |
8545 | const SCEV *FoundRHS) { |
8546 | if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS)) |
8547 | // The restriction on `FoundRHS` be lifted easily -- it exists only to |
8548 | // reduce the compile time impact of this optimization. |
8549 | return false; |
8550 | |
8551 | const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS); |
8552 | if (!AddLHS || AddLHS->getOperand(1) != FoundLHS || |
8553 | !isa<SCEVConstant>(AddLHS->getOperand(0))) |
8554 | return false; |
8555 | |
8556 | APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt(); |
8557 | |
8558 | // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the |
8559 | // antecedent "`FoundLHS` `Pred` `FoundRHS`". |
8560 | ConstantRange FoundLHSRange = |
8561 | ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS); |
8562 | |
8563 | // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range |
8564 | // for `LHS`: |
8565 | APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt(); |
8566 | ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend)); |
8567 | |
8568 | // We can also compute the range of values for `LHS` that satisfy the |
8569 | // consequent, "`LHS` `Pred` `RHS`": |
8570 | APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt(); |
8571 | ConstantRange SatisfyingLHSRange = |
8572 | ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS); |
8573 | |
8574 | // The antecedent implies the consequent if every value of `LHS` that |
8575 | // satisfies the antecedent also satisfies the consequent. |
8576 | return SatisfyingLHSRange.contains(LHSRange); |
8577 | } |
8578 | |
8579 | bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, |
8580 | bool IsSigned, bool NoWrap) { |
8581 | if (NoWrap) return false; |
8582 | |
8583 | unsigned BitWidth = getTypeSizeInBits(RHS->getType()); |
8584 | const SCEV *One = getOne(Stride->getType()); |
8585 | |
8586 | if (IsSigned) { |
8587 | APInt MaxRHS = getSignedRange(RHS).getSignedMax(); |
8588 | APInt MaxValue = APInt::getSignedMaxValue(BitWidth); |
8589 | APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) |
8590 | .getSignedMax(); |
8591 | |
8592 | // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! |
8593 | return (MaxValue - MaxStrideMinusOne).slt(MaxRHS); |
8594 | } |
8595 | |
8596 | APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax(); |
8597 | APInt MaxValue = APInt::getMaxValue(BitWidth); |
8598 | APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) |
8599 | .getUnsignedMax(); |
8600 | |
8601 | // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! |
8602 | return (MaxValue - MaxStrideMinusOne).ult(MaxRHS); |
8603 | } |
8604 | |
8605 | bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, |
8606 | bool IsSigned, bool NoWrap) { |
8607 | if (NoWrap) return false; |
8608 | |
8609 | unsigned BitWidth = getTypeSizeInBits(RHS->getType()); |
8610 | const SCEV *One = getOne(Stride->getType()); |
8611 | |
8612 | if (IsSigned) { |
8613 | APInt MinRHS = getSignedRange(RHS).getSignedMin(); |
8614 | APInt MinValue = APInt::getSignedMinValue(BitWidth); |
8615 | APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) |
8616 | .getSignedMax(); |
8617 | |
8618 | // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! |
8619 | return (MinValue + MaxStrideMinusOne).sgt(MinRHS); |
8620 | } |
8621 | |
8622 | APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin(); |
8623 | APInt MinValue = APInt::getMinValue(BitWidth); |
8624 | APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) |
8625 | .getUnsignedMax(); |
8626 | |
8627 | // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! |
8628 | return (MinValue + MaxStrideMinusOne).ugt(MinRHS); |
8629 | } |
8630 | |
8631 | const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, |
8632 | bool Equality) { |
8633 | const SCEV *One = getOne(Step->getType()); |
8634 | Delta = Equality ? getAddExpr(Delta, Step) |
8635 | : getAddExpr(Delta, getMinusSCEV(Step, One)); |
8636 | return getUDivExpr(Delta, Step); |
8637 | } |
8638 | |
/// Compute the backedge-taken count of a loop whose exit condition is
/// "IV < RHS" (signed when \p IsSigned, otherwise unsigned).  \p ControlsExit
/// means the condition fully controls the exit, allowing no-wrap flags to be
/// exploited; \p AllowPredicates permits emitting runtime SCEV predicates
/// (collected in the returned ExitLimit) to turn LHS into an AddRec.
ScalarEvolution::ExitLimit
ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                  const Loop *L, bool IsSigned,
                                  bool ControlsExit, bool AllowPredicates) {
  SCEVUnionPredicate P;
  // We handle only IV < Invariant
  if (!isLoopInvariant(RHS, L))
    return getCouldNotCompute();

  const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!IV && AllowPredicates)
    // Try to make this an AddRec using runtime tests, in the first X
    // iterations of this loop, where X is the SCEV expression found by the
    // algorithm below.
    IV = convertSCEVToAddRecWithPredicates(LHS, L, P);

  // Avoid weird loops
  if (!IV || IV->getLoop() != L || !IV->isAffine())
    return getCouldNotCompute();

  bool NoWrap = ControlsExit &&
                IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);

  const SCEV *Stride = IV->getStepRecurrence(*this);

  // Avoid negative or zero stride values
  if (!isKnownPositive(Stride))
    return getCouldNotCompute();

  // Avoid proven overflow cases: this will ensure that the backedge taken count
  // will not generate any unsigned overflow. Relaxed no-overflow conditions
  // exploit NoWrapFlags, allowing to optimize in presence of undefined
  // behaviors like the case of C language.
  if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
    return getCouldNotCompute();

  ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
                                      : ICmpInst::ICMP_ULT;
  const SCEV *Start = IV->getStart();
  const SCEV *End = RHS;
  // If entry into the loop does not guarantee Start - Stride < RHS, clamp End
  // so that End - Start cannot come out "negative" below.
  if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) {
    const SCEV *Diff = getMinusSCEV(RHS, Start);
    // If we have NoWrap set, then we can assume that the increment won't
    // overflow, in which case if RHS - Start is a constant, we don't need to
    // do a max operation since we can just figure it out statically
    if (NoWrap && isa<SCEVConstant>(Diff)) {
      APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt();
      if (D.isNegative())
        End = Start;
    } else
      End = IsSigned ? getSMaxExpr(RHS, Start)
                     : getUMaxExpr(RHS, Start);
  }

  // Exact count: ceil((End - Start) / Stride).
  const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);

  // Derive a conservative constant upper bound (MaxBECount) from the known
  // value ranges of Start, Stride and RHS.
  APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
                            : getUnsignedRange(Start).getUnsignedMin();

  APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
                             : getUnsignedRange(Stride).getUnsignedMin();

  unsigned BitWidth = getTypeSizeInBits(LHS->getType());
  // Largest End for which the rounded BE-count computation cannot overflow.
  APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
                         : APInt::getMaxValue(BitWidth) - (MinStride - 1);

  // Although End can be a MAX expression we estimate MaxEnd considering only
  // the case End = RHS. This is safe because in the other case (End - Start)
  // is zero, leading to a zero maximum backedge taken count.
  APInt MaxEnd =
    IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
             : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);

  const SCEV *MaxBECount;
  if (isa<SCEVConstant>(BECount))
    MaxBECount = BECount;
  else
    MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
                                getConstant(MinStride), false);

  if (isa<SCEVCouldNotCompute>(MaxBECount))
    MaxBECount = BECount;

  return ExitLimit(BECount, MaxBECount, P);
}
8724 | |
8725 | ScalarEvolution::ExitLimit |
8726 | ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, |
8727 | const Loop *L, bool IsSigned, |
8728 | bool ControlsExit, bool AllowPredicates) { |
8729 | SCEVUnionPredicate P; |
8730 | // We handle only IV > Invariant |
8731 | if (!isLoopInvariant(RHS, L)) |
8732 | return getCouldNotCompute(); |
8733 | |
8734 | const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); |
8735 | if (!IV && AllowPredicates) |
8736 | // Try to make this an AddRec using runtime tests, in the first X |
8737 | // iterations of this loop, where X is the SCEV expression found by the |
8738 | // algorithm below. |
8739 | IV = convertSCEVToAddRecWithPredicates(LHS, L, P); |
8740 | |
8741 | // Avoid weird loops |
8742 | if (!IV || IV->getLoop() != L || !IV->isAffine()) |
8743 | return getCouldNotCompute(); |
8744 | |
8745 | bool NoWrap = ControlsExit && |
8746 | IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); |
8747 | |
8748 | const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); |
8749 | |
8750 | // Avoid negative or zero stride values |
8751 | if (!isKnownPositive(Stride)) |
8752 | return getCouldNotCompute(); |
8753 | |
8754 | // Avoid proven overflow cases: this will ensure that the backedge taken count |
8755 | // will not generate any unsigned overflow. Relaxed no-overflow conditions |
8756 | // exploit NoWrapFlags, allowing to optimize in presence of undefined |
8757 | // behaviors like the case of C language. |
8758 | if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)) |
8759 | return getCouldNotCompute(); |
8760 | |
8761 | ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT |
8762 | : ICmpInst::ICMP_UGT; |
8763 | |
8764 | const SCEV *Start = IV->getStart(); |
8765 | const SCEV *End = RHS; |
8766 | if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) { |
8767 | const SCEV *Diff = getMinusSCEV(RHS, Start); |
8768 | // If we have NoWrap set, then we can assume that the increment won't |
8769 | // overflow, in which case if RHS - Start is a constant, we don't need to |
8770 | // do a max operation since we can just figure it out statically |
8771 | if (NoWrap && isa<SCEVConstant>(Diff)) { |
8772 | APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt(); |
8773 | if (!D.isNegative()) |
8774 | End = Start; |
8775 | } else |
8776 | End = IsSigned ? getSMinExpr(RHS, Start) |
8777 | : getUMinExpr(RHS, Start); |
8778 | } |
8779 | |
8780 | const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); |
8781 | |
8782 | APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax() |
8783 | : getUnsignedRange(Start).getUnsignedMax(); |
8784 | |
8785 | APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() |
8786 | : getUnsignedRange(Stride).getUnsignedMin(); |
8787 | |
8788 | unsigned BitWidth = getTypeSizeInBits(LHS->getType()); |
8789 | APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) |
8790 | : APInt::getMinValue(BitWidth) + (MinStride - 1); |
8791 | |
8792 | // Although End can be a MIN expression we estimate MinEnd considering only |
8793 | // the case End = RHS. This is safe because in the other case (Start - End) |
8794 | // is zero, leading to a zero maximum backedge taken count. |
8795 | APInt MinEnd = |
8796 | IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit) |
8797 | : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit); |
8798 | |
8799 | |
8800 | const SCEV *MaxBECount = getCouldNotCompute(); |
Value stored to 'MaxBECount' during its initialization is never read | |
8801 | if (isa<SCEVConstant>(BECount)) |
8802 | MaxBECount = BECount; |
8803 | else |
8804 | MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), |
8805 | getConstant(MinStride), false); |
8806 | |
8807 | if (isa<SCEVCouldNotCompute>(MaxBECount)) |
8808 | MaxBECount = BECount; |
8809 | |
8810 | return ExitLimit(BECount, MaxBECount, P); |
8811 | } |
8812 | |
/// Return the number of iterations this AddRec takes before leaving \p Range,
/// or CouldNotCompute when that count cannot be determined.  Only affine and
/// quadratic recurrences with all-constant operands are solvable here.
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
                                                    ScalarEvolution &SE) const {
  if (Range.isFullSet())  // Infinite loop.
    return SE.getCouldNotCompute();

  // If the start is a non-zero constant, shift the range to simplify things.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
    if (!SC->getValue()->isZero()) {
      SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
      Operands[0] = SE.getZero(SC->getType());
      const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
                                             getNoWrapFlags(FlagNW));
      if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
        return ShiftedAddRec->getNumIterationsInRange(
            Range.subtract(SC->getAPInt()), SE);
      // This is strange and shouldn't happen.
      return SE.getCouldNotCompute();
    }

  // The only time we can solve this is when we have all constant indices.
  // Otherwise, we cannot determine the overflow conditions.
  if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
    return SE.getCouldNotCompute();

  // Okay at this point we know that all elements of the chrec are constants and
  // that the start element is zero.

  // First check to see if the range contains zero.  If not, the first
  // iteration exits.
  unsigned BitWidth = SE.getTypeSizeInBits(getType());
  if (!Range.contains(APInt(BitWidth, 0)))
    return SE.getZero(getType());

  if (isAffine()) {
    // If this is an affine expression then we have this situation:
    //   Solve {0,+,A} in Range  ===  Ax in Range

    // We know that zero is in the range.  If A is positive then we know that
    // the upper value of the range must be the first possible exit value.
    // If A is negative then the lower of the range is the last possible loop
    // value.  Also note that we already checked for a full range.
    APInt One(BitWidth,1);
    APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
    APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();

    // The exit value should be (End+A)/A.
    APInt ExitVal = (End + A).udiv(A);
    ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);

    // Evaluate at the exit value.  If we really did fall out of the valid
    // range, then we computed our trip count, otherwise wrap around or other
    // things must have happened.
    ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
    if (Range.contains(Val->getValue()))
      return SE.getCouldNotCompute();  // Something strange happened

    // Ensure that the previous value is in the range.  This is a sanity check.
    assert(Range.contains(((Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt ::get(SE.getContext(), ExitVal - One), SE)->getValue()) && "Linear scev computation is off in a bad way!") ? static_cast <void> (0) : __assert_fail ("Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && \"Linear scev computation is off in a bad way!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 8873, __PRETTY_FUNCTION__))
           EvaluateConstantChrecAtConstant(this,((Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt ::get(SE.getContext(), ExitVal - One), SE)->getValue()) && "Linear scev computation is off in a bad way!") ? static_cast <void> (0) : __assert_fail ("Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && \"Linear scev computation is off in a bad way!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 8873, __PRETTY_FUNCTION__))
           ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&((Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt ::get(SE.getContext(), ExitVal - One), SE)->getValue()) && "Linear scev computation is off in a bad way!") ? static_cast <void> (0) : __assert_fail ("Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && \"Linear scev computation is off in a bad way!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 8873, __PRETTY_FUNCTION__))
           "Linear scev computation is off in a bad way!")((Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt ::get(SE.getContext(), ExitVal - One), SE)->getValue()) && "Linear scev computation is off in a bad way!") ? static_cast <void> (0) : __assert_fail ("Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && \"Linear scev computation is off in a bad way!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 8873, __PRETTY_FUNCTION__));
    return SE.getConstant(ExitValue);
  } else if (isQuadratic()) {
    // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
    // quadratic equation to solve it.  To do this, we must frame our problem in
    // terms of figuring out when zero is crossed, instead of when
    // Range.getUpper() is crossed.
    SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
    NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
    const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
                                             // getNoWrapFlags(FlagNW)
                                             FlagAnyWrap);

    // Next, solve the constructed addrec
    auto Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
    const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
    const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
    if (R1) {
      // Pick the smallest positive root value.
      if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
              ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
        if (!CB->getZExtValue())
          std::swap(R1, R2);  // R1 is the minimum root now.

        // Make sure the root is not off by one.  The returned iteration should
        // not be in the range, but the previous one should be.  When solving
        // for "X*X < 5", for example, we should not return a root of 2.
        ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
                                                             R1->getValue(),
                                                             SE);
        if (Range.contains(R1Val->getValue())) {
          // The next iteration must be out of the range...
          ConstantInt *NextVal =
              ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);

          R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
          if (!Range.contains(R1Val->getValue()))
            return SE.getConstant(NextVal);
          return SE.getCouldNotCompute();  // Something strange happened
        }

        // If R1 was not in the range, then it is a good return value.  Make
        // sure that R1-1 WAS in the range though, just in case.
        ConstantInt *NextVal =
            ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
        R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
        if (Range.contains(R1Val->getValue()))
          return R1;
        return SE.getCouldNotCompute();  // Something strange happened
      }
    }
  }

  return SE.getCouldNotCompute();
}
8928 | |
8929 | namespace { |
8930 | struct FindUndefs { |
8931 | bool Found; |
8932 | FindUndefs() : Found(false) {} |
8933 | |
8934 | bool follow(const SCEV *S) { |
8935 | if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) { |
8936 | if (isa<UndefValue>(C->getValue())) |
8937 | Found = true; |
8938 | } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { |
8939 | if (isa<UndefValue>(C->getValue())) |
8940 | Found = true; |
8941 | } |
8942 | |
8943 | // Keep looking if we haven't found it yet. |
8944 | return !Found; |
8945 | } |
8946 | bool isDone() const { |
8947 | // Stop recursion if we have found an undef. |
8948 | return Found; |
8949 | } |
8950 | }; |
8951 | } |
8952 | |
8953 | // Return true when S contains at least an undef value. |
8954 | static inline bool |
8955 | containsUndefs(const SCEV *S) { |
8956 | FindUndefs F; |
8957 | SCEVTraversal<FindUndefs> ST(F); |
8958 | ST.visitAll(S); |
8959 | |
8960 | return F.Found; |
8961 | } |
8962 | |
8963 | namespace { |
8964 | // Collect all steps of SCEV expressions. |
8965 | struct SCEVCollectStrides { |
8966 | ScalarEvolution &SE; |
8967 | SmallVectorImpl<const SCEV *> &Strides; |
8968 | |
8969 | SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S) |
8970 | : SE(SE), Strides(S) {} |
8971 | |
8972 | bool follow(const SCEV *S) { |
8973 | if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) |
8974 | Strides.push_back(AR->getStepRecurrence(SE)); |
8975 | return true; |
8976 | } |
8977 | bool isDone() const { return false; } |
8978 | }; |
8979 | |
8980 | // Collect all SCEVUnknown and SCEVMulExpr expressions. |
8981 | struct SCEVCollectTerms { |
8982 | SmallVectorImpl<const SCEV *> &Terms; |
8983 | |
8984 | SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) |
8985 | : Terms(T) {} |
8986 | |
8987 | bool follow(const SCEV *S) { |
8988 | if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) { |
8989 | if (!containsUndefs(S)) |
8990 | Terms.push_back(S); |
8991 | |
8992 | // Stop recursion: once we collected a term, do not walk its operands. |
8993 | return false; |
8994 | } |
8995 | |
8996 | // Keep looking. |
8997 | return true; |
8998 | } |
8999 | bool isDone() const { return false; } |
9000 | }; |
9001 | |
9002 | // Check if a SCEV contains an AddRecExpr. |
9003 | struct SCEVHasAddRec { |
9004 | bool &ContainsAddRec; |
9005 | |
9006 | SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { |
9007 | ContainsAddRec = false; |
9008 | } |
9009 | |
9010 | bool follow(const SCEV *S) { |
9011 | if (isa<SCEVAddRecExpr>(S)) { |
9012 | ContainsAddRec = true; |
9013 | |
9014 | // Stop recursion: once we collected a term, do not walk its operands. |
9015 | return false; |
9016 | } |
9017 | |
9018 | // Keep looking. |
9019 | return true; |
9020 | } |
9021 | bool isDone() const { return false; } |
9022 | }; |
9023 | |
9024 | // Find factors that are multiplied with an expression that (possibly as a |
9025 | // subexpression) contains an AddRecExpr. In the expression: |
9026 | // |
9027 | // 8 * (100 + %p * %q * (%a + {0, +, 1}_loop)) |
9028 | // |
9029 | // "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" |
9030 | // that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size |
9031 | // parameters as they form a product with an induction variable. |
9032 | // |
9033 | // This collector expects all array size parameters to be in the same MulExpr. |
9034 | // It might be necessary to later add support for collecting parameters that are |
9035 | // spread over different nested MulExpr. |
9036 | struct SCEVCollectAddRecMultiplies { |
9037 | SmallVectorImpl<const SCEV *> &Terms; |
9038 | ScalarEvolution &SE; |
9039 | |
9040 | SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE) |
9041 | : Terms(T), SE(SE) {} |
9042 | |
9043 | bool follow(const SCEV *S) { |
9044 | if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) { |
9045 | bool HasAddRec = false; |
9046 | SmallVector<const SCEV *, 0> Operands; |
9047 | for (auto Op : Mul->operands()) { |
9048 | if (isa<SCEVUnknown>(Op)) { |
9049 | Operands.push_back(Op); |
9050 | } else { |
9051 | bool ContainsAddRec; |
9052 | SCEVHasAddRec ContiansAddRec(ContainsAddRec); |
9053 | visitAll(Op, ContiansAddRec); |
9054 | HasAddRec |= ContainsAddRec; |
9055 | } |
9056 | } |
9057 | if (Operands.size() == 0) |
9058 | return true; |
9059 | |
9060 | if (!HasAddRec) |
9061 | return false; |
9062 | |
9063 | Terms.push_back(SE.getMulExpr(Operands)); |
9064 | // Stop recursion: once we collected a term, do not walk its operands. |
9065 | return false; |
9066 | } |
9067 | |
9068 | // Keep looking. |
9069 | return true; |
9070 | } |
9071 | bool isDone() const { return false; } |
9072 | }; |
9073 | } |
9074 | |
/// Find parametric terms in this SCEVAddRecExpr. We look for parameters in
/// two places:
///   1) The strides of AddRec expressions.
///   2) Unknowns that are multiplied with AddRec expressions.
void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
    SmallVectorImpl<const SCEV *> &Terms) {
  // Step 1: gather the step recurrences of every AddRec inside Expr.
  SmallVector<const SCEV *, 4> Strides;
  SCEVCollectStrides StrideCollector(*this, Strides);
  visitAll(Expr, StrideCollector);

  DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Strides:\n"; for ( const SCEV *S : Strides) dbgs() << *S << "\n"; }; } } while (0)
      dbgs() << "Strides:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Strides:\n"; for ( const SCEV *S : Strides) dbgs() << *S << "\n"; }; } } while (0)
      for (const SCEV *S : Strides)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Strides:\n"; for ( const SCEV *S : Strides) dbgs() << *S << "\n"; }; } } while (0)
        dbgs() << *S << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Strides:\n"; for ( const SCEV *S : Strides) dbgs() << *S << "\n"; }; } } while (0)
    })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Strides:\n"; for ( const SCEV *S : Strides) dbgs() << *S << "\n"; }; } } while (0);

  // Step 2: extract the unknown/multiply terms out of each collected stride.
  for (const SCEV *S : Strides) {
    SCEVCollectTerms TermCollector(Terms);
    visitAll(S, TermCollector);
  }

  DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0)
      dbgs() << "Terms:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0)
      for (const SCEV *T : Terms)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0)
        dbgs() << *T << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0)
    })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0);

  // Step 3: also collect unknown factors multiplied with AddRec-containing
  // subexpressions anywhere in Expr itself.
  SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
  visitAll(Expr, MulCollector);
}
9105 | |
9106 | static bool findArrayDimensionsRec(ScalarEvolution &SE, |
9107 | SmallVectorImpl<const SCEV *> &Terms, |
9108 | SmallVectorImpl<const SCEV *> &Sizes) { |
9109 | int Last = Terms.size() - 1; |
9110 | const SCEV *Step = Terms[Last]; |
9111 | |
9112 | // End of recursion. |
9113 | if (Last == 0) { |
9114 | if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) { |
9115 | SmallVector<const SCEV *, 2> Qs; |
9116 | for (const SCEV *Op : M->operands()) |
9117 | if (!isa<SCEVConstant>(Op)) |
9118 | Qs.push_back(Op); |
9119 | |
9120 | Step = SE.getMulExpr(Qs); |
9121 | } |
9122 | |
9123 | Sizes.push_back(Step); |
9124 | return true; |
9125 | } |
9126 | |
9127 | for (const SCEV *&Term : Terms) { |
9128 | // Normalize the terms before the next call to findArrayDimensionsRec. |
9129 | const SCEV *Q, *R; |
9130 | SCEVDivision::divide(SE, Term, Step, &Q, &R); |
9131 | |
9132 | // Bail out when GCD does not evenly divide one of the terms. |
9133 | if (!R->isZero()) |
9134 | return false; |
9135 | |
9136 | Term = Q; |
9137 | } |
9138 | |
9139 | // Remove all SCEVConstants. |
9140 | Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) { |
9141 | return isa<SCEVConstant>(E); |
9142 | }), |
9143 | Terms.end()); |
9144 | |
9145 | if (Terms.size() > 0) |
9146 | if (!findArrayDimensionsRec(SE, Terms, Sizes)) |
9147 | return false; |
9148 | |
9149 | Sizes.push_back(Step); |
9150 | return true; |
9151 | } |
9152 | |
9153 | // Returns true when S contains at least a SCEVUnknown parameter. |
9154 | static inline bool |
9155 | containsParameters(const SCEV *S) { |
9156 | struct FindParameter { |
9157 | bool FoundParameter; |
9158 | FindParameter() : FoundParameter(false) {} |
9159 | |
9160 | bool follow(const SCEV *S) { |
9161 | if (isa<SCEVUnknown>(S)) { |
9162 | FoundParameter = true; |
9163 | // Stop recursion: we found a parameter. |
9164 | return false; |
9165 | } |
9166 | // Keep looking. |
9167 | return true; |
9168 | } |
9169 | bool isDone() const { |
9170 | // Stop recursion if we have found a parameter. |
9171 | return FoundParameter; |
9172 | } |
9173 | }; |
9174 | |
9175 | FindParameter F; |
9176 | SCEVTraversal<FindParameter> ST(F); |
9177 | ST.visitAll(S); |
9178 | |
9179 | return F.FoundParameter; |
9180 | } |
9181 | |
9182 | // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. |
9183 | static inline bool |
9184 | containsParameters(SmallVectorImpl<const SCEV *> &Terms) { |
9185 | for (const SCEV *T : Terms) |
9186 | if (containsParameters(T)) |
9187 | return true; |
9188 | return false; |
9189 | } |
9190 | |
9191 | // Return the number of product terms in S. |
9192 | static inline int numberOfTerms(const SCEV *S) { |
9193 | if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S)) |
9194 | return Expr->getNumOperands(); |
9195 | return 1; |
9196 | } |
9197 | |
9198 | static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { |
9199 | if (isa<SCEVConstant>(T)) |
9200 | return nullptr; |
9201 | |
9202 | if (isa<SCEVUnknown>(T)) |
9203 | return T; |
9204 | |
9205 | if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) { |
9206 | SmallVector<const SCEV *, 2> Factors; |
9207 | for (const SCEV *Op : M->operands()) |
9208 | if (!isa<SCEVConstant>(Op)) |
9209 | Factors.push_back(Op); |
9210 | |
9211 | return SE.getMulExpr(Factors); |
9212 | } |
9213 | |
9214 | return T; |
9215 | } |
9216 | |
9217 | /// Return the size of an element read or written by Inst. |
9218 | const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { |
9219 | Type *Ty; |
9220 | if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) |
9221 | Ty = Store->getValueOperand()->getType(); |
9222 | else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) |
9223 | Ty = Load->getType(); |
9224 | else |
9225 | return nullptr; |
9226 | |
9227 | Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty)); |
9228 | return getSizeOfExpr(ETy, Ty); |
9229 | } |
9230 | |
/// Split the collected Terms into a list of array dimension Sizes (the last
/// entry of Sizes is ElementSize).  On failure Sizes is left empty.
void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
                                          SmallVectorImpl<const SCEV *> &Sizes,
                                          const SCEV *ElementSize) const {
  if (Terms.size() < 1 || !ElementSize)
    return;

  // Early return when Terms do not contain parameters: we do not delinearize
  // non parametric SCEVs.
  if (!containsParameters(Terms))
    return;

  DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0)
      dbgs() << "Terms:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0)
      for (const SCEV *T : Terms)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0)
        dbgs() << *T << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0)
    })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms:\n"; for (const SCEV *T : Terms) dbgs() << *T << "\n"; }; } } while (0);

  // Remove duplicates.
  // NOTE(review): this sorts SCEV* by pointer value — presumably acceptable
  // here since only adjacent-duplicate removal depends on the order.
  std::sort(Terms.begin(), Terms.end());
  Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());

  // Put larger terms first.
  std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
    return numberOfTerms(LHS) > numberOfTerms(RHS);
  });

  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);

  // Try to divide all terms by the element size. If term is not divisible by
  // element size, proceed with the original term.
  for (const SCEV *&Term : Terms) {
    const SCEV *Q, *R;
    SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
    if (!Q->isZero())
      Term = Q;
  }

  SmallVector<const SCEV *, 4> NewTerms;

  // Remove constant factors.
  for (const SCEV *T : Terms)
    if (const SCEV *NewT = removeConstantFactors(SE, T))
      NewTerms.push_back(NewT);

  DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms after sorting:\n" ; for (const SCEV *T : NewTerms) dbgs() << *T << "\n" ; }; } } while (0)
      dbgs() << "Terms after sorting:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms after sorting:\n" ; for (const SCEV *T : NewTerms) dbgs() << *T << "\n" ; }; } } while (0)
      for (const SCEV *T : NewTerms)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms after sorting:\n" ; for (const SCEV *T : NewTerms) dbgs() << *T << "\n" ; }; } } while (0)
        dbgs() << *T << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms after sorting:\n" ; for (const SCEV *T : NewTerms) dbgs() << *T << "\n" ; }; } } while (0)
    })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Terms after sorting:\n" ; for (const SCEV *T : NewTerms) dbgs() << *T << "\n" ; }; } } while (0);

  // Clear Sizes entirely on failure so callers see a consistent "no result".
  if (NewTerms.empty() ||
      !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
    Sizes.clear();
    return;
  }

  // The last element to be pushed into Sizes is the size of an element.
  Sizes.push_back(ElementSize);

  DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Sizes:\n"; for (const SCEV *S : Sizes) dbgs() << *S << "\n"; }; } } while (0)
      dbgs() << "Sizes:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Sizes:\n"; for (const SCEV *S : Sizes) dbgs() << *S << "\n"; }; } } while (0)
      for (const SCEV *S : Sizes)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Sizes:\n"; for (const SCEV *S : Sizes) dbgs() << *S << "\n"; }; } } while (0)
        dbgs() << *S << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Sizes:\n"; for (const SCEV *S : Sizes) dbgs() << *S << "\n"; }; } } while (0)
    })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("scalar-evolution")) { { dbgs() << "Sizes:\n"; for (const SCEV *S : Sizes) dbgs() << *S << "\n"; }; } } while (0);
}
9296 | |
/// Compute the access functions (subscripts) of \p Expr with respect to the
/// array dimensions in \p Sizes, by repeatedly dividing \p Expr by each size
/// from innermost to outermost. On failure both \p Subscripts and \p Sizes
/// are cleared.
void ScalarEvolution::computeAccessFunctions(
    const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
    SmallVectorImpl<const SCEV *> &Sizes) {

  // Early exit in case this SCEV is not an affine multivariate function.
  if (Sizes.empty())
    return;

  // Non-affine recurrences cannot be delinearized by successive division.
  if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
    if (!AR->isAffine())
      return;

  // Res holds the part of the expression not yet accounted for; each
  // iteration peels off one dimension (quotient continues, remainder is the
  // subscript for that dimension).
  const SCEV *Res = Expr;
  int Last = Sizes.size() - 1;
  for (int i = Last; i >= 0; i--) {
    const SCEV *Q, *R;
    SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);

    DEBUG({
        dbgs() << "Res: " << *Res << "\n";
        dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
        dbgs() << "Res divided by Sizes[i]:\n";
        dbgs() << "Quotient: " << *Q << "\n";
        dbgs() << "Remainder: " << *R << "\n";
      });

    Res = Q;

    // Do not record the last subscript corresponding to the size of elements in
    // the array.
    if (i == Last) {

      // Bail out if the remainder is too complex.
      if (isa<SCEVAddRecExpr>(R)) {
        Subscripts.clear();
        Sizes.clear();
        return;
      }

      continue;
    }

    // Record the access function for the current subscript.
    Subscripts.push_back(R);
  }

  // Also push in last position the remainder of the last division: it will be
  // the access function of the innermost dimension.
  Subscripts.push_back(Res);

  // Subscripts were collected innermost-first; callers expect outermost-first.
  std::reverse(Subscripts.begin(), Subscripts.end());

  DEBUG({
      dbgs() << "Subscripts:\n";
      for (const SCEV *S : Subscripts)
        dbgs() << *S << "\n";
    });
}
9355 | |
9356 | /// Splits the SCEV into two vectors of SCEVs representing the subscripts and |
9357 | /// sizes of an array access. Returns the remainder of the delinearization that |
9358 | /// is the offset start of the array. The SCEV->delinearize algorithm computes |
9359 | /// the multiples of SCEV coefficients: that is a pattern matching of sub |
9360 | /// expressions in the stride and base of a SCEV corresponding to the |
9361 | /// computation of a GCD (greatest common divisor) of base and stride. When |
9362 | /// SCEV->delinearize fails, it returns the SCEV unchanged. |
9363 | /// |
9364 | /// For example: when analyzing the memory access A[i][j][k] in this loop nest |
9365 | /// |
9366 | /// void foo(long n, long m, long o, double A[n][m][o]) { |
9367 | /// |
9368 | /// for (long i = 0; i < n; i++) |
9369 | /// for (long j = 0; j < m; j++) |
9370 | /// for (long k = 0; k < o; k++) |
9371 | /// A[i][j][k] = 1.0; |
9372 | /// } |
9373 | /// |
9374 | /// the delinearization input is the following AddRec SCEV: |
9375 | /// |
9376 | /// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> |
9377 | /// |
9378 | /// From this SCEV, we are able to say that the base offset of the access is %A |
9379 | /// because it appears as an offset that does not divide any of the strides in |
9380 | /// the loops: |
9381 | /// |
9382 | /// CHECK: Base offset: %A |
9383 | /// |
9384 | /// and then SCEV->delinearize determines the size of some of the dimensions of |
9385 | /// the array as these are the multiples by which the strides are happening: |
9386 | /// |
9387 | /// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. |
9388 | /// |
9389 | /// Note that the outermost dimension remains of UnknownSize because there are |
9390 | /// no strides that would help identifying the size of the last dimension: when |
9391 | /// the array has been statically allocated, one could compute the size of that |
9392 | /// dimension by dividing the overall size of the array by the size of the known |
9393 | /// dimensions: %m * %o * 8. |
9394 | /// |
9395 | /// Finally delinearize provides the access functions for the array reference |
9396 | /// that does correspond to A[i][j][k] of the above C testcase: |
9397 | /// |
9398 | /// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] |
9399 | /// |
9400 | /// The testcases are checking the output of a function pass: |
9401 | /// DelinearizationPass that walks through all loads and stores of a function |
9402 | /// asking for the SCEV of the memory access with respect to all enclosing |
9403 | /// loops, calling SCEV->delinearize on that and printing the results. |
9404 | |
/// Split \p Expr into \p Subscripts and \p Sizes describing a multidimensional
/// array access (see the block comment above for the full algorithm). On
/// failure the output vectors are left empty; \p ElementSize is the byte size
/// of one array element used to terminate the dimension search.
void ScalarEvolution::delinearize(const SCEV *Expr,
                                  SmallVectorImpl<const SCEV *> &Subscripts,
                                  SmallVectorImpl<const SCEV *> &Sizes,
                                  const SCEV *ElementSize) {
  // First step: collect parametric terms.
  SmallVector<const SCEV *, 4> Terms;
  collectParametricTerms(Expr, Terms);

  // No parametric terms: nothing to pattern-match against.
  if (Terms.empty())
    return;

  // Second step: find subscript sizes.
  findArrayDimensions(Terms, Sizes, ElementSize);

  if (Sizes.empty())
    return;

  // Third step: compute the access functions for each subscript.
  computeAccessFunctions(Expr, Subscripts, Sizes);

  if (Subscripts.empty())
    return;

  DEBUG({
      dbgs() << "succeeded to delinearize " << *Expr << "\n";
      dbgs() << "ArrayDecl[UnknownSize]";
      for (const SCEV *S : Sizes)
        dbgs() << "[" << *S << "]";

      dbgs() << "\nArrayRef";
      for (const SCEV *S : Subscripts)
        dbgs() << "[" << *S << "]";
      dbgs() << "\n";
    });
}
9440 | |
9441 | //===----------------------------------------------------------------------===// |
9442 | // SCEVCallbackVH Class Implementation |
9443 | //===----------------------------------------------------------------------===// |
9444 | |
/// Callback fired when the watched Value is deleted: drop every piece of
/// cached SCEV state that refers to it so no dangling pointers remain.
void ScalarEvolution::SCEVCallbackVH::deleted() {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
  // PHIs may additionally be cached as constant-evolution loop exit values.
  if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
    SE->ConstantEvolutionLoopExitValue.erase(PN);
  SE->eraseValueFromMap(getValPtr());
  // this now dangles!
}
9452 | |
/// Callback fired when the watched Value has all uses replaced by \p V:
/// invalidate cached SCEVs for the old value and, transitively, for
/// everything that used it.
void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");

  // Forget all the expressions associated with users of the old value,
  // so that future queries will recompute the expressions using the new
  // value.
  Value *Old = getValPtr();
  SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
  SmallPtrSet<User *, 8> Visited;  // guards against user cycles
  while (!Worklist.empty()) {
    User *U = Worklist.pop_back_val();
    // Deleting the Old value will cause this to dangle. Postpone
    // that until everything else is done.
    if (U == Old)
      continue;
    if (!Visited.insert(U).second)
      continue;
    if (PHINode *PN = dyn_cast<PHINode>(U))
      SE->ConstantEvolutionLoopExitValue.erase(PN);
    SE->eraseValueFromMap(U);
    // Transitive users must be invalidated as well.
    Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
  }
  // Delete the Old value.
  if (PHINode *PN = dyn_cast<PHINode>(Old))
    SE->ConstantEvolutionLoopExitValue.erase(PN);
  SE->eraseValueFromMap(Old);
  // this now dangles!
}
9481 | |
/// Construct a callback handle that watches \p V on behalf of \p se.
ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
  : CallbackVH(V), SE(se) {}
9484 | |
9485 | //===----------------------------------------------------------------------===// |
9486 | // ScalarEvolution Class Implementation |
9487 | //===----------------------------------------------------------------------===// |
9488 | |
9489 | ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, |
9490 | AssumptionCache &AC, DominatorTree &DT, |
9491 | LoopInfo &LI) |
9492 | : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), |
9493 | CouldNotCompute(new SCEVCouldNotCompute()), |
9494 | WalkingBEDominatingConds(false), ProvingSplitPredicate(false), |
9495 | ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), |
9496 | FirstUnknown(nullptr) { |
9497 | |
9498 | // To use guards for proving predicates, we need to scan every instruction in |
9499 | // relevant basic blocks, and not just terminators. Doing this is a waste of |
9500 | // time if the IR does not actually contain any calls to |
9501 | // @llvm.experimental.guard, so do a quick check and remember this beforehand. |
9502 | // |
9503 | // This pessimizes the case where a pass that preserves ScalarEvolution wants |
9504 | // to _add_ guards to the module when there weren't any before, and wants |
9505 | // ScalarEvolution to optimize based on those guards. For now we prefer to be |
9506 | // efficient in lieu of being smart in that rather obscure case. |
9507 | |
9508 | auto *GuardDecl = F.getParent()->getFunction( |
9509 | Intrinsic::getName(Intrinsic::experimental_guard)); |
9510 | HasGuards = GuardDecl && !GuardDecl->use_empty(); |
9511 | } |
9512 | |
/// Move constructor: steals all memoized state from \p Arg. The transient
/// recursion guards (WalkingBEDominatingConds, ProvingSplitPredicate) are
/// reset to false rather than moved.
ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
    : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
      LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
      ValueExprMap(std::move(Arg.ValueExprMap)),
      WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
      BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
      PredicatedBackedgeTakenCounts(
          std::move(Arg.PredicatedBackedgeTakenCounts)),
      ConstantEvolutionLoopExitValue(
          std::move(Arg.ConstantEvolutionLoopExitValue)),
      ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
      LoopDispositions(std::move(Arg.LoopDispositions)),
      BlockDispositions(std::move(Arg.BlockDispositions)),
      UnsignedRanges(std::move(Arg.UnsignedRanges)),
      SignedRanges(std::move(Arg.SignedRanges)),
      UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
      UniquePreds(std::move(Arg.UniquePreds)),
      SCEVAllocator(std::move(Arg.SCEVAllocator)),
      FirstUnknown(Arg.FirstUnknown) {
  // Null out Arg's SCEVUnknown list head so its destructor does not run the
  // SCEVUnknown destructors that this instance now owns.
  Arg.FirstUnknown = nullptr;
}
9534 | |
/// Tear down the analysis: release SCEVUnknown value references, clear the
/// memoization maps, and assert that no in-flight recursion state leaked.
ScalarEvolution::~ScalarEvolution() {
  // Iterate through all the SCEVUnknown instances and call their
  // destructors, so that they release their references to their values.
  // (The objects themselves live in SCEVAllocator, so only the destructor
  // is run explicitly here.)
  for (SCEVUnknown *U = FirstUnknown; U;) {
    SCEVUnknown *Tmp = U;
    U = U->Next;  // read Next before destroying the node
    Tmp->~SCEVUnknown();
  }
  FirstUnknown = nullptr;

  ExprValueMap.clear();
  ValueExprMap.clear();
  HasRecMap.clear();

  // Free any extra memory created for ExitNotTakenInfo in the unlikely event
  // that a loop had multiple computable exits.
  for (auto &BTCI : BackedgeTakenCounts)
    BTCI.second.clear();
  for (auto &BTCI : PredicatedBackedgeTakenCounts)
    BTCI.second.clear();

  // These flags/sets are only live while a query is executing; they must be
  // empty when the analysis is destroyed.
  assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
  assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
  assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
}
9560 | |
9561 | bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { |
9562 | return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L)); |
9563 | } |
9564 | |
9565 | static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, |
9566 | const Loop *L) { |
9567 | // Print all inner loops first |
9568 | for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) |
9569 | PrintLoopInfo(OS, SE, *I); |
9570 | |
9571 | OS << "Loop "; |
9572 | L->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
9573 | OS << ": "; |
9574 | |
9575 | SmallVector<BasicBlock *, 8> ExitBlocks; |
9576 | L->getExitBlocks(ExitBlocks); |
9577 | if (ExitBlocks.size() != 1) |
9578 | OS << "<multiple exits> "; |
9579 | |
9580 | if (SE->hasLoopInvariantBackedgeTakenCount(L)) { |
9581 | OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L); |
9582 | } else { |
9583 | OS << "Unpredictable backedge-taken count. "; |
9584 | } |
9585 | |
9586 | OS << "\n" |
9587 | "Loop "; |
9588 | L->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
9589 | OS << ": "; |
9590 | |
9591 | if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) { |
9592 | OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); |
9593 | } else { |
9594 | OS << "Unpredictable max backedge-taken count. "; |
9595 | } |
9596 | |
9597 | OS << "\n" |
9598 | "Loop "; |
9599 | L->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
9600 | OS << ": "; |
9601 | |
9602 | SCEVUnionPredicate Pred; |
9603 | auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred); |
9604 | if (!isa<SCEVCouldNotCompute>(PBT)) { |
9605 | OS << "Predicated backedge-taken count is " << *PBT << "\n"; |
9606 | OS << " Predicates:\n"; |
9607 | Pred.print(OS, 4); |
9608 | } else { |
9609 | OS << "Unpredictable predicated backedge-taken count. "; |
9610 | } |
9611 | OS << "\n"; |
9612 | } |
9613 | |
9614 | static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) { |
9615 | switch (LD) { |
9616 | case ScalarEvolution::LoopVariant: |
9617 | return "Variant"; |
9618 | case ScalarEvolution::LoopInvariant: |
9619 | return "Invariant"; |
9620 | case ScalarEvolution::LoopComputable: |
9621 | return "Computable"; |
9622 | } |
9623 | llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!")::llvm::llvm_unreachable_internal("Unknown ScalarEvolution::LoopDisposition kind!" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 9623); |
9624 | } |
9625 | |
9626 | void ScalarEvolution::print(raw_ostream &OS) const { |
9627 | // ScalarEvolution's implementation of the print method is to print |
9628 | // out SCEV values of all instructions that are interesting. Doing |
9629 | // this potentially causes it to create new SCEV objects though, |
9630 | // which technically conflicts with the const qualifier. This isn't |
9631 | // observable from outside the class though, so casting away the |
9632 | // const isn't dangerous. |
9633 | ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); |
9634 | |
9635 | OS << "Classifying expressions for: "; |
9636 | F.printAsOperand(OS, /*PrintType=*/false); |
9637 | OS << "\n"; |
9638 | for (Instruction &I : instructions(F)) |
9639 | if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) { |
9640 | OS << I << '\n'; |
9641 | OS << " --> "; |
9642 | const SCEV *SV = SE.getSCEV(&I); |
9643 | SV->print(OS); |
9644 | if (!isa<SCEVCouldNotCompute>(SV)) { |
9645 | OS << " U: "; |
9646 | SE.getUnsignedRange(SV).print(OS); |
9647 | OS << " S: "; |
9648 | SE.getSignedRange(SV).print(OS); |
9649 | } |
9650 | |
9651 | const Loop *L = LI.getLoopFor(I.getParent()); |
9652 | |
9653 | const SCEV *AtUse = SE.getSCEVAtScope(SV, L); |
9654 | if (AtUse != SV) { |
9655 | OS << " --> "; |
9656 | AtUse->print(OS); |
9657 | if (!isa<SCEVCouldNotCompute>(AtUse)) { |
9658 | OS << " U: "; |
9659 | SE.getUnsignedRange(AtUse).print(OS); |
9660 | OS << " S: "; |
9661 | SE.getSignedRange(AtUse).print(OS); |
9662 | } |
9663 | } |
9664 | |
9665 | if (L) { |
9666 | OS << "\t\t" "Exits: "; |
9667 | const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); |
9668 | if (!SE.isLoopInvariant(ExitValue, L)) { |
9669 | OS << "<<Unknown>>"; |
9670 | } else { |
9671 | OS << *ExitValue; |
9672 | } |
9673 | |
9674 | bool First = true; |
9675 | for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { |
9676 | if (First) { |
9677 | OS << "\t\t" "LoopDispositions: { "; |
9678 | First = false; |
9679 | } else { |
9680 | OS << ", "; |
9681 | } |
9682 | |
9683 | Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
9684 | OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); |
9685 | } |
9686 | |
9687 | for (auto *InnerL : depth_first(L)) { |
9688 | if (InnerL == L) |
9689 | continue; |
9690 | if (First) { |
9691 | OS << "\t\t" "LoopDispositions: { "; |
9692 | First = false; |
9693 | } else { |
9694 | OS << ", "; |
9695 | } |
9696 | |
9697 | InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
9698 | OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL)); |
9699 | } |
9700 | |
9701 | OS << " }"; |
9702 | } |
9703 | |
9704 | OS << "\n"; |
9705 | } |
9706 | |
9707 | OS << "Determining loop execution counts for: "; |
9708 | F.printAsOperand(OS, /*PrintType=*/false); |
9709 | OS << "\n"; |
9710 | for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I) |
9711 | PrintLoopInfo(OS, &SE, *I); |
9712 | } |
9713 | |
/// Return the memoized disposition of \p S with respect to loop \p L,
/// computing and caching it on a miss.
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
  auto &Values = LoopDispositions[S];
  for (auto &V : Values) {
    if (V.getPointer() == L)
      return V.getInt();
  }
  // Insert a provisional LoopVariant entry before recursing so that
  // self-referential queries terminate.
  Values.emplace_back(L, LoopVariant);
  LoopDisposition D = computeLoopDisposition(S, L);
  // computeLoopDisposition may have added entries to LoopDispositions,
  // invalidating `Values`; re-fetch the bucket before patching the answer in.
  auto &Values2 = LoopDispositions[S];
  // Scan from the back: our provisional entry is the most recently added.
  for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
    if (V.getPointer() == L) {
      V.setInt(D);
      break;
    }
  }
  return D;
}
9732 | |
/// Compute (without memoization) how \p S varies with respect to loop \p L:
/// LoopVariant, LoopInvariant, or LoopComputable (varies but in a way SCEV
/// can describe). Called only from getLoopDisposition, which caches results.
ScalarEvolution::LoopDisposition
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
  case scConstant:
    // Constants are invariant everywhere.
    return LoopInvariant;
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
    // A cast has the disposition of its operand.
    return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
  case scAddRecExpr: {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);

    // If L is the addrec's loop, it's computable.
    if (AR->getLoop() == L)
      return LoopComputable;

    // Add recurrences are never invariant in the function-body (null loop).
    if (!L)
      return LoopVariant;

    // This recurrence is variant w.r.t. L if L contains AR's loop.
    if (L->contains(AR->getLoop()))
      return LoopVariant;

    // This recurrence is invariant w.r.t. L if AR's loop contains L.
    if (AR->getLoop()->contains(L))
      return LoopInvariant;

    // This recurrence is variant w.r.t. L if any of its operands
    // are variant.
    for (auto *Op : AR->operands())
      if (!isLoopInvariant(Op, L))
        return LoopVariant;

    // Otherwise it's loop-invariant.
    return LoopInvariant;
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr: {
    // An n-ary expression is variant if any operand is, computable if any
    // operand is computable (and none variant), otherwise invariant.
    bool HasVarying = false;
    for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
      LoopDisposition D = getLoopDisposition(Op, L);
      if (D == LoopVariant)
        return LoopVariant;
      if (D == LoopComputable)
        HasVarying = true;
    }
    return HasVarying ? LoopComputable : LoopInvariant;
  }
  case scUDivExpr: {
    // Same combination rule as the n-ary case, over the two operands.
    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
    LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
    if (LD == LoopVariant)
      return LoopVariant;
    LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
    if (RD == LoopVariant)
      return LoopVariant;
    return (LD == LoopInvariant && RD == LoopInvariant) ?
           LoopInvariant : LoopComputable;
  }
  case scUnknown:
    // All non-instruction values are loop invariant. All instructions are loop
    // invariant if they are not contained in the specified loop.
    // Instructions are never considered invariant in the function body
    // (null loop) because they are defined within the "loop".
    if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
      return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
    return LoopInvariant;
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}
9808 | |
9809 | bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { |
9810 | return getLoopDisposition(S, L) == LoopInvariant; |
9811 | } |
9812 | |
9813 | bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { |
9814 | return getLoopDisposition(S, L) == LoopComputable; |
9815 | } |
9816 | |
/// Return the memoized dominance relationship of \p S with respect to block
/// \p BB, computing and caching it on a miss.
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  auto &Values = BlockDispositions[S];
  for (auto &V : Values) {
    if (V.getPointer() == BB)
      return V.getInt();
  }
  // Insert a provisional DoesNotDominateBlock entry before recursing so that
  // self-referential queries terminate.
  Values.emplace_back(BB, DoesNotDominateBlock);
  BlockDisposition D = computeBlockDisposition(S, BB);
  // computeBlockDisposition may have added entries to BlockDispositions,
  // invalidating `Values`; re-fetch the bucket before patching the answer in.
  auto &Values2 = BlockDispositions[S];
  // Scan from the back: our provisional entry is the most recently added.
  for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
    if (V.getPointer() == BB) {
      V.setInt(D);
      break;
    }
  }
  return D;
}
9835 | |
/// Compute (without memoization) whether every value \p S depends on
/// dominates / properly dominates block \p BB. Called only from
/// getBlockDisposition, which caches results.
ScalarEvolution::BlockDisposition
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
  case scConstant:
    // Constants dominate everything.
    return ProperlyDominatesBlock;
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
    // A cast has the disposition of its operand.
    return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
  case scAddRecExpr: {
    // This uses a "dominates" query instead of "properly dominates" query
    // to test for proper dominance too, because the instruction which
    // produces the addrec's value is a PHI, and a PHI effectively properly
    // dominates its entire containing block.
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
    if (!DT.dominates(AR->getLoop()->getHeader(), BB))
      return DoesNotDominateBlock;
  }
  // FALL THROUGH into SCEVNAryExpr handling.
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr: {
    // The expression dominates BB iff all operands do; it properly
    // dominates iff all operands properly dominate.
    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
    bool Proper = true;
    for (const SCEV *NAryOp : NAry->operands()) {
      BlockDisposition D = getBlockDisposition(NAryOp, BB);
      if (D == DoesNotDominateBlock)
        return DoesNotDominateBlock;
      if (D == DominatesBlock)
        Proper = false;
    }
    return Proper ? ProperlyDominatesBlock : DominatesBlock;
  }
  case scUDivExpr: {
    // Same combination rule as the n-ary case, over the two operands.
    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
    const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
    BlockDisposition LD = getBlockDisposition(LHS, BB);
    if (LD == DoesNotDominateBlock)
      return DoesNotDominateBlock;
    BlockDisposition RD = getBlockDisposition(RHS, BB);
    if (RD == DoesNotDominateBlock)
      return DoesNotDominateBlock;
    return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
      ProperlyDominatesBlock : DominatesBlock;
  }
  case scUnknown:
    // An instruction dominates BB per the dominator tree; its own block is
    // "dominates" but not "properly dominates". Non-instruction values
    // dominate everything.
    if (Instruction *I =
          dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
      if (I->getParent() == BB)
        return DominatesBlock;
      if (DT.properlyDominates(I->getParent(), BB))
        return ProperlyDominatesBlock;
      return DoesNotDominateBlock;
    }
    return ProperlyDominatesBlock;
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}
9897 | |
9898 | bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { |
9899 | return getBlockDisposition(S, BB) >= DominatesBlock; |
9900 | } |
9901 | |
9902 | bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { |
9903 | return getBlockDisposition(S, BB) == ProperlyDominatesBlock; |
9904 | } |
9905 | |
9906 | bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { |
9907 | // Search for a SCEV expression node within an expression tree. |
9908 | // Implements SCEVTraversal::Visitor. |
9909 | struct SCEVSearch { |
9910 | const SCEV *Node; |
9911 | bool IsFound; |
9912 | |
9913 | SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} |
9914 | |
9915 | bool follow(const SCEV *S) { |
9916 | IsFound |= (S == Node); |
9917 | return !IsFound; |
9918 | } |
9919 | bool isDone() const { return IsFound; } |
9920 | }; |
9921 | |
9922 | SCEVSearch Search(Op); |
9923 | visitAll(S, Search); |
9924 | return Search.IsFound; |
9925 | } |
9926 | |
9927 | void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { |
9928 | ValuesAtScopes.erase(S); |
9929 | LoopDispositions.erase(S); |
9930 | BlockDispositions.erase(S); |
9931 | UnsignedRanges.erase(S); |
9932 | SignedRanges.erase(S); |
9933 | ExprValueMap.erase(S); |
9934 | HasRecMap.erase(S); |
9935 | |
9936 | auto RemoveSCEVFromBackedgeMap = |
9937 | [S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) { |
9938 | for (auto I = Map.begin(), E = Map.end(); I != E;) { |
9939 | BackedgeTakenInfo &BEInfo = I->second; |
9940 | if (BEInfo.hasOperand(S, this)) { |
9941 | BEInfo.clear(); |
9942 | Map.erase(I++); |
9943 | } else |
9944 | ++I; |
9945 | } |
9946 | }; |
9947 | |
9948 | RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); |
9949 | RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); |
9950 | } |
9951 | |
9952 | typedef DenseMap<const Loop *, std::string> VerifyMap; |
9953 | |
9954 | /// replaceSubString - Replaces all occurrences of From in Str with To. |
9955 | static void replaceSubString(std::string &Str, StringRef From, StringRef To) { |
9956 | size_t Pos = 0; |
9957 | while ((Pos = Str.find(From, Pos)) != std::string::npos) { |
9958 | Str.replace(Pos, From.size(), To.data(), To.size()); |
9959 | Pos += To.size(); |
9960 | } |
9961 | } |
9962 | |
9963 | /// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis. |
9964 | static void |
9965 | getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) { |
9966 | std::string &S = Map[L]; |
9967 | if (S.empty()) { |
9968 | raw_string_ostream OS(S); |
9969 | SE.getBackedgeTakenCount(L)->print(OS); |
9970 | |
9971 | // false and 0 are semantically equivalent. This can happen in dead loops. |
9972 | replaceSubString(OS.str(), "false", "0"); |
9973 | // Remove wrap flags, their use in SCEV is highly fragile. |
9974 | // FIXME: Remove this when SCEV gets smarter about them. |
9975 | replaceSubString(OS.str(), "<nw>", ""); |
9976 | replaceSubString(OS.str(), "<nsw>", ""); |
9977 | replaceSubString(OS.str(), "<nuw>", ""); |
9978 | } |
9979 | |
9980 | for (auto *R : reverse(*L)) |
9981 | getLoopBackedgeTakenCounts(R, Map, SE); // recurse. |
9982 | } |
9983 | |
/// Verify that SCEV's caches are still valid: stringify the backedge-taken
/// count of every loop using the cached state, recompute them with a fresh
/// ScalarEvolution, and abort if any differ.
void ScalarEvolution::verify() const {
  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);

  // Gather stringified backedge taken counts for all loops using SCEV's caches.
  // FIXME: It would be much better to store actual values instead of strings,
  // but SCEV pointers will change if we drop the caches.
  VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
    getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);

  // Gather stringified backedge taken counts for all loops using a fresh
  // ScalarEvolution object.
  ScalarEvolution SE2(F, TLI, AC, DT, LI);
  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);

  // Now compare whether they're the same with and without caches. This allows
  // verifying that no pass changed the cache.
  assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&((BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && "New loops suddenly appeared!") ? static_cast<void> (0 ) : __assert_fail ("BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && \"New loops suddenly appeared!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 10003, __PRETTY_FUNCTION__))
         "New loops suddenly appeared!")((BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && "New loops suddenly appeared!") ? static_cast<void> (0 ) : __assert_fail ("BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && \"New loops suddenly appeared!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 10003, __PRETTY_FUNCTION__));

  // Both maps were filled from the same loop nest in the same order, so the
  // two iterators stay in lockstep.
  for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
                           OldE = BackedgeDumpsOld.end(),
                           NewI = BackedgeDumpsNew.begin();
       OldI != OldE; ++OldI, ++NewI) {
    assert(OldI->first == NewI->first && "Loop order changed!")((OldI->first == NewI->first && "Loop order changed!" ) ? static_cast<void> (0) : __assert_fail ("OldI->first == NewI->first && \"Loop order changed!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 10009, __PRETTY_FUNCTION__));

    // Compare the stringified SCEVs. We don't care if undef backedgetaken count
    // changes.
    // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
    // means that a pass is buggy or SCEV has to learn a new pattern but is
    // usually not harmful.
    if (OldI->second != NewI->second &&
        OldI->second.find("undef") == std::string::npos &&
        NewI->second.find("undef") == std::string::npos &&
        OldI->second != "***COULDNOTCOMPUTE***" &&
        NewI->second != "***COULDNOTCOMPUTE***") {
      dbgs() << "SCEVValidator: SCEV for loop '"
             << OldI->first->getHeader()->getName()
             << "' changed from '" << OldI->second
             << "' to '" << NewI->second << "'!\n";
      std::abort();
    }
  }

  // TODO: Verify more things.
}
10031 | |
10032 | char ScalarEvolutionAnalysis::PassID; |
10033 | |
10034 | ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, |
10035 | AnalysisManager<Function> &AM) { |
10036 | return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F), |
10037 | AM.getResult<AssumptionAnalysis>(F), |
10038 | AM.getResult<DominatorTreeAnalysis>(F), |
10039 | AM.getResult<LoopAnalysis>(F)); |
10040 | } |
10041 | |
10042 | PreservedAnalyses |
10043 | ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> &AM) { |
10044 | AM.getResult<ScalarEvolutionAnalysis>(F).print(OS); |
10045 | return PreservedAnalyses::all(); |
10046 | } |
10047 | |
// Pass registration boilerplate. These lines are the preprocessor expansion of
// the INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_DEPENDENCY / INITIALIZE_PASS_END
// macros: they register "scalar-evolution" with the PassRegistry (CFG-only,
// analysis pass) after initializing the four analyses it depends on, guarded
// by a compare-and-swap so initialization runs exactly once.
INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",static void* initializeScalarEvolutionWrapperPassPassOnce(PassRegistry &Registry) {
                      "Scalar Evolution Analysis", false, true)static void* initializeScalarEvolutionWrapperPassPassOnce(PassRegistry &Registry) {
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",PassInfo *PI = new PassInfo("Scalar Evolution Analysis", "scalar-evolution" , & ScalarEvolutionWrapperPass ::ID, PassInfo::NormalCtor_t (callDefaultCtor< ScalarEvolutionWrapperPass >), false, true); Registry.registerPass(*PI, true); return PI; } void llvm ::initializeScalarEvolutionWrapperPassPass(PassRegistry & Registry) { static volatile sys::cas_flag initialized = 0; sys ::cas_flag old_val = sys::CompareAndSwap(&initialized, 1, 0); if (old_val == 0) { initializeScalarEvolutionWrapperPassPassOnce (Registry); sys::MemoryFence(); ; ; initialized = 2; ; } else { sys::cas_flag tmp = initialized; sys::MemoryFence(); while (tmp != 2) { tmp = initialized; sys::MemoryFence(); } } ; }
                    "Scalar Evolution Analysis", false, true)PassInfo *PI = new PassInfo("Scalar Evolution Analysis", "scalar-evolution" , & ScalarEvolutionWrapperPass ::ID, PassInfo::NormalCtor_t (callDefaultCtor< ScalarEvolutionWrapperPass >), false, true); Registry.registerPass(*PI, true); return PI; } void llvm ::initializeScalarEvolutionWrapperPassPass(PassRegistry & Registry) { static volatile sys::cas_flag initialized = 0; sys ::cas_flag old_val = sys::CompareAndSwap(&initialized, 1, 0); if (old_val == 0) { initializeScalarEvolutionWrapperPassPassOnce (Registry); sys::MemoryFence(); ; ; initialized = 2; ; } else { sys::cas_flag tmp = initialized; sys::MemoryFence(); while (tmp != 2) { tmp = initialized; sys::MemoryFence(); } } ; }
char ScalarEvolutionWrapperPass::ID = 0;
10057 | |
// Default constructor: registers the pass with the global registry. The
// ScalarEvolution result itself is built lazily in runOnFunction.
ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
  initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
}
10061 | |
10062 | bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { |
10063 | SE.reset(new ScalarEvolution( |
10064 | F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), |
10065 | getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), |
10066 | getAnalysis<DominatorTreeWrapperPass>().getDomTree(), |
10067 | getAnalysis<LoopInfoWrapperPass>().getLoopInfo())); |
10068 | return false; |
10069 | } |
10070 | |
// Drop the computed ScalarEvolution so its memory can be reclaimed.
void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }

// Print the current ScalarEvolution result; the Module argument is unused.
void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
  SE->print(OS);
}
10076 | |
10077 | void ScalarEvolutionWrapperPass::verifyAnalysis() const { |
10078 | if (!VerifySCEV) |
10079 | return; |
10080 | |
10081 | SE->verify(); |
10082 | } |
10083 | |
void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  // SCEV never mutates the IR.
  AU.setPreservesAll();
  // The ScalarEvolution object holds references into these analyses, so they
  // are required transitively: they must stay alive as long as SCEV does.
  AU.addRequiredTransitive<AssumptionCacheTracker>();
  AU.addRequiredTransitive<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
10091 | |
10092 | const SCEVPredicate * |
10093 | ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS, |
10094 | const SCEVConstant *RHS) { |
10095 | FoldingSetNodeID ID; |
10096 | // Unique this node based on the arguments |
10097 | ID.AddInteger(SCEVPredicate::P_Equal); |
10098 | ID.AddPointer(LHS); |
10099 | ID.AddPointer(RHS); |
10100 | void *IP = nullptr; |
10101 | if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP)) |
10102 | return S; |
10103 | SCEVEqualPredicate *Eq = new (SCEVAllocator) |
10104 | SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS); |
10105 | UniquePreds.InsertNode(Eq, IP); |
10106 | return Eq; |
10107 | } |
10108 | |
10109 | const SCEVPredicate *ScalarEvolution::getWrapPredicate( |
10110 | const SCEVAddRecExpr *AR, |
10111 | SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { |
10112 | FoldingSetNodeID ID; |
10113 | // Unique this node based on the arguments |
10114 | ID.AddInteger(SCEVPredicate::P_Wrap); |
10115 | ID.AddPointer(AR); |
10116 | ID.AddInteger(AddedFlags); |
10117 | void *IP = nullptr; |
10118 | if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP)) |
10119 | return S; |
10120 | auto *OF = new (SCEVAllocator) |
10121 | SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags); |
10122 | UniquePreds.InsertNode(OF, IP); |
10123 | return OF; |
10124 | } |
10125 | |
namespace {

/// Visitor that rewrites a SCEV expression under a set of predicates:
/// equality predicates substitute known constant values for unknowns, and
/// (when Assume is set) wrap predicates are accumulated so that zext/sext of
/// an affine AddRec can be folded through.
class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
  // Rewrites \p S in the context of a loop L and the predicate A.
  // If Assume is true, rewrite is free to add further predicates to A
  // such that the result will be an AddRecExpr.
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             SCEVUnionPredicate &A, bool Assume) {
    SCEVPredicateRewriter Rewriter(L, SE, A, Assume);
    return Rewriter.visit(S);
  }

  SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
                        SCEVUnionPredicate &P, bool Assume)
      : SCEVRewriteVisitor(SE), P(P), L(L), Assume(Assume) {}

  // If P contains an equality predicate "Expr == C", rewrite Expr to C.
  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    auto ExprPreds = P.getPredicatesForExpr(Expr);
    for (auto *Pred : ExprPreds)
      if (const auto *IPred = dyn_cast<const SCEVEqualPredicate>(Pred))
        if (IPred->getLHS() == Expr)
          return IPred->getRHS();

    return Expr;
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<const SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nuw
      // flag. Add the nusw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW))
        // Note the step is sign-extended here even though the outer cast is a
        // zext: that is what the IncrementNUSW predicate licenses.
        return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<const SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nsw
      // flag. Add the nssw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW))
        return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

private:
  // Returns true if the wrap predicate may be used: either it is already
  // implied by P, or (when Assume is set) after adding it to P.
  bool addOverflowAssumption(const SCEVAddRecExpr *AR,
                             SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
    auto *A = SE.getWrapPredicate(AR, AddedFlags);
    if (!Assume) {
      // Check if we've already made this assumption.
      if (P.implies(A))
        return true;
      return false;
    }
    P.add(A);
    return true;
  }

  SCEVUnionPredicate &P; // Predicate set consulted (and grown, if Assume).
  const Loop *L;         // Loop whose AddRecs may receive assumptions.
  bool Assume;           // Whether new predicates may be added to P.
};
} // end anonymous namespace
10204 | |
const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
                                                   SCEVUnionPredicate &Preds) {
  // Assume == false: only apply predicates already in Preds; never add new
  // ones.
  return SCEVPredicateRewriter::rewrite(S, L, *this, Preds, false);
}
10209 | |
10210 | const SCEVAddRecExpr * |
10211 | ScalarEvolution::convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L, |
10212 | SCEVUnionPredicate &Preds) { |
10213 | SCEVUnionPredicate TransformPreds; |
10214 | S = SCEVPredicateRewriter::rewrite(S, L, *this, TransformPreds, true); |
10215 | auto *AddRec = dyn_cast<SCEVAddRecExpr>(S); |
10216 | |
10217 | if (!AddRec) |
10218 | return nullptr; |
10219 | |
10220 | // Since the transformation was successful, we can now transfer the SCEV |
10221 | // predicates. |
10222 | Preds.add(&TransformPreds); |
10223 | return AddRec; |
10224 | } |
10225 | |
/// SCEV predicates
// Base class constructor: stores the uniquing ID and the predicate kind.
SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
                             SCEVPredicateKind Kind)
    : FastID(ID), Kind(Kind) {}

// Predicate asserting that the unknown LHS equals the constant RHS.
SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
                                       const SCEVUnknown *LHS,
                                       const SCEVConstant *RHS)
    : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
10235 | |
10236 | bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const { |
10237 | const auto *Op = dyn_cast<const SCEVEqualPredicate>(N); |
10238 | |
10239 | if (!Op) |
10240 | return false; |
10241 | |
10242 | return Op->LHS == LHS && Op->RHS == RHS; |
10243 | } |
10244 | |
// An equality predicate is never known statically; it always needs a
// run-time check.
bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }

// The expression this predicate constrains is its left-hand side.
const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }

void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
}
10252 | |
// Predicate asserting that the increment of AR does not wrap in the ways
// described by Flags.
SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
                                     const SCEVAddRecExpr *AR,
                                     IncrementWrapFlags Flags)
    : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {}

// The expression this predicate constrains is the AddRec itself.
const SCEV *SCEVWrapPredicate::getExpr() const { return AR; }
10259 | |
10260 | bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const { |
10261 | const auto *Op = dyn_cast<SCEVWrapPredicate>(N); |
10262 | |
10263 | return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags; |
10264 | } |
10265 | |
10266 | bool SCEVWrapPredicate::isAlwaysTrue() const { |
10267 | SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags(); |
10268 | IncrementWrapFlags IFlags = Flags; |
10269 | |
10270 | if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags) |
10271 | IFlags = clearFlags(IFlags, IncrementNSSW); |
10272 | |
10273 | return IFlags == IncrementAnyWrap; |
10274 | } |
10275 | |
void SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const {
  // Emits e.g. "{0,+,1}<%loop> Added Flags: <nusw>".
  OS.indent(Depth) << *getExpr() << " Added Flags: ";
  if (SCEVWrapPredicate::IncrementNUSW & getFlags())
    OS << "<nusw>";
  if (SCEVWrapPredicate::IncrementNSSW & getFlags())
    OS << "<nssw>";
  OS << "\n";
}
10284 | |
10285 | SCEVWrapPredicate::IncrementWrapFlags |
10286 | SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR, |
10287 | ScalarEvolution &SE) { |
10288 | IncrementWrapFlags ImpliedFlags = IncrementAnyWrap; |
10289 | SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags(); |
10290 | |
10291 | // We can safely transfer the NSW flag as NSSW. |
10292 | if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags) |
10293 | ImpliedFlags = IncrementNSSW; |
10294 | |
10295 | if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) { |
10296 | // If the increment is positive, the SCEV NUW flag will also imply the |
10297 | // WrapPredicate NUSW flag. |
10298 | if (const auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) |
10299 | if (Step->getValue()->getValue().isNonNegative()) |
10300 | ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW); |
10301 | } |
10302 | |
10303 | return ImpliedFlags; |
10304 | } |
10305 | |
/// Union predicates don't get cached so create a dummy set ID for it.
SCEVUnionPredicate::SCEVUnionPredicate()
    : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
10309 | |
10310 | bool SCEVUnionPredicate::isAlwaysTrue() const { |
10311 | return all_of(Preds, |
10312 | [](const SCEVPredicate *I) { return I->isAlwaysTrue(); }); |
10313 | } |
10314 | |
10315 | ArrayRef<const SCEVPredicate *> |
10316 | SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) { |
10317 | auto I = SCEVToPreds.find(Expr); |
10318 | if (I == SCEVToPreds.end()) |
10319 | return ArrayRef<const SCEVPredicate *>(); |
10320 | return I->second; |
10321 | } |
10322 | |
10323 | bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { |
10324 | if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N)) |
10325 | return all_of(Set->Preds, |
10326 | [this](const SCEVPredicate *I) { return this->implies(I); }); |
10327 | |
10328 | auto ScevPredsIt = SCEVToPreds.find(N->getExpr()); |
10329 | if (ScevPredsIt == SCEVToPreds.end()) |
10330 | return false; |
10331 | auto &SCEVPreds = ScevPredsIt->second; |
10332 | |
10333 | return any_of(SCEVPreds, |
10334 | [N](const SCEVPredicate *I) { return I->implies(N); }); |
10335 | } |
10336 | |
// A union spans many expressions, so it has no single associated one.
const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }

void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
  for (auto Pred : Preds)
    Pred->print(OS, Depth);
}
10343 | |
void SCEVUnionPredicate::add(const SCEVPredicate *N) {
  // Unions are flattened: add each member individually.
  if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N)) {
    for (auto Pred : Set->Preds)
      add(Pred);
    return;
  }

  // Skip predicates we already imply to keep Preds minimal.
  if (implies(N))
    return;

  // Index the predicate by the expression it constrains, and keep the flat
  // list in sync.
  const SCEV *Key = N->getExpr();
  assert(Key && "Only SCEVUnionPredicate doesn't have an "((Key && "Only SCEVUnionPredicate doesn't have an " " associated expression!" ) ? static_cast<void> (0) : __assert_fail ("Key && \"Only SCEVUnionPredicate doesn't have an \" \" associated expression!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 10356, __PRETTY_FUNCTION__))
                " associated expression!")((Key && "Only SCEVUnionPredicate doesn't have an " " associated expression!" ) ? static_cast<void> (0) : __assert_fail ("Key && \"Only SCEVUnionPredicate doesn't have an \" \" associated expression!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Analysis/ScalarEvolution.cpp" , 10356, __PRETTY_FUNCTION__));

  SCEVToPreds[Key].push_back(N);
  Preds.push_back(N);
}
10361 | |
// Start with an empty predicate set; the backedge count is computed lazily.
PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
                                                     Loop &L)
    : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {}
10365 | |
/// Return V's SCEV rewritten under the current predicate set, caching the
/// result per predicate-set generation.
const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
  const SCEV *Expr = SE.getSCEV(V);
  RewriteEntry &Entry = RewriteMap[Expr];

  // If we already have an entry and the version matches, return it.
  if (Entry.second && Generation == Entry.first)
    return Entry.second;

  // We found an entry but it's stale. Rewrite the stale entry
  // according to the current predicate.
  if (Entry.second)
    Expr = Entry.second;

  const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds);
  Entry = {Generation, NewSCEV};

  return NewSCEV;
}
10384 | |
10385 | const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { |
10386 | if (!BackedgeCount) { |
10387 | SCEVUnionPredicate BackedgePred; |
10388 | BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred); |
10389 | addPredicate(BackedgePred); |
10390 | } |
10391 | return BackedgeCount; |
10392 | } |
10393 | |
10394 | void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { |
10395 | if (Preds.implies(&Pred)) |
10396 | return; |
10397 | Preds.add(&Pred); |
10398 | updateGeneration(); |
10399 | } |
10400 | |
// Expose the accumulated predicate set (read-only).
const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
  return Preds;
}
10404 | |
10405 | void PredicatedScalarEvolution::updateGeneration() { |
10406 | // If the generation number wrapped recompute everything. |
10407 | if (++Generation == 0) { |
10408 | for (auto &II : RewriteMap) { |
10409 | const SCEV *Rewritten = II.second.second; |
10410 | II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)}; |
10411 | } |
10412 | } |
10413 | } |
10414 | |
10415 | void PredicatedScalarEvolution::setNoOverflow( |
10416 | Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { |
10417 | const SCEV *Expr = getSCEV(V); |
10418 | const auto *AR = cast<SCEVAddRecExpr>(Expr); |
10419 | |
10420 | auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE); |
10421 | |
10422 | // Clear the statically implied flags. |
10423 | Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags); |
10424 | addPredicate(*SE.getWrapPredicate(AR, Flags)); |
10425 | |
10426 | auto II = FlagsMap.insert({V, Flags}); |
10427 | if (!II.second) |
10428 | II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second); |
10429 | } |
10430 | |
10431 | bool PredicatedScalarEvolution::hasNoOverflow( |
10432 | Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { |
10433 | const SCEV *Expr = getSCEV(V); |
10434 | const auto *AR = cast<SCEVAddRecExpr>(Expr); |
10435 | |
10436 | Flags = SCEVWrapPredicate::clearFlags( |
10437 | Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE)); |
10438 | |
10439 | auto II = FlagsMap.find(V); |
10440 | |
10441 | if (II != FlagsMap.end()) |
10442 | Flags = SCEVWrapPredicate::clearFlags(Flags, II->second); |
10443 | |
10444 | return Flags == SCEVWrapPredicate::IncrementAnyWrap; |
10445 | } |
10446 | |
10447 | const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { |
10448 | const SCEV *Expr = this->getSCEV(V); |
10449 | auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, Preds); |
10450 | |
10451 | if (!New) |
10452 | return nullptr; |
10453 | |
10454 | updateGeneration(); |
10455 | RewriteMap[SE.getSCEV(V)] = {Generation, New}; |
10456 | return New; |
10457 | } |
10458 | |
10459 | PredicatedScalarEvolution::PredicatedScalarEvolution( |
10460 | const PredicatedScalarEvolution &Init) |
10461 | : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), |
10462 | Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { |
10463 | for (auto I = Init.FlagsMap.begin(), E = Init.FlagsMap.end(); I != E; ++I) |
10464 | FlagsMap.insert(*I); |
10465 | } |
10466 | |
/// Print, for each SCEVable instruction in the loop, its base SCEV and the
/// predicate-rewritten form — but only where the rewrite changed something.
void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
  // For each block.
  for (auto *BB : L.getBlocks())
    for (auto &I : *BB) {
      if (!SE.isSCEVable(I.getType()))
        continue;

      auto *Expr = SE.getSCEV(&I);
      auto II = RewriteMap.find(Expr);

      // Only instructions we have a cached rewrite for are reported.
      if (II == RewriteMap.end())
        continue;

      // Don't print things that are not interesting.
      if (II->second.second == Expr)
        continue;

      OS.indent(Depth) << "[PSE]" << I << ":\n";
      OS.indent(Depth + 2) << *Expr << "\n";
      OS.indent(Depth + 2) << "--> " << *II->second.second << "\n";
    }
}