Bug Summary

File:llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
Warning:line 1904, column 31
Called C++ object pointer is uninitialized

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name IndVarSimplify.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Transforms/Scalar -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Transforms/Scalar -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-07-26-235520-9401-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
26#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/APInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/None.h"
32#include "llvm/ADT/Optional.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/SmallPtrSet.h"
35#include "llvm/ADT/SmallSet.h"
36#include "llvm/ADT/SmallVector.h"
37#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/iterator_range.h"
39#include "llvm/Analysis/LoopInfo.h"
40#include "llvm/Analysis/LoopPass.h"
41#include "llvm/Analysis/MemorySSA.h"
42#include "llvm/Analysis/MemorySSAUpdater.h"
43#include "llvm/Analysis/ScalarEvolution.h"
44#include "llvm/Analysis/ScalarEvolutionExpressions.h"
45#include "llvm/Analysis/TargetLibraryInfo.h"
46#include "llvm/Analysis/TargetTransformInfo.h"
47#include "llvm/Analysis/ValueTracking.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/ConstantRange.h"
51#include "llvm/IR/Constants.h"
52#include "llvm/IR/DataLayout.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Dominators.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/InstrTypes.h"
58#include "llvm/IR/Instruction.h"
59#include "llvm/IR/Instructions.h"
60#include "llvm/IR/IntrinsicInst.h"
61#include "llvm/IR/Intrinsics.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/Operator.h"
64#include "llvm/IR/PassManager.h"
65#include "llvm/IR/PatternMatch.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/Use.h"
68#include "llvm/IR/User.h"
69#include "llvm/IR/Value.h"
70#include "llvm/IR/ValueHandle.h"
71#include "llvm/InitializePasses.h"
72#include "llvm/Pass.h"
73#include "llvm/Support/Casting.h"
74#include "llvm/Support/CommandLine.h"
75#include "llvm/Support/Compiler.h"
76#include "llvm/Support/Debug.h"
77#include "llvm/Support/ErrorHandling.h"
78#include "llvm/Support/MathExtras.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Transforms/Scalar.h"
81#include "llvm/Transforms/Scalar/LoopPassManager.h"
82#include "llvm/Transforms/Utils/BasicBlockUtils.h"
83#include "llvm/Transforms/Utils/Local.h"
84#include "llvm/Transforms/Utils/LoopUtils.h"
85#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86#include "llvm/Transforms/Utils/SimplifyIndVar.h"
87#include <cassert>
88#include <cstdint>
89#include <utility>
90
91using namespace llvm;
92
93#define DEBUG_TYPE"indvars" "indvars"
94
95STATISTIC(NumWidened , "Number of indvars widened")static llvm::Statistic NumWidened = {"indvars", "NumWidened",
"Number of indvars widened"}
;
96STATISTIC(NumReplaced , "Number of exit values replaced")static llvm::Statistic NumReplaced = {"indvars", "NumReplaced"
, "Number of exit values replaced"}
;
97STATISTIC(NumLFTR , "Number of loop exit tests replaced")static llvm::Statistic NumLFTR = {"indvars", "NumLFTR", "Number of loop exit tests replaced"
}
;
98STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated")static llvm::Statistic NumElimExt = {"indvars", "NumElimExt",
"Number of IV sign/zero extends eliminated"}
;
99STATISTIC(NumElimIV , "Number of congruent IVs eliminated")static llvm::Statistic NumElimIV = {"indvars", "NumElimIV", "Number of congruent IVs eliminated"
}
;
100
101// Trip count verification can be enabled by default under NDEBUG if we
102// implement a strong expression equivalence checker in SCEV. Until then, we
103// use the verify-indvars flag, which may assert in some cases.
104static cl::opt<bool> VerifyIndvars(
105 "verify-indvars", cl::Hidden,
106 cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
107 "effect in release builds. (Note: this adds additional SCEV "
108 "queries potentially changing the analysis result)"));
109
110static cl::opt<ReplaceExitVal> ReplaceExitValue(
111 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
112 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
113 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value")llvm::cl::OptionEnumValue { "never", int(NeverRepl), "never replace exit value"
}
,
114 clEnumValN(OnlyCheapRepl, "cheap",llvm::cl::OptionEnumValue { "cheap", int(OnlyCheapRepl), "only replace exit value when the cost is cheap"
}
115 "only replace exit value when the cost is cheap")llvm::cl::OptionEnumValue { "cheap", int(OnlyCheapRepl), "only replace exit value when the cost is cheap"
}
,
116 clEnumValN(NoHardUse, "noharduse",llvm::cl::OptionEnumValue { "noharduse", int(NoHardUse), "only replace exit values when loop def likely dead"
}
117 "only replace exit values when loop def likely dead")llvm::cl::OptionEnumValue { "noharduse", int(NoHardUse), "only replace exit values when loop def likely dead"
}
,
118 clEnumValN(AlwaysRepl, "always",llvm::cl::OptionEnumValue { "always", int(AlwaysRepl), "always replace exit value whenever possible"
}
119 "always replace exit value whenever possible")llvm::cl::OptionEnumValue { "always", int(AlwaysRepl), "always replace exit value whenever possible"
}
));
120
121static cl::opt<bool> UsePostIncrementRanges(
122 "indvars-post-increment-ranges", cl::Hidden,
123 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124 cl::init(true));
125
126static cl::opt<bool>
127DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128 cl::desc("Disable Linear Function Test Replace optimization"));
129
130static cl::opt<bool>
131LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132 cl::desc("Predicate conditions in read only loops"));
133
134static cl::opt<bool>
135AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
136 cl::desc("Allow widening of indvars to eliminate s/zext"));
137
138namespace {
139
140struct RewritePhi;
141
142class IndVarSimplify {
143 LoopInfo *LI;
144 ScalarEvolution *SE;
145 DominatorTree *DT;
146 const DataLayout &DL;
147 TargetLibraryInfo *TLI;
148 const TargetTransformInfo *TTI;
149 std::unique_ptr<MemorySSAUpdater> MSSAU;
150
151 SmallVector<WeakTrackingVH, 16> DeadInsts;
152 bool WidenIndVars;
153
154 bool handleFloatingPointIV(Loop *L, PHINode *PH);
155 bool rewriteNonIntegerIVs(Loop *L);
156
157 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
158 /// Try to eliminate loop exits based on analyzeable exit counts
159 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
160 /// Try to form loop invariant tests for loop exits by changing how many
161 /// iterations of the loop run when that is unobservable.
162 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
163
164 bool rewriteFirstIterationLoopExitValues(Loop *L);
165
166 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
167 const SCEV *ExitCount,
168 PHINode *IndVar, SCEVExpander &Rewriter);
169
170 bool sinkUnusedInvariants(Loop *L);
171
172public:
173 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
174 const DataLayout &DL, TargetLibraryInfo *TLI,
175 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
176 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
177 WidenIndVars(WidenIndVars) {
178 if (MSSA)
179 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
180 }
181
182 bool run(Loop *L);
183};
184
185} // end anonymous namespace
186
187//===----------------------------------------------------------------------===//
188// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
189//===----------------------------------------------------------------------===//
190
191/// Convert APF to an integer, if possible.
192static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
193 bool isExact = false;
194 // See if we can convert this to an int64_t
195 uint64_t UIntVal;
196 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
197 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
198 !isExact)
199 return false;
200 IntVal = UIntVal;
201 return true;
202}
203
204/// If the loop has floating induction variable then insert corresponding
205/// integer induction variable if possible.
206/// For example,
207/// for(double i = 0; i < 10000; ++i)
208/// bar(i)
209/// is converted into
210/// for(int i = 0; i < 10000; ++i)
211/// bar((double)i);
212bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
213 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
214 unsigned BackEdge = IncomingEdge^1;
215
216 // Check incoming value.
217 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
218
219 int64_t InitValue;
220 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
221 return false;
222
223 // Check IV increment. Reject this PN if increment operation is not
224 // an add or increment value can not be represented by an integer.
225 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
226 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
227
228 // If this is not an add of the PHI with a constantfp, or if the constant fp
229 // is not an integer, bail out.
230 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
231 int64_t IncValue;
232 if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
233 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
234 return false;
235
236 // Check Incr uses. One user is PN and the other user is an exit condition
237 // used by the conditional terminator.
238 Value::user_iterator IncrUse = Incr->user_begin();
239 Instruction *U1 = cast<Instruction>(*IncrUse++);
240 if (IncrUse == Incr->user_end()) return false;
241 Instruction *U2 = cast<Instruction>(*IncrUse++);
242 if (IncrUse != Incr->user_end()) return false;
243
244 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
245 // only used by a branch, we can't transform it.
246 FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
247 if (!Compare)
248 Compare = dyn_cast<FCmpInst>(U2);
249 if (!Compare || !Compare->hasOneUse() ||
250 !isa<BranchInst>(Compare->user_back()))
251 return false;
252
253 BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
254
255 // We need to verify that the branch actually controls the iteration count
256 // of the loop. If not, the new IV can overflow and no one will notice.
257 // The branch block must be in the loop and one of the successors must be out
258 // of the loop.
259 assert(TheBr->isConditional() && "Can't use fcmp if not conditional")(static_cast <bool> (TheBr->isConditional() &&
"Can't use fcmp if not conditional") ? void (0) : __assert_fail
("TheBr->isConditional() && \"Can't use fcmp if not conditional\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 259, __extension__ __PRETTY_FUNCTION__))
;
260 if (!L->contains(TheBr->getParent()) ||
261 (L->contains(TheBr->getSuccessor(0)) &&
262 L->contains(TheBr->getSuccessor(1))))
263 return false;
264
265 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
266 // transform it.
267 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
268 int64_t ExitValue;
269 if (ExitValueVal == nullptr ||
270 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
271 return false;
272
273 // Find new predicate for integer comparison.
274 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
275 switch (Compare->getPredicate()) {
276 default: return false; // Unknown comparison.
277 case CmpInst::FCMP_OEQ:
278 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
279 case CmpInst::FCMP_ONE:
280 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
281 case CmpInst::FCMP_OGT:
282 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
283 case CmpInst::FCMP_OGE:
284 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
285 case CmpInst::FCMP_OLT:
286 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
287 case CmpInst::FCMP_OLE:
288 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
289 }
290
291 // We convert the floating point induction variable to a signed i32 value if
292 // we can. This is only safe if the comparison will not overflow in a way
293 // that won't be trapped by the integer equivalent operations. Check for this
294 // now.
295 // TODO: We could use i64 if it is native and the range requires it.
296
297 // The start/stride/exit values must all fit in signed i32.
298 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
299 return false;
300
301 // If not actually striding (add x, 0.0), avoid touching the code.
302 if (IncValue == 0)
303 return false;
304
305 // Positive and negative strides have different safety conditions.
306 if (IncValue > 0) {
307 // If we have a positive stride, we require the init to be less than the
308 // exit value.
309 if (InitValue >= ExitValue)
310 return false;
311
312 uint32_t Range = uint32_t(ExitValue-InitValue);
313 // Check for infinite loop, either:
314 // while (i <= Exit) or until (i > Exit)
315 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
316 if (++Range == 0) return false; // Range overflows.
317 }
318
319 unsigned Leftover = Range % uint32_t(IncValue);
320
321 // If this is an equality comparison, we require that the strided value
322 // exactly land on the exit value, otherwise the IV condition will wrap
323 // around and do things the fp IV wouldn't.
324 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
325 Leftover != 0)
326 return false;
327
328 // If the stride would wrap around the i32 before exiting, we can't
329 // transform the IV.
330 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
331 return false;
332 } else {
333 // If we have a negative stride, we require the init to be greater than the
334 // exit value.
335 if (InitValue <= ExitValue)
336 return false;
337
338 uint32_t Range = uint32_t(InitValue-ExitValue);
339 // Check for infinite loop, either:
340 // while (i >= Exit) or until (i < Exit)
341 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
342 if (++Range == 0) return false; // Range overflows.
343 }
344
345 unsigned Leftover = Range % uint32_t(-IncValue);
346
347 // If this is an equality comparison, we require that the strided value
348 // exactly land on the exit value, otherwise the IV condition will wrap
349 // around and do things the fp IV wouldn't.
350 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
351 Leftover != 0)
352 return false;
353
354 // If the stride would wrap around the i32 before exiting, we can't
355 // transform the IV.
356 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
357 return false;
358 }
359
360 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
361
362 // Insert new integer induction variable.
363 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
364 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
365 PN->getIncomingBlock(IncomingEdge));
366
367 Value *NewAdd =
368 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
369 Incr->getName()+".int", Incr);
370 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
371
372 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
373 ConstantInt::get(Int32Ty, ExitValue),
374 Compare->getName());
375
376 // In the following deletions, PN may become dead and may be deleted.
377 // Use a WeakTrackingVH to observe whether this happens.
378 WeakTrackingVH WeakPH = PN;
379
380 // Delete the old floating point exit comparison. The branch starts using the
381 // new comparison.
382 NewCompare->takeName(Compare);
383 Compare->replaceAllUsesWith(NewCompare);
384 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
385
386 // Delete the old floating point increment.
387 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
388 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
389
390 // If the FP induction variable still has uses, this is because something else
391 // in the loop uses its value. In order to canonicalize the induction
392 // variable, we chose to eliminate the IV and rewrite it in terms of an
393 // int->fp cast.
394 //
395 // We give preference to sitofp over uitofp because it is faster on most
396 // platforms.
397 if (WeakPH) {
398 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
399 &*PN->getParent()->getFirstInsertionPt());
400 PN->replaceAllUsesWith(Conv);
401 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
402 }
403 return true;
404}
405
406bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
407 // First step. Check to see if there are any floating-point recurrences.
408 // If there are, change them into integer recurrences, permitting analysis by
409 // the SCEV routines.
410 BasicBlock *Header = L->getHeader();
411
412 SmallVector<WeakTrackingVH, 8> PHIs;
413 for (PHINode &PN : Header->phis())
414 PHIs.push_back(&PN);
415
416 bool Changed = false;
417 for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
418 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
419 Changed |= handleFloatingPointIV(L, PN);
420
421 // If the loop previously had floating-point IV, ScalarEvolution
422 // may not have been able to compute a trip count. Now that we've done some
423 // re-writing, the trip count may be computable.
424 if (Changed)
425 SE->forgetLoop(L);
426 return Changed;
427}
428
429//===---------------------------------------------------------------------===//
430// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
431// they will exit at the first iteration.
432//===---------------------------------------------------------------------===//
433
434/// Check to see if this loop has loop invariant conditions which lead to loop
435/// exits. If so, we know that if the exit path is taken, it is at the first
436/// loop iteration. This lets us predict exit values of PHI nodes that live in
437/// loop header.
438bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
439 // Verify the input to the pass is already in LCSSA form.
440 assert(L->isLCSSAForm(*DT))(static_cast <bool> (L->isLCSSAForm(*DT)) ? void (0)
: __assert_fail ("L->isLCSSAForm(*DT)", "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 440, __extension__ __PRETTY_FUNCTION__))
;
441
442 SmallVector<BasicBlock *, 8> ExitBlocks;
443 L->getUniqueExitBlocks(ExitBlocks);
444
445 bool MadeAnyChanges = false;
446 for (auto *ExitBB : ExitBlocks) {
447 // If there are no more PHI nodes in this exit block, then no more
448 // values defined inside the loop are used on this path.
449 for (PHINode &PN : ExitBB->phis()) {
450 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
451 IncomingValIdx != E; ++IncomingValIdx) {
452 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
453
454 // Can we prove that the exit must run on the first iteration if it
455 // runs at all? (i.e. early exits are fine for our purposes, but
456 // traces which lead to this exit being taken on the 2nd iteration
457 // aren't.) Note that this is about whether the exit branch is
458 // executed, not about whether it is taken.
459 if (!L->getLoopLatch() ||
460 !DT->dominates(IncomingBB, L->getLoopLatch()))
461 continue;
462
463 // Get condition that leads to the exit path.
464 auto *TermInst = IncomingBB->getTerminator();
465
466 Value *Cond = nullptr;
467 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
468 // Must be a conditional branch, otherwise the block
469 // should not be in the loop.
470 Cond = BI->getCondition();
471 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
472 Cond = SI->getCondition();
473 else
474 continue;
475
476 if (!L->isLoopInvariant(Cond))
477 continue;
478
479 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
480
481 // Only deal with PHIs in the loop header.
482 if (!ExitVal || ExitVal->getParent() != L->getHeader())
483 continue;
484
485 // If ExitVal is a PHI on the loop header, then we know its
486 // value along this exit because the exit can only be taken
487 // on the first iteration.
488 auto *LoopPreheader = L->getLoopPreheader();
489 assert(LoopPreheader && "Invalid loop")(static_cast <bool> (LoopPreheader && "Invalid loop"
) ? void (0) : __assert_fail ("LoopPreheader && \"Invalid loop\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 489, __extension__ __PRETTY_FUNCTION__))
;
490 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
491 if (PreheaderIdx != -1) {
492 assert(ExitVal->getParent() == L->getHeader() &&(static_cast <bool> (ExitVal->getParent() == L->getHeader
() && "ExitVal must be in loop header") ? void (0) : __assert_fail
("ExitVal->getParent() == L->getHeader() && \"ExitVal must be in loop header\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 493, __extension__ __PRETTY_FUNCTION__))
493 "ExitVal must be in loop header")(static_cast <bool> (ExitVal->getParent() == L->getHeader
() && "ExitVal must be in loop header") ? void (0) : __assert_fail
("ExitVal->getParent() == L->getHeader() && \"ExitVal must be in loop header\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 493, __extension__ __PRETTY_FUNCTION__))
;
494 MadeAnyChanges = true;
495 PN.setIncomingValue(IncomingValIdx,
496 ExitVal->getIncomingValue(PreheaderIdx));
497 }
498 }
499 }
500 }
501 return MadeAnyChanges;
502}
503
504//===----------------------------------------------------------------------===//
505// IV Widening - Extend the width of an IV to cover its widest uses.
506//===----------------------------------------------------------------------===//
507
508/// Update information about the induction variable that is extended by this
509/// sign or zero extend operation. This is used to determine the final width of
510/// the IV before actually widening it.
511static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
512 ScalarEvolution *SE,
513 const TargetTransformInfo *TTI) {
514 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
515 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
516 return;
517
518 Type *Ty = Cast->getType();
519 uint64_t Width = SE->getTypeSizeInBits(Ty);
520 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
521 return;
522
523 // Check that `Cast` actually extends the induction variable (we rely on this
524 // later). This takes care of cases where `Cast` is extending a truncation of
525 // the narrow induction variable, and thus can end up being narrower than the
526 // "narrow" induction variable.
527 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
528 if (NarrowIVWidth >= Width)
529 return;
530
531 // Cast is either an sext or zext up to this point.
532 // We should not widen an indvar if arithmetics on the wider indvar are more
533 // expensive than those on the narrower indvar. We check only the cost of ADD
534 // because at least an ADD is required to increment the induction variable. We
535 // could compute more comprehensively the cost of all instructions on the
536 // induction variable when necessary.
537 if (TTI &&
538 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
539 TTI->getArithmeticInstrCost(Instruction::Add,
540 Cast->getOperand(0)->getType())) {
541 return;
542 }
543
544 if (!WI.WidestNativeType) {
545 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
546 WI.IsSigned = IsSigned;
547 return;
548 }
549
550 // We extend the IV to satisfy the sign of its first user, arbitrarily.
551 if (WI.IsSigned != IsSigned)
552 return;
553
554 if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
555 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
556}
557
558//===----------------------------------------------------------------------===//
559// Live IV Reduction - Minimize IVs live across the loop.
560//===----------------------------------------------------------------------===//
561
562//===----------------------------------------------------------------------===//
563// Simplification of IV users based on SCEV evaluation.
564//===----------------------------------------------------------------------===//
565
566namespace {
567
568class IndVarSimplifyVisitor : public IVVisitor {
569 ScalarEvolution *SE;
570 const TargetTransformInfo *TTI;
571 PHINode *IVPhi;
572
573public:
574 WideIVInfo WI;
575
576 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
577 const TargetTransformInfo *TTI,
578 const DominatorTree *DTree)
579 : SE(SCEV), TTI(TTI), IVPhi(IV) {
580 DT = DTree;
581 WI.NarrowIV = IVPhi;
582 }
583
584 // Implement the interface used by simplifyUsersOfIV.
585 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
586};
587
588} // end anonymous namespace
589
590/// Iteratively perform simplification on a worklist of IV users. Each
591/// successive simplification may push more users which may themselves be
592/// candidates for simplification.
593///
594/// Sign/Zero extend elimination is interleaved with IV simplification.
595bool IndVarSimplify::simplifyAndExtend(Loop *L,
596 SCEVExpander &Rewriter,
597 LoopInfo *LI) {
598 SmallVector<WideIVInfo, 8> WideIVs;
599
600 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
601 Intrinsic::getName(Intrinsic::experimental_guard));
602 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
603
604 SmallVector<PHINode*, 8> LoopPhis;
605 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
606 LoopPhis.push_back(cast<PHINode>(I));
607 }
608 // Each round of simplification iterates through the SimplifyIVUsers worklist
609 // for all current phis, then determines whether any IVs can be
610 // widened. Widening adds new phis to LoopPhis, inducing another round of
611 // simplification on the wide IVs.
612 bool Changed = false;
613 while (!LoopPhis.empty()) {
614 // Evaluate as many IV expressions as possible before widening any IVs. This
615 // forces SCEV to set no-wrap flags before evaluating sign/zero
616 // extension. The first time SCEV attempts to normalize sign/zero extension,
617 // the result becomes final. So for the most predictable results, we delay
618 // evaluation of sign/zero extend evaluation until needed, and avoid running
619 // other SCEV based analysis prior to simplifyAndExtend.
620 do {
621 PHINode *CurrIV = LoopPhis.pop_back_val();
622
623 // Information about sign/zero extensions of CurrIV.
624 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
625
626 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
627 &Visitor);
628
629 if (Visitor.WI.WidestNativeType) {
630 WideIVs.push_back(Visitor.WI);
631 }
632 } while(!LoopPhis.empty());
633
634 // Continue if we disallowed widening.
635 if (!WidenIndVars)
636 continue;
637
638 for (; !WideIVs.empty(); WideIVs.pop_back()) {
639 unsigned ElimExt;
640 unsigned Widened;
641 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
642 DT, DeadInsts, ElimExt, Widened,
643 HasGuards, UsePostIncrementRanges)) {
644 NumElimExt += ElimExt;
645 NumWidened += Widened;
646 Changed = true;
647 LoopPhis.push_back(WidePhi);
648 }
649 }
650 }
651 return Changed;
652}
653
654//===----------------------------------------------------------------------===//
655// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
656//===----------------------------------------------------------------------===//
657
658/// Given an Value which is hoped to be part of an add recurance in the given
659/// loop, return the associated Phi node if so. Otherwise, return null. Note
660/// that this is less general than SCEVs AddRec checking.
661static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
662 Instruction *IncI = dyn_cast<Instruction>(IncV);
663 if (!IncI)
664 return nullptr;
665
666 switch (IncI->getOpcode()) {
667 case Instruction::Add:
668 case Instruction::Sub:
669 break;
670 case Instruction::GetElementPtr:
671 // An IV counter must preserve its type.
672 if (IncI->getNumOperands() == 2)
673 break;
674 LLVM_FALLTHROUGH[[gnu::fallthrough]];
675 default:
676 return nullptr;
677 }
678
679 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
680 if (Phi && Phi->getParent() == L->getHeader()) {
681 if (L->isLoopInvariant(IncI->getOperand(1)))
682 return Phi;
683 return nullptr;
684 }
685 if (IncI->getOpcode() == Instruction::GetElementPtr)
686 return nullptr;
687
688 // Allow add/sub to be commuted.
689 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
690 if (Phi && Phi->getParent() == L->getHeader()) {
691 if (L->isLoopInvariant(IncI->getOperand(0)))
692 return Phi;
693 }
694 return nullptr;
695}
696
697/// Whether the current loop exit test is based on this value. Currently this
698/// is limited to a direct use in the loop condition.
699static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
700 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
701 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
702 // TODO: Allow non-icmp loop test.
703 if (!ICmp)
704 return false;
705
706 // TODO: Allow indirect use.
707 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
708}
709
710/// linearFunctionTestReplace policy. Return true unless we can show that the
711/// current exit test is already sufficiently canonical.
712static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
713 assert(L->getLoopLatch() && "Must be in simplified form")(static_cast <bool> (L->getLoopLatch() && "Must be in simplified form"
) ? void (0) : __assert_fail ("L->getLoopLatch() && \"Must be in simplified form\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 713, __extension__ __PRETTY_FUNCTION__))
;
714
715 // Avoid converting a constant or loop invariant test back to a runtime
716 // test. This is critical for when SCEV's cached ExitCount is less precise
717 // than the current IR (such as after we've proven a particular exit is
718 // actually dead and thus the BE count never reaches our ExitCount.)
719 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
720 if (L->isLoopInvariant(BI->getCondition()))
721 return false;
722
723 // Do LFTR to simplify the exit condition to an ICMP.
724 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
725 if (!Cond)
726 return true;
727
728 // Do LFTR to simplify the exit ICMP to EQ/NE
729 ICmpInst::Predicate Pred = Cond->getPredicate();
730 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
731 return true;
732
733 // Look for a loop invariant RHS
734 Value *LHS = Cond->getOperand(0);
735 Value *RHS = Cond->getOperand(1);
736 if (!L->isLoopInvariant(RHS)) {
737 if (!L->isLoopInvariant(LHS))
738 return true;
739 std::swap(LHS, RHS);
740 }
741 // Look for a simple IV counter LHS
742 PHINode *Phi = dyn_cast<PHINode>(LHS);
743 if (!Phi)
744 Phi = getLoopPhiForCounter(LHS, L);
745
746 if (!Phi)
747 return true;
748
749 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
750 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
751 if (Idx < 0)
752 return true;
753
754 // Do LFTR if the exit condition's IV is *not* a simple counter.
755 Value *IncV = Phi->getIncomingValue(Idx);
756 return Phi != getLoopPhiForCounter(IncV, L);
757}
758
759/// Return true if undefined behavior would provable be executed on the path to
760/// OnPathTo if Root produced a posion result. Note that this doesn't say
761/// anything about whether OnPathTo is actually executed or whether Root is
762/// actually poison. This can be used to assess whether a new use of Root can
763/// be added at a location which is control equivalent with OnPathTo (such as
764/// immediately before it) without introducing UB which didn't previously
765/// exist. Note that a false result conveys no information.
766static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
767 Instruction *OnPathTo,
768 DominatorTree *DT) {
769 // Basic approach is to assume Root is poison, propagate poison forward
770 // through all users we can easily track, and then check whether any of those
771 // users are provable UB and must execute before out exiting block might
772 // exit.
773
774 // The set of all recursive users we've visited (which are assumed to all be
775 // poison because of said visit)
776 SmallSet<const Value *, 16> KnownPoison;
777 SmallVector<const Instruction*, 16> Worklist;
778 Worklist.push_back(Root);
779 while (!Worklist.empty()) {
780 const Instruction *I = Worklist.pop_back_val();
781
782 // If we know this must trigger UB on a path leading our target.
783 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
784 return true;
785
786 // If we can't analyze propagation through this instruction, just skip it
787 // and transitive users. Safe as false is a conservative result.
788 if (!propagatesPoison(cast<Operator>(I)) && I != Root)
789 continue;
790
791 if (KnownPoison.insert(I).second)
792 for (const User *User : I->users())
793 Worklist.push_back(cast<Instruction>(User));
794 }
795
796 // Might be non-UB, or might have a path we couldn't prove must execute on
797 // way to exiting bb.
798 return false;
799}
800
801/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
802/// down to checking that all operands are constant and listing instructions
803/// that may hide undef.
804static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
805 unsigned Depth) {
806 if (isa<Constant>(V))
807 return !isa<UndefValue>(V);
808
809 if (Depth >= 6)
810 return false;
811
812 // Conservatively handle non-constant non-instructions. For example, Arguments
813 // may be undef.
814 Instruction *I = dyn_cast<Instruction>(V);
815 if (!I)
816 return false;
817
818 // Load and return values may be undef.
819 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
820 return false;
821
822 // Optimistically handle other instructions.
823 for (Value *Op : I->operands()) {
824 if (!Visited.insert(Op).second)
825 continue;
826 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
827 return false;
828 }
829 return true;
830}
831
832/// Return true if the given value is concrete. We must prove that undef can
833/// never reach it.
834///
835/// TODO: If we decide that this is a good approach to checking for undef, we
836/// may factor it into a common location.
837static bool hasConcreteDef(Value *V) {
838 SmallPtrSet<Value*, 8> Visited;
839 Visited.insert(V);
840 return hasConcreteDefImpl(V, Visited, 0);
841}
842
843/// Return true if this IV has any uses other than the (soon to be rewritten)
844/// loop exit test.
845static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
846 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
847 Value *IncV = Phi->getIncomingValue(LatchIdx);
848
849 for (User *U : Phi->users())
850 if (U != Cond && U != IncV) return false;
851
852 for (User *U : IncV->users())
853 if (U != Cond && U != Phi) return false;
854 return true;
855}
856
857/// Return true if the given phi is a "counter" in L. A counter is an
858/// add recurance (of integer or pointer type) with an arbitrary start, and a
859/// step of 1. Note that L must have exactly one latch.
860static bool isLoopCounter(PHINode* Phi, Loop *L,
861 ScalarEvolution *SE) {
862 assert(Phi->getParent() == L->getHeader())(static_cast <bool> (Phi->getParent() == L->getHeader
()) ? void (0) : __assert_fail ("Phi->getParent() == L->getHeader()"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 862, __extension__ __PRETTY_FUNCTION__))
;
863 assert(L->getLoopLatch())(static_cast <bool> (L->getLoopLatch()) ? void (0) :
__assert_fail ("L->getLoopLatch()", "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 863, __extension__ __PRETTY_FUNCTION__))
;
864
865 if (!SE->isSCEVable(Phi->getType()))
866 return false;
867
868 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
869 if (!AR || AR->getLoop() != L || !AR->isAffine())
870 return false;
871
872 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
873 if (!Step || !Step->isOne())
874 return false;
875
876 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
877 Value *IncV = Phi->getIncomingValue(LatchIdx);
878 return (getLoopPhiForCounter(IncV, L) == Phi &&
879 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
880}
881
882/// Search the loop header for a loop counter (anadd rec w/step of one)
883/// suitable for use by LFTR. If multiple counters are available, select the
884/// "best" one based profitable heuristics.
885///
886/// BECount may be an i8* pointer type. The pointer difference is already
887/// valid count without scaling the address stride, so it remains a pointer
888/// expression as far as SCEV is concerned.
889static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
890 const SCEV *BECount,
891 ScalarEvolution *SE, DominatorTree *DT) {
892 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
893
894 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
895
896 // Loop over all of the PHI nodes, looking for a simple counter.
897 PHINode *BestPhi = nullptr;
898 const SCEV *BestInit = nullptr;
899 BasicBlock *LatchBlock = L->getLoopLatch();
900 assert(LatchBlock && "Must be in simplified form")(static_cast <bool> (LatchBlock && "Must be in simplified form"
) ? void (0) : __assert_fail ("LatchBlock && \"Must be in simplified form\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 900, __extension__ __PRETTY_FUNCTION__))
;
901 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
902
903 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
904 PHINode *Phi = cast<PHINode>(I);
905 if (!isLoopCounter(Phi, L, SE))
906 continue;
907
908 // Avoid comparing an integer IV against a pointer Limit.
909 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
910 continue;
911
912 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
913
914 // AR may be a pointer type, while BECount is an integer type.
915 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
916 // AR may not be a narrower type, or we may never exit.
917 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
918 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
919 continue;
920
921 // Avoid reusing a potentially undef value to compute other values that may
922 // have originally had a concrete definition.
923 if (!hasConcreteDef(Phi)) {
924 // We explicitly allow unknown phis as long as they are already used by
925 // the loop exit test. This is legal since performing LFTR could not
926 // increase the number of undef users.
927 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
928 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
929 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
930 continue;
931 }
932
933 // Avoid introducing undefined behavior due to poison which didn't exist in
934 // the original program. (Annoyingly, the rules for poison and undef
935 // propagation are distinct, so this does NOT cover the undef case above.)
936 // We have to ensure that we don't introduce UB by introducing a use on an
937 // iteration where said IV produces poison. Our strategy here differs for
938 // pointers and integer IVs. For integers, we strip and reinfer as needed,
939 // see code in linearFunctionTestReplace. For pointers, we restrict
940 // transforms as there is no good way to reinfer inbounds once lost.
941 if (!Phi->getType()->isIntegerTy() &&
942 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
943 continue;
944
945 const SCEV *Init = AR->getStart();
946
947 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
948 // Don't force a live loop counter if another IV can be used.
949 if (AlmostDeadIV(Phi, LatchBlock, Cond))
950 continue;
951
952 // Prefer to count-from-zero. This is a more "canonical" counter form. It
953 // also prefers integer to pointer IVs.
954 if (BestInit->isZero() != Init->isZero()) {
955 if (BestInit->isZero())
956 continue;
957 }
958 // If two IVs both count from zero or both count from nonzero then the
959 // narrower is likely a dead phi that has been widened. Use the wider phi
960 // to allow the other to be eliminated.
961 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
962 continue;
963 }
964 BestPhi = Phi;
965 BestInit = Init;
966 }
967 return BestPhi;
968}
969
970/// Insert an IR expression which computes the value held by the IV IndVar
971/// (which must be an loop counter w/unit stride) after the backedge of loop L
972/// is taken ExitCount times.
973static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
974 const SCEV *ExitCount, bool UsePostInc, Loop *L,
975 SCEVExpander &Rewriter, ScalarEvolution *SE) {
976 assert(isLoopCounter(IndVar, L, SE))(static_cast <bool> (isLoopCounter(IndVar, L, SE)) ? void
(0) : __assert_fail ("isLoopCounter(IndVar, L, SE)", "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 976, __extension__ __PRETTY_FUNCTION__))
;
977 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
978 const SCEV *IVInit = AR->getStart();
979
980 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
981 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
982 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
983 // the existing GEPs whenever possible.
984 if (IndVar->getType()->isPointerTy() &&
985 !ExitCount->getType()->isPointerTy()) {
986 // IVOffset will be the new GEP offset that is interpreted by GEP as a
987 // signed value. ExitCount on the other hand represents the loop trip count,
988 // which is an unsigned value. FindLoopCounter only allows induction
989 // variables that have a positive unit stride of one. This means we don't
990 // have to handle the case of negative offsets (yet) and just need to zero
991 // extend ExitCount.
992 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
993 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
994 if (UsePostInc)
995 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
996
997 // Expand the code for the iteration count.
998 assert(SE->isLoopInvariant(IVOffset, L) &&(static_cast <bool> (SE->isLoopInvariant(IVOffset, L
) && "Computed iteration count is not loop invariant!"
) ? void (0) : __assert_fail ("SE->isLoopInvariant(IVOffset, L) && \"Computed iteration count is not loop invariant!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 999, __extension__ __PRETTY_FUNCTION__))
999 "Computed iteration count is not loop invariant!")(static_cast <bool> (SE->isLoopInvariant(IVOffset, L
) && "Computed iteration count is not loop invariant!"
) ? void (0) : __assert_fail ("SE->isLoopInvariant(IVOffset, L) && \"Computed iteration count is not loop invariant!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 999, __extension__ __PRETTY_FUNCTION__))
;
1000
1001 // We could handle pointer IVs other than i8*, but we need to compensate for
1002 // gep index scaling.
1003 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),(static_cast <bool> (SE->getSizeOfExpr(IntegerType::
getInt64Ty(IndVar->getContext()), cast<PointerType>(
IndVar->getType()) ->getElementType())->isOne() &&
"unit stride pointer IV must be i8*") ? void (0) : __assert_fail
("SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), cast<PointerType>(IndVar->getType()) ->getElementType())->isOne() && \"unit stride pointer IV must be i8*\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1006, __extension__ __PRETTY_FUNCTION__))
1004 cast<PointerType>(IndVar->getType())(static_cast <bool> (SE->getSizeOfExpr(IntegerType::
getInt64Ty(IndVar->getContext()), cast<PointerType>(
IndVar->getType()) ->getElementType())->isOne() &&
"unit stride pointer IV must be i8*") ? void (0) : __assert_fail
("SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), cast<PointerType>(IndVar->getType()) ->getElementType())->isOne() && \"unit stride pointer IV must be i8*\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1006, __extension__ __PRETTY_FUNCTION__))
1005 ->getElementType())->isOne() &&(static_cast <bool> (SE->getSizeOfExpr(IntegerType::
getInt64Ty(IndVar->getContext()), cast<PointerType>(
IndVar->getType()) ->getElementType())->isOne() &&
"unit stride pointer IV must be i8*") ? void (0) : __assert_fail
("SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), cast<PointerType>(IndVar->getType()) ->getElementType())->isOne() && \"unit stride pointer IV must be i8*\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1006, __extension__ __PRETTY_FUNCTION__))
1006 "unit stride pointer IV must be i8*")(static_cast <bool> (SE->getSizeOfExpr(IntegerType::
getInt64Ty(IndVar->getContext()), cast<PointerType>(
IndVar->getType()) ->getElementType())->isOne() &&
"unit stride pointer IV must be i8*") ? void (0) : __assert_fail
("SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), cast<PointerType>(IndVar->getType()) ->getElementType())->isOne() && \"unit stride pointer IV must be i8*\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1006, __extension__ __PRETTY_FUNCTION__))
;
1007
1008 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
1009 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1010 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
1011 } else {
1012 // In any other case, convert both IVInit and ExitCount to integers before
1013 // comparing. This may result in SCEV expansion of pointers, but in practice
1014 // SCEV will fold the pointer arithmetic away as such:
1015 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
1016 //
1017 // Valid Cases: (1) both integers is most common; (2) both may be pointers
1018 // for simple memset-style loops.
1019 //
1020 // IVInit integer and ExitCount pointer would only occur if a canonical IV
1021 // were generated on top of case #2, which is not expected.
1022
1023 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride")(static_cast <bool> (AR->getStepRecurrence(*SE)->
isOne() && "only handles unit stride") ? void (0) : __assert_fail
("AR->getStepRecurrence(*SE)->isOne() && \"only handles unit stride\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1023, __extension__ __PRETTY_FUNCTION__))
;
1024 // For unit stride, IVCount = Start + ExitCount with 2's complement
1025 // overflow.
1026
1027 // For integer IVs, truncate the IV before computing IVInit + BECount,
1028 // unless we know apriori that the limit must be a constant when evaluated
1029 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
1030 // of the IV in the loop over a (potentially) expensive expansion of the
1031 // widened exit count add(zext(add)) expression.
1032 if (SE->getTypeSizeInBits(IVInit->getType())
1033 > SE->getTypeSizeInBits(ExitCount->getType())) {
1034 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
1035 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
1036 else
1037 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
1038 }
1039
1040 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
1041
1042 if (UsePostInc)
1043 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
1044
1045 // Expand the code for the iteration count.
1046 assert(SE->isLoopInvariant(IVLimit, L) &&(static_cast <bool> (SE->isLoopInvariant(IVLimit, L)
&& "Computed iteration count is not loop invariant!"
) ? void (0) : __assert_fail ("SE->isLoopInvariant(IVLimit, L) && \"Computed iteration count is not loop invariant!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1047, __extension__ __PRETTY_FUNCTION__))
1047 "Computed iteration count is not loop invariant!")(static_cast <bool> (SE->isLoopInvariant(IVLimit, L)
&& "Computed iteration count is not loop invariant!"
) ? void (0) : __assert_fail ("SE->isLoopInvariant(IVLimit, L) && \"Computed iteration count is not loop invariant!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1047, __extension__ __PRETTY_FUNCTION__))
;
1048 // Ensure that we generate the same type as IndVar, or a smaller integer
1049 // type. In the presence of null pointer values, we have an integer type
1050 // SCEV expression (IVInit) for a pointer type IV value (IndVar).
1051 Type *LimitTy = ExitCount->getType()->isPointerTy() ?
1052 IndVar->getType() : ExitCount->getType();
1053 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1054 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
1055 }
1056}
1057
1058/// This method rewrites the exit condition of the loop to be a canonical !=
1059/// comparison against the incremented loop induction variable. This pass is
1060/// able to rewrite the exit tests of any loop where the SCEV analysis can
1061/// determine a loop-invariant trip count of the loop, which is actually a much
1062/// broader range than just linear tests.
1063bool IndVarSimplify::
1064linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
1065 const SCEV *ExitCount,
1066 PHINode *IndVar, SCEVExpander &Rewriter) {
1067 assert(L->getLoopLatch() && "Loop no longer in simplified form?")(static_cast <bool> (L->getLoopLatch() && "Loop no longer in simplified form?"
) ? void (0) : __assert_fail ("L->getLoopLatch() && \"Loop no longer in simplified form?\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1067, __extension__ __PRETTY_FUNCTION__))
;
1068 assert(isLoopCounter(IndVar, L, SE))(static_cast <bool> (isLoopCounter(IndVar, L, SE)) ? void
(0) : __assert_fail ("isLoopCounter(IndVar, L, SE)", "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1068, __extension__ __PRETTY_FUNCTION__))
;
1069 Instruction * const IncVar =
1070 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
1071
1072 // Initialize CmpIndVar to the preincremented IV.
1073 Value *CmpIndVar = IndVar;
1074 bool UsePostInc = false;
1075
1076 // If the exiting block is the same as the backedge block, we prefer to
1077 // compare against the post-incremented value, otherwise we must compare
1078 // against the preincremented value.
1079 if (ExitingBB == L->getLoopLatch()) {
1080 // For pointer IVs, we chose to not strip inbounds which requires us not
1081 // to add a potentially UB introducing use. We need to either a) show
1082 // the loop test we're modifying is already in post-inc form, or b) show
1083 // that adding a use must not introduce UB.
1084 bool SafeToPostInc =
1085 IndVar->getType()->isIntegerTy() ||
1086 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
1087 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
1088 if (SafeToPostInc) {
1089 UsePostInc = true;
1090 CmpIndVar = IncVar;
1091 }
1092 }
1093
1094 // It may be necessary to drop nowrap flags on the incrementing instruction
1095 // if either LFTR moves from a pre-inc check to a post-inc check (in which
1096 // case the increment might have previously been poison on the last iteration
1097 // only) or if LFTR switches to a different IV that was previously dynamically
1098 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
1099 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
1100 // check), because the pre-inc addrec flags may be adopted from the original
1101 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
1102 // TODO: This handling is inaccurate for one case: If we switch to a
1103 // dynamically dead IV that wraps on the first loop iteration only, which is
1104 // not covered by the post-inc addrec. (If the new IV was not dynamically
1105 // dead, it could not be poison on the first iteration in the first place.)
1106 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
1107 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
1108 if (BO->hasNoUnsignedWrap())
1109 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
1110 if (BO->hasNoSignedWrap())
1111 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
1112 }
1113
1114 Value *ExitCnt = genLoopLimit(
1115 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1116 assert(ExitCnt->getType()->isPointerTy() ==(static_cast <bool> (ExitCnt->getType()->isPointerTy
() == IndVar->getType()->isPointerTy() && "genLoopLimit missed a cast"
) ? void (0) : __assert_fail ("ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy() && \"genLoopLimit missed a cast\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1118, __extension__ __PRETTY_FUNCTION__))
1117 IndVar->getType()->isPointerTy() &&(static_cast <bool> (ExitCnt->getType()->isPointerTy
() == IndVar->getType()->isPointerTy() && "genLoopLimit missed a cast"
) ? void (0) : __assert_fail ("ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy() && \"genLoopLimit missed a cast\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1118, __extension__ __PRETTY_FUNCTION__))
1118 "genLoopLimit missed a cast")(static_cast <bool> (ExitCnt->getType()->isPointerTy
() == IndVar->getType()->isPointerTy() && "genLoopLimit missed a cast"
) ? void (0) : __assert_fail ("ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy() && \"genLoopLimit missed a cast\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1118, __extension__ __PRETTY_FUNCTION__))
;
1119
1120 // Insert a new icmp_ne or icmp_eq instruction before the branch.
1121 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1122 ICmpInst::Predicate P;
1123 if (L->contains(BI->getSuccessor(0)))
1124 P = ICmpInst::ICMP_NE;
1125 else
1126 P = ICmpInst::ICMP_EQ;
1127
1128 IRBuilder<> Builder(BI);
1129
1130 // The new loop exit condition should reuse the debug location of the
1131 // original loop exit condition.
1132 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1133 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1134
1135 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1136 // avoid the expensive expansion of the limit expression in the wider type,
1137 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1138 // since we know (from the exit count bitwidth), that we can't self-wrap in
1139 // the narrower type.
1140 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1141 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1142 if (CmpIndVarSize > ExitCntSize) {
1143 assert(!CmpIndVar->getType()->isPointerTy() &&(static_cast <bool> (!CmpIndVar->getType()->isPointerTy
() && !ExitCnt->getType()->isPointerTy()) ? void
(0) : __assert_fail ("!CmpIndVar->getType()->isPointerTy() && !ExitCnt->getType()->isPointerTy()"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1144, __extension__ __PRETTY_FUNCTION__))
1144 !ExitCnt->getType()->isPointerTy())(static_cast <bool> (!CmpIndVar->getType()->isPointerTy
() && !ExitCnt->getType()->isPointerTy()) ? void
(0) : __assert_fail ("!CmpIndVar->getType()->isPointerTy() && !ExitCnt->getType()->isPointerTy()"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1144, __extension__ __PRETTY_FUNCTION__))
;
1145
1146 // Before resorting to actually inserting the truncate, use the same
1147 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1148 // the other side of the comparison instead. We still evaluate the limit
1149 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1150 // a truncate within in.
1151 bool Extended = false;
1152 const SCEV *IV = SE->getSCEV(CmpIndVar);
1153 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
1154 ExitCnt->getType());
1155 const SCEV *ZExtTrunc =
1156 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1157
1158 if (ZExtTrunc == IV) {
1159 Extended = true;
1160 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1161 "wide.trip.count");
1162 } else {
1163 const SCEV *SExtTrunc =
1164 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1165 if (SExtTrunc == IV) {
1166 Extended = true;
1167 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1168 "wide.trip.count");
1169 }
1170 }
1171
1172 if (Extended) {
1173 bool Discard;
1174 L->makeLoopInvariant(ExitCnt, Discard);
1175 } else
1176 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1177 "lftr.wideiv");
1178 }
1179 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("indvars")) { dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n' <<
" op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "=="
) << "\n" << " RHS:\t" << *ExitCnt <<
"\n" << "ExitCount:\t" << *ExitCount << "\n"
<< " was: " << *BI->getCondition() << "\n"
; } } while (false)
1180 << " LHS:" << *CmpIndVar << '\n'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("indvars")) { dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n' <<
" op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "=="
) << "\n" << " RHS:\t" << *ExitCnt <<
"\n" << "ExitCount:\t" << *ExitCount << "\n"
<< " was: " << *BI->getCondition() << "\n"
; } } while (false)
1181 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("indvars")) { dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n' <<
" op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "=="
) << "\n" << " RHS:\t" << *ExitCnt <<
"\n" << "ExitCount:\t" << *ExitCount << "\n"
<< " was: " << *BI->getCondition() << "\n"
; } } while (false)
1182 << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("indvars")) { dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n' <<
" op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "=="
) << "\n" << " RHS:\t" << *ExitCnt <<
"\n" << "ExitCount:\t" << *ExitCount << "\n"
<< " was: " << *BI->getCondition() << "\n"
; } } while (false)
1183 << " RHS:\t" << *ExitCnt << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("indvars")) { dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n' <<
" op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "=="
) << "\n" << " RHS:\t" << *ExitCnt <<
"\n" << "ExitCount:\t" << *ExitCount << "\n"
<< " was: " << *BI->getCondition() << "\n"
; } } while (false)
1184 << "ExitCount:\t" << *ExitCount << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("indvars")) { dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n' <<
" op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "=="
) << "\n" << " RHS:\t" << *ExitCnt <<
"\n" << "ExitCount:\t" << *ExitCount << "\n"
<< " was: " << *BI->getCondition() << "\n"
; } } while (false)
1185 << " was: " << *BI->getCondition() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("indvars")) { dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n' <<
" op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "=="
) << "\n" << " RHS:\t" << *ExitCnt <<
"\n" << "ExitCount:\t" << *ExitCount << "\n"
<< " was: " << *BI->getCondition() << "\n"
; } } while (false)
;
1186
1187 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1188 Value *OrigCond = BI->getCondition();
1189 // It's tempting to use replaceAllUsesWith here to fully replace the old
1190 // comparison, but that's not immediately safe, since users of the old
1191 // comparison may not be dominated by the new comparison. Instead, just
1192 // update the branch to use the new comparison; in the common case this
1193 // will make old comparison dead.
1194 BI->setCondition(Cond);
1195 DeadInsts.emplace_back(OrigCond);
1196
1197 ++NumLFTR;
1198 return true;
1199}
1200
1201//===----------------------------------------------------------------------===//
1202// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1203//===----------------------------------------------------------------------===//
1204
1205/// If there's a single exit block, sink any loop-invariant values that
1206/// were defined in the preheader but not used inside the loop into the
1207/// exit block to reduce register pressure in the loop.
1208bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1209 BasicBlock *ExitBlock = L->getExitBlock();
1210 if (!ExitBlock) return false;
1211
1212 BasicBlock *Preheader = L->getLoopPreheader();
1213 if (!Preheader) return false;
1214
1215 bool MadeAnyChanges = false;
1216 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1217 BasicBlock::iterator I(Preheader->getTerminator());
1218 while (I != Preheader->begin()) {
1219 --I;
1220 // New instructions were inserted at the end of the preheader.
1221 if (isa<PHINode>(I))
1222 break;
1223
1224 // Don't move instructions which might have side effects, since the side
1225 // effects need to complete before instructions inside the loop. Also don't
1226 // move instructions which might read memory, since the loop may modify
1227 // memory. Note that it's okay if the instruction might have undefined
1228 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1229 // block.
1230 if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1231 continue;
1232
1233 // Skip debug info intrinsics.
1234 if (isa<DbgInfoIntrinsic>(I))
1235 continue;
1236
1237 // Skip eh pad instructions.
1238 if (I->isEHPad())
1239 continue;
1240
1241 // Don't sink alloca: we never want to sink static alloca's out of the
1242 // entry block, and correctly sinking dynamic alloca's requires
1243 // checks for stacksave/stackrestore intrinsics.
1244 // FIXME: Refactor this check somehow?
1245 if (isa<AllocaInst>(I))
1246 continue;
1247
1248 // Determine if there is a use in or before the loop (direct or
1249 // otherwise).
1250 bool UsedInLoop = false;
1251 for (Use &U : I->uses()) {
1252 Instruction *User = cast<Instruction>(U.getUser());
1253 BasicBlock *UseBB = User->getParent();
1254 if (PHINode *P = dyn_cast<PHINode>(User)) {
1255 unsigned i =
1256 PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1257 UseBB = P->getIncomingBlock(i);
1258 }
1259 if (UseBB == Preheader || L->contains(UseBB)) {
1260 UsedInLoop = true;
1261 break;
1262 }
1263 }
1264
1265 // If there is, the def must remain in the preheader.
1266 if (UsedInLoop)
1267 continue;
1268
1269 // Otherwise, sink it to the exit block.
1270 Instruction *ToMove = &*I;
1271 bool Done = false;
1272
1273 if (I != Preheader->begin()) {
1274 // Skip debug info intrinsics.
1275 do {
1276 --I;
1277 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
1278
1279 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
1280 Done = true;
1281 } else {
1282 Done = true;
1283 }
1284
1285 MadeAnyChanges = true;
1286 ToMove->moveBefore(*ExitBlock, InsertPt);
1287 if (Done) break;
1288 InsertPt = ToMove->getIterator();
1289 }
1290
1291 return MadeAnyChanges;
1292}
1293
1294static void replaceExitCond(BranchInst *BI, Value *NewCond,
1295 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1296 auto *OldCond = BI->getCondition();
1297 BI->setCondition(NewCond);
1298 if (OldCond->use_empty())
1299 DeadInsts.emplace_back(OldCond);
1300}
1301
1302static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1303 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1304 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1305 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1306 auto *OldCond = BI->getCondition();
1307 auto *NewCond =
1308 ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1309 replaceExitCond(BI, NewCond, DeadInsts);
1310}
1311
1312static void replaceWithInvariantCond(
1313 const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1314 const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1315 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1316 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1317 Rewriter.setInsertPoint(BI);
1318 auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1319 auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1320 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1321 if (ExitIfTrue)
1322 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1323 IRBuilder<> Builder(BI);
1324 auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1325 BI->getCondition()->getName());
1326 replaceExitCond(BI, NewCond, DeadInsts);
1327}
1328
1329static bool optimizeLoopExitWithUnknownExitCount(
1330 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
1331 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1332 ScalarEvolution *SE, SCEVExpander &Rewriter,
1333 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1334 ICmpInst::Predicate Pred;
1335 Value *LHS, *RHS;
1336 using namespace PatternMatch;
1337 BasicBlock *TrueSucc, *FalseSucc;
1338 if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
1339 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
1340 return false;
1341
1342 assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&(static_cast <bool> ((L->contains(TrueSucc) != L->
contains(FalseSucc)) && "Not a loop exit!") ? void (0
) : __assert_fail ("(L->contains(TrueSucc) != L->contains(FalseSucc)) && \"Not a loop exit!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1343, __extension__ __PRETTY_FUNCTION__))
1343 "Not a loop exit!")(static_cast <bool> ((L->contains(TrueSucc) != L->
contains(FalseSucc)) && "Not a loop exit!") ? void (0
) : __assert_fail ("(L->contains(TrueSucc) != L->contains(FalseSucc)) && \"Not a loop exit!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1343, __extension__ __PRETTY_FUNCTION__))
;
1344
1345 // 'LHS pred RHS' should now mean that we stay in loop.
1346 if (L->contains(FalseSucc))
1347 Pred = CmpInst::getInversePredicate(Pred);
1348
1349 // If we are proving loop exit, invert the predicate.
1350 if (Inverted)
1351 Pred = CmpInst::getInversePredicate(Pred);
1352
1353 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1354 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1355 // Can we prove it to be trivially true?
1356 if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
1357 foldExit(L, ExitingBB, Inverted, DeadInsts);
1358 return true;
1359 }
1360 // Further logic works for non-inverted condition only.
1361 if (Inverted)
1362 return false;
1363
1364 auto *ARTy = LHSS->getType();
1365 auto *MaxIterTy = MaxIter->getType();
1366 // If possible, adjust types.
1367 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1368 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1369 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1370 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1371 auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1372 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1373 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1374 }
1375
1376 if (SkipLastIter) {
1377 const SCEV *One = SE->getOne(MaxIter->getType());
1378 MaxIter = SE->getMinusSCEV(MaxIter, One);
1379 }
1380
1381 // Check if there is a loop-invariant predicate equivalent to our check.
1382 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1383 L, BI, MaxIter);
1384 if (!LIP)
1385 return false;
1386
1387 // Can we prove it to be trivially true?
1388 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1389 foldExit(L, ExitingBB, Inverted, DeadInsts);
1390 else
1391 replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
1392 Rewriter, DeadInsts);
1393
1394 return true;
1395}
1396
1397bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1398 SmallVector<BasicBlock*, 16> ExitingBlocks;
1399 L->getExitingBlocks(ExitingBlocks);
1400
1401 // Remove all exits which aren't both rewriteable and execute on every
1402 // iteration.
1403 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1404 // If our exitting block exits multiple loops, we can only rewrite the
1405 // innermost one. Otherwise, we're changing how many times the innermost
1406 // loop runs before it exits.
1407 if (LI->getLoopFor(ExitingBB) != L)
1408 return true;
1409
1410 // Can't rewrite non-branch yet.
1411 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1412 if (!BI)
1413 return true;
1414
1415 // If already constant, nothing to do.
1416 if (isa<Constant>(BI->getCondition()))
1417 return true;
1418
1419 // Likewise, the loop latch must be dominated by the exiting BB.
1420 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1421 return true;
1422
1423 return false;
1424 });
1425
1426 if (ExitingBlocks.empty())
1427 return false;
1428
1429 // Get a symbolic upper bound on the loop backedge taken count.
1430 const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1431 if (isa<SCEVCouldNotCompute>(MaxExitCount))
1432 return false;
1433
1434 // Visit our exit blocks in order of dominance. We know from the fact that
1435 // all exits must dominate the latch, so there is a total dominance order
1436 // between them.
1437 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1438 // std::sort sorts in ascending order, so we want the inverse of
1439 // the normal dominance relation.
1440 if (A == B) return false;
1441 if (DT->properlyDominates(A, B))
1442 return true;
1443 else {
1444 assert(DT->properlyDominates(B, A) &&(static_cast <bool> (DT->properlyDominates(B, A) &&
"expected total dominance order!") ? void (0) : __assert_fail
("DT->properlyDominates(B, A) && \"expected total dominance order!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1445, __extension__ __PRETTY_FUNCTION__))
1445 "expected total dominance order!")(static_cast <bool> (DT->properlyDominates(B, A) &&
"expected total dominance order!") ? void (0) : __assert_fail
("DT->properlyDominates(B, A) && \"expected total dominance order!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1445, __extension__ __PRETTY_FUNCTION__))
;
1446 return false;
1447 }
1448 });
1449#ifdef ASSERT
1450 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1451 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))(static_cast <bool> (DT->dominates(ExitingBlocks[i-1
], ExitingBlocks[i])) ? void (0) : __assert_fail ("DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1451, __extension__ __PRETTY_FUNCTION__))
;
1452 }
1453#endif
1454
1455 bool Changed = false;
1456 bool SkipLastIter = false;
1457 SmallSet<const SCEV*, 8> DominatingExitCounts;
1458 for (BasicBlock *ExitingBB : ExitingBlocks) {
1459 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1460 if (isa<SCEVCouldNotCompute>(ExitCount)) {
1461 // Okay, we do not know the exit count here. Can we at least prove that it
1462 // will remain the same within iteration space?
1463 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1464 auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1465 return optimizeLoopExitWithUnknownExitCount(
1466 L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1467 Rewriter, DeadInsts);
1468 };
1469
1470 // TODO: We might have proved that we can skip the last iteration for
1471 // this check. In this case, we only want to check the condition on the
1472 // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1473 // corner case:
1474 //
1475 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1476 //
1477 // If we could not prove that len != 0, then we also could not prove that
1478 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1479 // OptimizeCond will likely not prove anything for it, even if it could
1480 // prove the same fact for len.
1481 //
1482 // As a temporary solution, we query both last and pre-last iterations in
1483 // hope that we will be able to prove triviality for at least one of
1484 // them. We can stop querying MaxExitCount for this case once SCEV
1485 // understands that (MaxExitCount - 1) will not overflow here.
1486 if (OptimizeCond(false, false) || OptimizeCond(true, false))
1487 Changed = true;
1488 else if (SkipLastIter)
1489 if (OptimizeCond(false, true) || OptimizeCond(true, true))
1490 Changed = true;
1491 continue;
1492 }
1493
1494 if (MaxExitCount == ExitCount)
1495 // If the loop has more than 1 iteration, all further checks will be
1496 // executed 1 iteration less.
1497 SkipLastIter = true;
1498
1499 // If we know we'd exit on the first iteration, rewrite the exit to
1500 // reflect this. This does not imply the loop must exit through this
1501 // exit; there may be an earlier one taken on the first iteration.
1502 // TODO: Given we know the backedge can't be taken, we should go ahead
1503 // and break it. Or at least, kill all the header phis and simplify.
1504 if (ExitCount->isZero()) {
1505 foldExit(L, ExitingBB, true, DeadInsts);
1506 Changed = true;
1507 continue;
1508 }
1509
1510 // If we end up with a pointer exit count, bail. Note that we can end up
1511 // with a pointer exit count for one exiting block, and not for another in
1512 // the same loop.
1513 if (!ExitCount->getType()->isIntegerTy() ||
1514 !MaxExitCount->getType()->isIntegerTy())
1515 continue;
1516
1517 Type *WiderType =
1518 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1519 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1520 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1521 assert(MaxExitCount->getType() == ExitCount->getType())(static_cast <bool> (MaxExitCount->getType() == ExitCount
->getType()) ? void (0) : __assert_fail ("MaxExitCount->getType() == ExitCount->getType()"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1521, __extension__ __PRETTY_FUNCTION__))
;
1522
1523 // Can we prove that some other exit must be taken strictly before this
1524 // one?
1525 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1526 MaxExitCount, ExitCount)) {
1527 foldExit(L, ExitingBB, false, DeadInsts);
1528 Changed = true;
1529 continue;
1530 }
1531
1532 // As we run, keep track of which exit counts we've encountered. If we
1533 // find a duplicate, we've found an exit which would have exited on the
1534 // exiting iteration, but (from the visit order) strictly follows another
1535 // which does the same and is thus dead.
1536 if (!DominatingExitCounts.insert(ExitCount).second) {
1537 foldExit(L, ExitingBB, false, DeadInsts);
1538 Changed = true;
1539 continue;
1540 }
1541
1542 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1543 // here. If we kept track of the min of dominanting exits so far, we could
1544 // discharge exits with EC >= MDEC. This is less powerful than the existing
1545 // transform (since later exits aren't considered), but potentially more
1546 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1547 // or a >u b. Such a case is not currently known.
1548 }
1549 return Changed;
1550}
1551
1552bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1553 SmallVector<BasicBlock*, 16> ExitingBlocks;
1554 L->getExitingBlocks(ExitingBlocks);
1555
1556 // Finally, see if we can rewrite our exit conditions into a loop invariant
1557 // form. If we have a read-only loop, and we can tell that we must exit down
1558 // a path which does not need any of the values computed within the loop, we
1559 // can rewrite the loop to exit on the first iteration. Note that this
1560 // doesn't either a) tell us the loop exits on the first iteration (unless
1561 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1562 // This transformation looks a lot like a restricted form of dead loop
1563 // elimination, but restricted to read-only loops and without neccesssarily
1564 // needing to kill the loop entirely.
1565 if (!LoopPredication)
1566 return false;
1567
1568 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1569 // through *explicit* control flow. We have to eliminate the possibility of
1570 // implicit exits (see below) before we know it's truly exact.
1571 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1572 if (isa<SCEVCouldNotCompute>(ExactBTC) ||
1573 !SE->isLoopInvariant(ExactBTC, L) ||
1574 !isSafeToExpand(ExactBTC, *SE))
1575 return false;
1576
1577 // If we end up with a pointer exit count, bail. It may be unsized.
1578 if (!ExactBTC->getType()->isIntegerTy())
1579 return false;
1580
1581 auto BadExit = [&](BasicBlock *ExitingBB) {
1582 // If our exiting block exits multiple loops, we can only rewrite the
1583 // innermost one. Otherwise, we're changing how many times the innermost
1584 // loop runs before it exits.
1585 if (LI->getLoopFor(ExitingBB) != L)
1586 return true;
1587
1588 // Can't rewrite non-branch yet.
1589 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1590 if (!BI)
1591 return true;
1592
1593 // If already constant, nothing to do.
1594 if (isa<Constant>(BI->getCondition()))
1595 return true;
1596
1597 // If the exit block has phis, we need to be able to compute the values
1598 // within the loop which contains them. This assumes trivially lcssa phis
1599 // have already been removed; TODO: generalize
1600 BasicBlock *ExitBlock =
1601 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1602 if (!ExitBlock->phis().empty())
1603 return true;
1604
1605 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1606 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1607 !SE->isLoopInvariant(ExitCount, L) ||
1608 !isSafeToExpand(ExitCount, *SE))
1609 return true;
1610
1611 // If we end up with a pointer exit count, bail. It may be unsized.
1612 if (!ExitCount->getType()->isIntegerTy())
1613 return true;
1614
1615 return false;
1616 };
1617
1618 // If we have any exits which can't be predicated themselves, than we can't
1619 // predicate any exit which isn't guaranteed to execute before it. Consider
1620 // two exits (a) and (b) which would both exit on the same iteration. If we
1621 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1622 // we could convert a loop from exiting through (a) to one exiting through
1623 // (b). Note that this problem exists only for exits with the same exit
1624 // count, and we could be more aggressive when exit counts are known inequal.
1625 llvm::sort(ExitingBlocks,
1626 [&](BasicBlock *A, BasicBlock *B) {
1627 // std::sort sorts in ascending order, so we want the inverse of
1628 // the normal dominance relation, plus a tie breaker for blocks
1629 // unordered by dominance.
1630 if (DT->properlyDominates(A, B)) return true;
1631 if (DT->properlyDominates(B, A)) return false;
1632 return A->getName() < B->getName();
1633 });
1634 // Check to see if our exit blocks are a total order (i.e. a linear chain of
1635 // exits before the backedge). If they aren't, reasoning about reachability
1636 // is complicated and we choose not to for now.
1637 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1638 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1639 return false;
1640
1641 // Given our sorted total order, we know that exit[j] must be evaluated
1642 // after all exit[i] such j > i.
1643 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1644 if (BadExit(ExitingBlocks[i])) {
1645 ExitingBlocks.resize(i);
1646 break;
1647 }
1648
1649 if (ExitingBlocks.empty())
1650 return false;
1651
1652 // We rely on not being able to reach an exiting block on a later iteration
1653 // then it's statically compute exit count. The implementaton of
1654 // getExitCount currently has this invariant, but assert it here so that
1655 // breakage is obvious if this ever changes..
1656 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {(static_cast <bool> (llvm::all_of(ExitingBlocks, [&
](BasicBlock *ExitingBB) { return DT->dominates(ExitingBB,
L->getLoopLatch()); })) ? void (0) : __assert_fail ("llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { return DT->dominates(ExitingBB, L->getLoopLatch()); })"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1658, __extension__ __PRETTY_FUNCTION__))
1657 return DT->dominates(ExitingBB, L->getLoopLatch());(static_cast <bool> (llvm::all_of(ExitingBlocks, [&
](BasicBlock *ExitingBB) { return DT->dominates(ExitingBB,
L->getLoopLatch()); })) ? void (0) : __assert_fail ("llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { return DT->dominates(ExitingBB, L->getLoopLatch()); })"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1658, __extension__ __PRETTY_FUNCTION__))
1658 }))(static_cast <bool> (llvm::all_of(ExitingBlocks, [&
](BasicBlock *ExitingBB) { return DT->dominates(ExitingBB,
L->getLoopLatch()); })) ? void (0) : __assert_fail ("llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { return DT->dominates(ExitingBB, L->getLoopLatch()); })"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1658, __extension__ __PRETTY_FUNCTION__))
;
1659
1660 // At this point, ExitingBlocks consists of only those blocks which are
1661 // predicatable. Given that, we know we have at least one exit we can
1662 // predicate if the loop is doesn't have side effects and doesn't have any
1663 // implicit exits (because then our exact BTC isn't actually exact).
1664 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1665 // suggestions on how to improve this? I can obviously bail out for outer
1666 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1667 // is that enough for *all* side effects?
1668 for (BasicBlock *BB : L->blocks())
1669 for (auto &I : *BB)
1670 // TODO:isGuaranteedToTransfer
1671 if (I.mayHaveSideEffects())
1672 return false;
1673
1674 bool Changed = false;
1675 // Finally, do the actual predication for all predicatable blocks. A couple
1676 // of notes here:
1677 // 1) We don't bother to constant fold dominated exits with identical exit
1678 // counts; that's simply a form of CSE/equality propagation and we leave
1679 // it for dedicated passes.
1680 // 2) We insert the comparison at the branch. Hoisting introduces additional
1681 // legality constraints and we leave that to dedicated logic. We want to
1682 // predicate even if we can't insert a loop invariant expression as
1683 // peeling or unrolling will likely reduce the cost of the otherwise loop
1684 // varying check.
1685 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1686 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1687 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1688 for (BasicBlock *ExitingBB : ExitingBlocks) {
1689 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1690
1691 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1692 Value *NewCond;
1693 if (ExitCount == ExactBTC) {
1694 NewCond = L->contains(BI->getSuccessor(0)) ?
1695 B.getFalse() : B.getTrue();
1696 } else {
1697 Value *ECV = Rewriter.expandCodeFor(ExitCount);
1698 if (!ExactBTCV)
1699 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1700 Value *RHS = ExactBTCV;
1701 if (ECV->getType() != RHS->getType()) {
1702 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1703 ECV = B.CreateZExt(ECV, WiderTy);
1704 RHS = B.CreateZExt(RHS, WiderTy);
1705 }
1706 auto Pred = L->contains(BI->getSuccessor(0)) ?
1707 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1708 NewCond = B.CreateICmp(Pred, ECV, RHS);
1709 }
1710 Value *OldCond = BI->getCondition();
1711 BI->setCondition(NewCond);
1712 if (OldCond->use_empty())
1713 DeadInsts.emplace_back(OldCond);
1714 Changed = true;
1715 }
1716
1717 return Changed;
1718}
1719
1720//===----------------------------------------------------------------------===//
1721// IndVarSimplify driver. Manage several subpasses of IV simplification.
1722//===----------------------------------------------------------------------===//
1723
1724bool IndVarSimplify::run(Loop *L) {
1725 // We need (and expect!) the incoming loop to be in LCSSA.
1726 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&(static_cast <bool> (L->isRecursivelyLCSSAForm(*DT, *
LI) && "LCSSA required to run indvars!") ? void (0) :
__assert_fail ("L->isRecursivelyLCSSAForm(*DT, *LI) && \"LCSSA required to run indvars!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1727, __extension__ __PRETTY_FUNCTION__))
3
Assuming the condition is true
4
'?' condition is true
1727 "LCSSA required to run indvars!")(static_cast <bool> (L->isRecursivelyLCSSAForm(*DT, *
LI) && "LCSSA required to run indvars!") ? void (0) :
__assert_fail ("L->isRecursivelyLCSSAForm(*DT, *LI) && \"LCSSA required to run indvars!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1727, __extension__ __PRETTY_FUNCTION__))
;
1728
1729 // If LoopSimplify form is not available, stay out of trouble. Some notes:
1730 // - LSR currently only supports LoopSimplify-form loops. Indvars'
1731 // canonicalization can be a pessimization without LSR to "clean up"
1732 // afterwards.
1733 // - We depend on having a preheader; in particular,
1734 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1735 // and we're in trouble if we can't find the induction variable even when
1736 // we've manually inserted one.
1737 // - LFTR relies on having a single backedge.
1738 if (!L->isLoopSimplifyForm())
5
Assuming the condition is false
6
Taking false branch
1739 return false;
1740
1741#ifndef NDEBUG
1742 // Used below for a consistency check only
1743 // Note: Since the result returned by ScalarEvolution may depend on the order
1744 // in which previous results are added to its cache, the call to
1745 // getBackedgeTakenCount() may change following SCEV queries.
1746 const SCEV *BackedgeTakenCount;
7
'BackedgeTakenCount' declared without an initial value
1747 if (VerifyIndvars)
8
Assuming the condition is false
9
Taking false branch
1748 BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1749#endif
1750
1751 bool Changed = false;
1752 // If there are any floating-point recurrences, attempt to
1753 // transform them to use integer recurrences.
1754 Changed |= rewriteNonIntegerIVs(L);
1755
1756 // Create a rewriter object which we'll use to transform the code with.
1757 SCEVExpander Rewriter(*SE, DL, "indvars");
1758#ifndef NDEBUG
1759 Rewriter.setDebugType(DEBUG_TYPE"indvars");
1760#endif
1761
1762 // Eliminate redundant IV users.
1763 //
1764 // Simplification works best when run before other consumers of SCEV. We
1765 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1766 // other expressions involving loop IVs have been evaluated. This helps SCEV
1767 // set no-wrap flags before normalizing sign/zero extension.
1768 Rewriter.disableCanonicalMode();
1769 Changed |= simplifyAndExtend(L, Rewriter, LI);
1770
1771 // Check to see if we can compute the final value of any expressions
1772 // that are recurrent in the loop, and substitute the exit values from the
1773 // loop into any instructions outside of the loop that use the final values
1774 // of the current expressions.
1775 if (ReplaceExitValue != NeverRepl) {
10
Assuming the condition is false
11
Taking false branch
1776 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1777 ReplaceExitValue, DeadInsts)) {
1778 NumReplaced += Rewrites;
1779 Changed = true;
1780 }
1781 }
1782
1783 // Eliminate redundant IV cycles.
1784 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
1785
1786 // Try to eliminate loop exits based on analyzeable exit counts
1787 if (optimizeLoopExits(L, Rewriter)) {
12
Taking false branch
1788 Changed = true;
1789 // Given we've changed exit counts, notify SCEV
1790 // Some nested loops may share same folded exit basic block,
1791 // thus we need to notify top most loop.
1792 SE->forgetTopmostLoop(L);
1793 }
1794
1795 // Try to form loop invariant tests for loop exits by changing how many
1796 // iterations of the loop run when that is unobservable.
1797 if (predicateLoopExits(L, Rewriter)) {
13
Assuming the condition is false
14
Taking false branch
1798 Changed = true;
1799 // Given we've changed exit counts, notify SCEV
1800 SE->forgetLoop(L);
1801 }
1802
1803 // If we have a trip count expression, rewrite the loop's exit condition
1804 // using it.
1805 if (!DisableLFTR) {
15
Assuming the condition is false
16
Taking false branch
1806 BasicBlock *PreHeader = L->getLoopPreheader();
1807 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
1808
1809 SmallVector<BasicBlock*, 16> ExitingBlocks;
1810 L->getExitingBlocks(ExitingBlocks);
1811 for (BasicBlock *ExitingBB : ExitingBlocks) {
1812 // Can't rewrite non-branch yet.
1813 if (!isa<BranchInst>(ExitingBB->getTerminator()))
1814 continue;
1815
1816 // If our exitting block exits multiple loops, we can only rewrite the
1817 // innermost one. Otherwise, we're changing how many times the innermost
1818 // loop runs before it exits.
1819 if (LI->getLoopFor(ExitingBB) != L)
1820 continue;
1821
1822 if (!needsLFTR(L, ExitingBB))
1823 continue;
1824
1825 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1826 if (isa<SCEVCouldNotCompute>(ExitCount))
1827 continue;
1828
1829 // This was handled above, but as we form SCEVs, we can sometimes refine
1830 // existing ones; this allows exit counts to be folded to zero which
1831 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
1832 // until stable to handle cases like this better.
1833 if (ExitCount->isZero())
1834 continue;
1835
1836 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
1837 if (!IndVar)
1838 continue;
1839
1840 // Avoid high cost expansions. Note: This heuristic is questionable in
1841 // that our definition of "high cost" is not exactly principled.
1842 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
1843 TTI, PreHeaderBR))
1844 continue;
1845
1846 // Check preconditions for proper SCEVExpander operation. SCEV does not
1847 // express SCEVExpander's dependencies, such as LoopSimplify. Instead
1848 // any pass that uses the SCEVExpander must do it. This does not work
1849 // well for loop passes because SCEVExpander makes assumptions about
1850 // all loops, while LoopPassManager only forces the current loop to be
1851 // simplified.
1852 //
1853 // FIXME: SCEV expansion has no way to bail out, so the caller must
1854 // explicitly check any assumptions made by SCEV. Brittle.
1855 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
1856 if (!AR || AR->getLoop()->getLoopPreheader())
1857 Changed |= linearFunctionTestReplace(L, ExitingBB,
1858 ExitCount, IndVar,
1859 Rewriter);
1860 }
1861 }
1862 // Clear the rewriter cache, because values that are in the rewriter's cache
1863 // can be deleted in the loop below, causing the AssertingVH in the cache to
1864 // trigger.
1865 Rewriter.clear();
1866
1867 // Now that we're done iterating through lists, clean up any instructions
1868 // which are now dead.
1869 while (!DeadInsts.empty()) {
17
Loop condition is false. Execution continues on line 1883
1870 Value *V = DeadInsts.pop_back_val();
1871
1872 if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
1873 Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
1874 else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
1875 Changed |=
1876 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
1877 }
1878
1879 // The Rewriter may not be used from this point on.
1880
1881 // Loop-invariant instructions in the preheader that aren't used in the
1882 // loop may be sunk below the loop to reduce register pressure.
1883 Changed |= sinkUnusedInvariants(L);
1884
1885 // rewriteFirstIterationLoopExitValues does not rely on the computation of
1886 // trip count and therefore can further simplify exit values in addition to
1887 // rewriteLoopExitValues.
1888 Changed |= rewriteFirstIterationLoopExitValues(L);
1889
1890 // Clean up dead instructions.
1891 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
1892
1893 // Check a post-condition.
1894 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&(static_cast <bool> (L->isRecursivelyLCSSAForm(*DT, *
LI) && "Indvars did not preserve LCSSA!") ? void (0) :
__assert_fail ("L->isRecursivelyLCSSAForm(*DT, *LI) && \"Indvars did not preserve LCSSA!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1895, __extension__ __PRETTY_FUNCTION__))
18
Assuming the condition is true
19
'?' condition is true
1895 "Indvars did not preserve LCSSA!")(static_cast <bool> (L->isRecursivelyLCSSAForm(*DT, *
LI) && "Indvars did not preserve LCSSA!") ? void (0) :
__assert_fail ("L->isRecursivelyLCSSAForm(*DT, *LI) && \"Indvars did not preserve LCSSA!\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1895, __extension__ __PRETTY_FUNCTION__))
;
1896
1897 // Verify that LFTR, and any other change have not interfered with SCEV's
1898 // ability to compute trip count. We may have *changed* the exit count, but
1899 // only by reducing it.
1900#ifndef NDEBUG
1901 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
20
Assuming the condition is true
21
Assuming 'BackedgeTakenCount' is not a 'SCEVCouldNotCompute'
22
Taking true branch
1902 SE->forgetLoop(L);
1903 const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
1904 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
23
Called C++ object pointer is uninitialized
1905 SE->getTypeSizeInBits(NewBECount->getType()))
1906 NewBECount = SE->getTruncateOrNoop(NewBECount,
1907 BackedgeTakenCount->getType());
1908 else
1909 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
1910 NewBECount->getType());
1911 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,(static_cast <bool> (!SE->isKnownPredicate(ICmpInst::
ICMP_ULT, BackedgeTakenCount, NewBECount) && "indvars must preserve SCEV"
) ? void (0) : __assert_fail ("!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, NewBECount) && \"indvars must preserve SCEV\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1912, __extension__ __PRETTY_FUNCTION__))
1912 NewBECount) && "indvars must preserve SCEV")(static_cast <bool> (!SE->isKnownPredicate(ICmpInst::
ICMP_ULT, BackedgeTakenCount, NewBECount) && "indvars must preserve SCEV"
) ? void (0) : __assert_fail ("!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, NewBECount) && \"indvars must preserve SCEV\""
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp"
, 1912, __extension__ __PRETTY_FUNCTION__))
;
1913 }
1914 if (VerifyMemorySSA && MSSAU)
1915 MSSAU->getMemorySSA()->verifyMemorySSA();
1916#endif
1917
1918 return Changed;
1919}
1920
1921PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
1922 LoopStandardAnalysisResults &AR,
1923 LPMUpdater &) {
1924 Function *F = L.getHeader()->getParent();
1925 const DataLayout &DL = F->getParent()->getDataLayout();
1926
1927 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
1928 WidenIndVars && AllowIVWidening);
1
Assuming field 'WidenIndVars' is false
1929 if (!IVS.run(&L))
2
Calling 'IndVarSimplify::run'
1930 return PreservedAnalyses::all();
1931
1932 auto PA = getLoopPassPreservedAnalyses();
1933 PA.preserveSet<CFGAnalyses>();
1934 if (AR.MSSA)
1935 PA.preserve<MemorySSAAnalysis>();
1936 return PA;
1937}
1938
1939namespace {
1940
1941struct IndVarSimplifyLegacyPass : public LoopPass {
1942 static char ID; // Pass identification, replacement for typeid
1943
1944 IndVarSimplifyLegacyPass() : LoopPass(ID) {
1945 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
1946 }
1947
1948 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1949 if (skipLoop(L))
1950 return false;
1951
1952 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1953 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1954 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1955 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
1956 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
1957 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
1958 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
1959 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1960 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
1961 MemorySSA *MSSA = nullptr;
1962 if (MSSAAnalysis)
1963 MSSA = &MSSAAnalysis->getMSSA();
1964
1965 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
1966 return IVS.run(L);
1967 }
1968
1969 void getAnalysisUsage(AnalysisUsage &AU) const override {
1970 AU.setPreservesCFG();
1971 AU.addPreserved<MemorySSAWrapperPass>();
1972 getLoopAnalysisUsage(AU);
1973 }
1974};
1975
1976} // end anonymous namespace
1977
1978char IndVarSimplifyLegacyPass::ID = 0;
1979
1980INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",static void *initializeIndVarSimplifyLegacyPassPassOnce(PassRegistry
&Registry) {
1981 "Induction Variable Simplification", false, false)static void *initializeIndVarSimplifyLegacyPassPassOnce(PassRegistry
&Registry) {
1982INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
1983INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",PassInfo *PI = new PassInfo( "Induction Variable Simplification"
, "indvars", &IndVarSimplifyLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<IndVarSimplifyLegacyPass>), false, false
); Registry.registerPass(*PI, true); return PI; } static llvm
::once_flag InitializeIndVarSimplifyLegacyPassPassFlag; void llvm
::initializeIndVarSimplifyLegacyPassPass(PassRegistry &Registry
) { llvm::call_once(InitializeIndVarSimplifyLegacyPassPassFlag
, initializeIndVarSimplifyLegacyPassPassOnce, std::ref(Registry
)); }
1984 "Induction Variable Simplification", false, false)PassInfo *PI = new PassInfo( "Induction Variable Simplification"
, "indvars", &IndVarSimplifyLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<IndVarSimplifyLegacyPass>), false, false
); Registry.registerPass(*PI, true); return PI; } static llvm
::once_flag InitializeIndVarSimplifyLegacyPassPassFlag; void llvm
::initializeIndVarSimplifyLegacyPassPass(PassRegistry &Registry
) { llvm::call_once(InitializeIndVarSimplifyLegacyPassPassFlag
, initializeIndVarSimplifyLegacyPassPassOnce, std::ref(Registry
)); }
1985
1986Pass *llvm::createIndVarSimplifyPass() {
1987 return new IndVarSimplifyLegacyPass();
1988}