LLVM  15.0.0git
LoopFlatten.cpp
Go to the documentation of this file.
1 //===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass flattens pairs nested loops into a single loop.
10 //
11 // The intention is to optimise loop nests like this, which together access an
12 // array linearly:
13 //
14 // for (int i = 0; i < N; ++i)
15 // for (int j = 0; j < M; ++j)
16 // f(A[i*M+j]);
17 //
18 // into one loop:
19 //
20 // for (int i = 0; i < (N*M); ++i)
21 // f(A[i]);
22 //
23 // It can also flatten loops where the induction variables are not used in the
24 // loop. This is only worth doing if the induction variables are only used in an
25 // expression like i*M+j. If they had any other uses, we would have to insert a
26 // div/mod to reconstruct the original values, so this wouldn't be profitable.
27 //
28 // We also need to prove that N*M will not overflow. The preferred solution is
29 // to widen the IV, which avoids overflow checks, so that is tried first. If
30 // the IV cannot be widened, then we try to determine that this new tripcount
31 // expression won't overflow.
32 //
33 // Q: Does LoopFlatten use SCEV?
34 // Short answer: Yes and no.
35 //
36 // Long answer:
37 // For this transformation to be valid, we require all uses of the induction
38 // variables to be linear expressions of the form i*M+j. The different Loop
39 // APIs are used to get some loop components like the induction variable,
40 // compare statement, etc. In addition, we do some pattern matching to find the
41 // linear expressions and other loop components like the loop increment. The
42 // latter are examples of expressions that do use the induction variable, but
43 // are safe to ignore when we check all uses to be of the form i*M+j. We keep
44 // track of all of this in bookkeeping struct FlattenInfo.
45 // We assume the loops to be canonical, i.e. starting at 0 and increment with
46 // 1. This makes RHS of the compare the loop tripcount (with the right
47 // predicate). We use SCEV to then sanity check that this tripcount matches
48 // with the tripcount as computed by SCEV.
49 //
50 //===----------------------------------------------------------------------===//
51 
53 
54 #include "llvm/ADT/Statistic.h"
56 #include "llvm/Analysis/LoopInfo.h"
63 #include "llvm/IR/Dominators.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/IRBuilder.h"
66 #include "llvm/IR/Module.h"
67 #include "llvm/IR/PatternMatch.h"
68 #include "llvm/InitializePasses.h"
69 #include "llvm/Pass.h"
70 #include "llvm/Support/Debug.h"
72 #include "llvm/Transforms/Scalar.h"
78 
79 using namespace llvm;
80 using namespace llvm::PatternMatch;
81 
82 #define DEBUG_TYPE "loop-flatten"
83 
84 STATISTIC(NumFlattened, "Number of loops flattened");
85 
87  "loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
88  cl::desc("Limit on the cost of instructions that can be repeated due to "
89  "loop flattening"));
90 
91 static cl::opt<bool>
92  AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden,
93  cl::init(false),
94  cl::desc("Assume that the product of the two iteration "
95  "trip counts will never overflow"));
96 
97 static cl::opt<bool>
98  WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true),
99  cl::desc("Widen the loop induction variables, if possible, so "
100  "overflow checks won't reject flattening"));
101 
102 // We require all uses of both induction variables to match this pattern:
103 //
104 // (OuterPHI * InnerTripCount) + InnerPHI
105 //
106 // I.e., it needs to be a linear expression of the induction variables and the
107 // inner loop trip count. We keep track of all different expressions on which
108 // checks will be performed in this bookkeeping struct.
109 //
110 struct FlattenInfo {
111  Loop *OuterLoop = nullptr; // The loop pair to be flattened.
112  Loop *InnerLoop = nullptr;
113 
114  PHINode *InnerInductionPHI = nullptr; // These PHINodes correspond to loop
115  PHINode *OuterInductionPHI = nullptr; // induction variables, which are
116  // expected to start at zero and
117  // increment by one on each loop.
118 
119  Value *InnerTripCount = nullptr; // The product of these two tripcounts
120  Value *OuterTripCount = nullptr; // will be the new flattened loop
121  // tripcount. Also used to recognise a
122  // linear expression that will be replaced.
123 
124  SmallPtrSet<Value *, 4> LinearIVUses; // Contains the linear expressions
125  // of the form i*M+j that will be
126  // replaced.
127 
128  BinaryOperator *InnerIncrement = nullptr; // Uses of induction variables in
129  BinaryOperator *OuterIncrement = nullptr; // loop control statements that
130  BranchInst *InnerBranch = nullptr; // are safe to ignore.
131 
132  BranchInst *OuterBranch = nullptr; // The instruction that needs to be
133  // updated with new tripcount.
134 
136 
137  bool Widened = false; // Whether this holds the flatten info before or after
138  // widening.
139 
140  PHINode *NarrowInnerInductionPHI = nullptr; // Holds the old/narrow induction
141  PHINode *NarrowOuterInductionPHI = nullptr; // phis, i.e. the Phis before IV
142  // has been apllied. Used to skip
143  // checks on phi nodes.
144 
145  FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL){};
146 
148  // This can't be the narrow phi if we haven't widened the IV first.
149  if (!Widened)
150  return false;
151  return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
152  }
154  return InnerIncrement == U;
155  }
157  return OuterIncrement == U;
158  }
160  return InnerBranch->getCondition() == U;
161  }
162 
164  for (User *U : OuterInductionPHI->users()) {
165  if (isOuterLoopIncrement(U))
166  continue;
167 
168  auto IsValidOuterPHIUses = [&] (User *U) -> bool {
169  LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
170  if (!ValidOuterPHIUses.count(U)) {
171  LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
172  return false;
173  }
174  LLVM_DEBUG(dbgs() << "Use is optimisable\n");
175  return true;
176  };
177 
178  if (auto *V = dyn_cast<TruncInst>(U)) {
179  for (auto *K : V->users()) {
180  if (!IsValidOuterPHIUses(K))
181  return false;
182  }
183  continue;
184  }
185 
186  if (!IsValidOuterPHIUses(U))
187  return false;
188  }
189  return true;
190  }
191 
192  bool matchLinearIVUser(User *U, Value *InnerTripCount,
193  SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
194  LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
195  Value *MatchedMul = nullptr;
196  Value *MatchedItCount = nullptr;
197 
198  bool IsAdd = match(U, m_c_Add(m_Specific(InnerInductionPHI),
199  m_Value(MatchedMul))) &&
200  match(MatchedMul, m_c_Mul(m_Specific(OuterInductionPHI),
201  m_Value(MatchedItCount)));
202 
203  // Matches the same pattern as above, except it also looks for truncs
204  // on the phi, which can be the result of widening the induction variables.
205  bool IsAddTrunc =
206  match(U, m_c_Add(m_Trunc(m_Specific(InnerInductionPHI)),
207  m_Value(MatchedMul))) &&
208  match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(OuterInductionPHI)),
209  m_Value(MatchedItCount)));
210 
211  if (!MatchedItCount)
212  return false;
213 
214  // Look through extends if the IV has been widened.
215  if (Widened &&
216  (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
217  assert(MatchedItCount->getType() == InnerInductionPHI->getType() &&
218  "Unexpected type mismatch in types after widening");
219  MatchedItCount = isa<SExtInst>(MatchedItCount)
220  ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
221  : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
222  }
223 
224  if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
225  LLVM_DEBUG(dbgs() << "Use is optimisable\n");
226  ValidOuterPHIUses.insert(MatchedMul);
227  LinearIVUses.insert(U);
228  return true;
229  }
230 
231  LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
232  return false;
233  }
234 
236  Value *SExtInnerTripCount = InnerTripCount;
237  if (Widened &&
238  (isa<SExtInst>(InnerTripCount) || isa<ZExtInst>(InnerTripCount)))
239  SExtInnerTripCount = cast<Instruction>(InnerTripCount)->getOperand(0);
240 
241  for (User *U : InnerInductionPHI->users()) {
242  if (isInnerLoopIncrement(U))
243  continue;
244 
245  // After widening the IVs, a trunc instruction might have been introduced,
246  // so look through truncs.
247  if (isa<TruncInst>(U)) {
248  if (!U->hasOneUse())
249  return false;
250  U = *U->user_begin();
251  }
252 
253  // If the use is in the compare (which is also the condition of the inner
254  // branch) then the compare has been altered by another transformation e.g
255  // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
256  // a constant. Ignore this use as the compare gets removed later anyway.
257  if (isInnerLoopTest(U))
258  continue;
259 
260  if (!matchLinearIVUser(U, SExtInnerTripCount, ValidOuterPHIUses))
261  return false;
262  }
263  return true;
264  }
265 };
266 
267 static bool
268 setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
269  SmallPtrSetImpl<Instruction *> &IterationInstructions) {
270  TripCount = TC;
271  IterationInstructions.insert(Increment);
272  LLVM_DEBUG(dbgs() << "Found Increment: "; Increment->dump());
273  LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
274  LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
275  return true;
276 }
277 
278 // Given the RHS of the loop latch compare instruction, verify with SCEV
279 // that this is indeed the loop tripcount.
280 // TODO: This used to be a straightforward check but has grown to be quite
281 // complicated now. It is therefore worth revisiting what the additional
282 // benefits are of this (compared to relying on canonical loops and pattern
283 // matching).
284 static bool verifyTripCount(Value *RHS, Loop *L,
285  SmallPtrSetImpl<Instruction *> &IterationInstructions,
286  PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
287  BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
288  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
289  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
290  LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
291  return false;
292  }
293 
294  // The Extend=false flag is used for getTripCountFromExitCount as we want
295  // to verify and match it with the pattern matched tripcount. Please note
296  // that overflow checks are performed in checkOverflow, but are first tried
297  // to avoid by widening the IV.
298  const SCEV *SCEVTripCount =
299  SE->getTripCountFromExitCount(BackedgeTakenCount, /*Extend=*/false);
300 
301  const SCEV *SCEVRHS = SE->getSCEV(RHS);
302  if (SCEVRHS == SCEVTripCount)
303  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
304  ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
305  if (ConstantRHS) {
306  const SCEV *BackedgeTCExt = nullptr;
307  if (IsWidened) {
308  const SCEV *SCEVTripCountExt;
309  // Find the extended backedge taken count and extended trip count using
310  // SCEV. One of these should now match the RHS of the compare.
311  BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
312  SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
313  if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
314  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
315  return false;
316  }
317  }
318  // If the RHS of the compare is equal to the backedge taken count we need
319  // to add one to get the trip count.
320  if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
321  ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
322  Value *NewRHS = ConstantInt::get(
323  ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
324  return setLoopComponents(NewRHS, TripCount, Increment,
325  IterationInstructions);
326  }
327  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
328  }
329  // If the RHS isn't a constant then check that the reason it doesn't match
330  // the SCEV trip count is because the RHS is a ZExt or SExt instruction
331  // (and take the trip count to be the RHS).
332  if (!IsWidened) {
333  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
334  return false;
335  }
336  auto *TripCountInst = dyn_cast<Instruction>(RHS);
337  if (!TripCountInst) {
338  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
339  return false;
340  }
341  if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
342  SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
343  LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
344  return false;
345  }
346  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
347 }
348 
349 // Finds the induction variable, increment and trip count for a simple loop that
350 // we can flatten.
351 static bool findLoopComponents(
352  Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions,
353  PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
354  BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
355  LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n");
356 
357  if (!L->isLoopSimplifyForm()) {
358  LLVM_DEBUG(dbgs() << "Loop is not in normal form\n");
359  return false;
360  }
361 
362  // Currently, to simplify the implementation, the Loop induction variable must
363  // start at zero and increment with a step size of one.
364  if (!L->isCanonical(*SE)) {
365  LLVM_DEBUG(dbgs() << "Loop is not canonical\n");
366  return false;
367  }
368 
369  // There must be exactly one exiting block, and it must be the same at the
370  // latch.
371  BasicBlock *Latch = L->getLoopLatch();
372  if (L->getExitingBlock() != Latch) {
373  LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n");
374  return false;
375  }
376 
377  // Find the induction PHI. If there is no induction PHI, we can't do the
378  // transformation. TODO: could other variables trigger this? Do we have to
379  // search for the best one?
380  InductionPHI = L->getInductionVariable(*SE);
381  if (!InductionPHI) {
382  LLVM_DEBUG(dbgs() << "Could not find induction PHI\n");
383  return false;
384  }
385  LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump());
386 
387  bool ContinueOnTrue = L->contains(Latch->getTerminator()->getSuccessor(0));
388  auto IsValidPredicate = [&](ICmpInst::Predicate Pred) {
389  if (ContinueOnTrue)
390  return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT;
391  else
392  return Pred == CmpInst::ICMP_EQ;
393  };
394 
395  // Find Compare and make sure it is valid. getLatchCmpInst checks that the
396  // back branch of the latch is conditional.
398  if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) ||
399  Compare->hasNUsesOrMore(2)) {
400  LLVM_DEBUG(dbgs() << "Could not find valid comparison\n");
401  return false;
402  }
403  BackBranch = cast<BranchInst>(Latch->getTerminator());
404  IterationInstructions.insert(BackBranch);
405  LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump());
406  IterationInstructions.insert(Compare);
407  LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump());
408 
409  // Find increment and trip count.
410  // There are exactly 2 incoming values to the induction phi; one from the
411  // pre-header and one from the latch. The incoming latch value is the
412  // increment variable.
413  Increment =
414  dyn_cast<BinaryOperator>(InductionPHI->getIncomingValueForBlock(Latch));
415  if (Increment->hasNUsesOrMore(3)) {
416  LLVM_DEBUG(dbgs() << "Could not find valid increment\n");
417  return false;
418  }
419  // The trip count is the RHS of the compare. If this doesn't match the trip
420  // count computed by SCEV then this is because the trip count variable
421  // has been widened so the types don't match, or because it is a constant and
422  // another transformation has changed the compare (e.g. icmp ult %inc,
423  // tripcount -> icmp ult %j, tripcount-1), or both.
424  Value *RHS = Compare->getOperand(1);
425 
426  return verifyTripCount(RHS, L, IterationInstructions, InductionPHI, TripCount,
427  Increment, BackBranch, SE, IsWidened);
428 }
429 
430 static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
431  // All PHIs in the inner and outer headers must either be:
432  // - The induction PHI, which we are going to rewrite as one induction in
433  // the new loop. This is already checked by findLoopComponents.
434  // - An outer header PHI with all incoming values from outside the loop.
435  // LoopSimplify guarantees we have a pre-header, so we don't need to
436  // worry about that here.
437  // - Pairs of PHIs in the inner and outer headers, which implement a
438  // loop-carried dependency that will still be valid in the new loop. To
439  // be valid, this variable must be modified only in the inner loop.
440 
441  // The set of PHI nodes in the outer loop header that we know will still be
442  // valid after the transformation. These will not need to be modified (with
443  // the exception of the induction variable), but we do need to check that
444  // there are no unsafe PHI nodes.
445  SmallPtrSet<PHINode *, 4> SafeOuterPHIs;
446  SafeOuterPHIs.insert(FI.OuterInductionPHI);
447 
448  // Check that all PHI nodes in the inner loop header match one of the valid
449  // patterns.
450  for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) {
451  // The induction PHIs break these rules, and that's OK because we treat
452  // them specially when doing the transformation.
453  if (&InnerPHI == FI.InnerInductionPHI)
454  continue;
455  if (FI.isNarrowInductionPhi(&InnerPHI))
456  continue;
457 
458  // Each inner loop PHI node must have two incoming values/blocks - one
459  // from the pre-header, and one from the latch.
460  assert(InnerPHI.getNumIncomingValues() == 2);
461  Value *PreHeaderValue =
462  InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader());
463  Value *LatchValue =
464  InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch());
465 
466  // The incoming value from the outer loop must be the PHI node in the
467  // outer loop header, with no modifications made in the top of the outer
468  // loop.
469  PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
470  if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) {
471  LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n");
472  return false;
473  }
474 
475  // The other incoming value must come from the inner loop, without any
476  // modifications in the tail end of the outer loop. We are in LCSSA form,
477  // so this will actually be a PHI in the inner loop's exit block, which
478  // only uses values from inside the inner loop.
479  PHINode *LCSSAPHI = dyn_cast<PHINode>(
481  if (!LCSSAPHI) {
482  LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n");
483  return false;
484  }
485 
486  // The value used by the LCSSA PHI must be the same one that the inner
487  // loop's PHI uses.
488  if (LCSSAPHI->hasConstantValue() != LatchValue) {
489  LLVM_DEBUG(
490  dbgs() << "LCSSA PHI incoming value does not match latch value\n");
491  return false;
492  }
493 
494  LLVM_DEBUG(dbgs() << "PHI pair is safe:\n");
495  LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump());
496  LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump());
497  SafeOuterPHIs.insert(OuterPHI);
498  FI.InnerPHIsToTransform.insert(&InnerPHI);
499  }
500 
501  for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
502  if (FI.isNarrowInductionPhi(&OuterPHI))
503  continue;
504  if (!SafeOuterPHIs.count(&OuterPHI)) {
505  LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
506  return false;
507  }
508  }
509 
510  LLVM_DEBUG(dbgs() << "checkPHIs: OK\n");
511  return true;
512 }
513 
514 static bool
516  SmallPtrSetImpl<Instruction *> &IterationInstructions,
517  const TargetTransformInfo *TTI) {
518  // Check for instructions in the outer but not inner loop. If any of these
519  // have side-effects then this transformation is not legal, and if there is
520  // a significant amount of code here which can't be optimised out that it's
521  // not profitable (as these instructions would get executed for each
522  // iteration of the inner loop).
523  InstructionCost RepeatedInstrCost = 0;
524  for (auto *B : FI.OuterLoop->getBlocks()) {
525  if (FI.InnerLoop->contains(B))
526  continue;
527 
528  for (auto &I : *B) {
529  if (!isa<PHINode>(&I) && !I.isTerminator() &&
531  LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have "
532  "side effects: ";
533  I.dump());
534  return false;
535  }
536  // The execution count of the outer loop's iteration instructions
537  // (increment, compare and branch) will be increased, but the
538  // equivalent instructions will be removed from the inner loop, so
539  // they make a net difference of zero.
540  if (IterationInstructions.count(&I))
541  continue;
542  // The uncoditional branch to the inner loop's header will turn into
543  // a fall-through, so adds no cost.
544  BranchInst *Br = dyn_cast<BranchInst>(&I);
545  if (Br && Br->isUnconditional() &&
546  Br->getSuccessor(0) == FI.InnerLoop->getHeader())
547  continue;
548  // Multiplies of the outer iteration variable and inner iteration
549  // count will be optimised out.
552  continue;
553  InstructionCost Cost =
555  LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump());
556  RepeatedInstrCost += Cost;
557  }
558  }
559 
560  LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: "
561  << RepeatedInstrCost << "\n");
562  // Bail out if flattening the loops would cause instructions in the outer
563  // loop but not in the inner loop to be executed extra times.
564  if (RepeatedInstrCost > RepeatedInstructionThreshold) {
565  LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n");
566  return false;
567  }
568 
569  LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n");
570  return true;
571 }
572 
573 
574 
575 // We require all uses of both induction variables to match this pattern:
576 //
577 // (OuterPHI * InnerTripCount) + InnerPHI
578 //
579 // Any uses of the induction variables not matching that pattern would
580 // require a div/mod to reconstruct in the flattened loop, so the
581 // transformation wouldn't be profitable.
582 static bool checkIVUsers(FlattenInfo &FI) {
583  // Check that all uses of the inner loop's induction variable match the
584  // expected pattern, recording the uses of the outer IV.
585  SmallPtrSet<Value *, 4> ValidOuterPHIUses;
586  if (!FI.checkInnerInductionPhiUsers(ValidOuterPHIUses))
587  return false;
588 
589  // Check that there are no uses of the outer IV other than the ones found
590  // as part of the pattern above.
591  if (!FI.checkOuterInductionPhiUsers(ValidOuterPHIUses))
592  return false;
593 
594  LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
595  dbgs() << "Found " << FI.LinearIVUses.size()
596  << " value(s) that can be replaced:\n";
597  for (Value *V : FI.LinearIVUses) {
598  dbgs() << " ";
599  V->dump();
600  });
601  return true;
602 }
603 
604 // Return an OverflowResult dependant on if overflow of the multiplication of
605 // InnerTripCount and OuterTripCount can be assumed not to happen.
607  AssumptionCache *AC) {
608  Function *F = FI.OuterLoop->getHeader()->getParent();
609  const DataLayout &DL = F->getParent()->getDataLayout();
610 
611  // For debugging/testing.
612  if (AssumeNoOverflow)
614 
615  // Check if the multiply could not overflow due to known ranges of the
616  // input values.
618  FI.InnerTripCount, FI.OuterTripCount, DL, AC,
621  return OR;
622 
623  for (Value *V : FI.LinearIVUses) {
624  for (Value *U : V->users()) {
625  if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
626  for (Value *GEPUser : U->users()) {
627  auto *GEPUserInst = cast<Instruction>(GEPUser);
628  if (!isa<LoadInst>(GEPUserInst) &&
629  !(isa<StoreInst>(GEPUserInst) &&
630  GEP == GEPUserInst->getOperand(1)))
631  continue;
633  FI.InnerLoop))
634  continue;
635  // The IV is used as the operand of a GEP which dominates the loop
636  // latch, and the IV is at least as wide as the address space of the
637  // GEP. In this case, the GEP would wrap around the address space
638  // before the IV increment wraps, which would be UB.
639  if (GEP->isInBounds() &&
640  V->getType()->getIntegerBitWidth() >=
641  DL.getPointerTypeSizeInBits(GEP->getType())) {
642  LLVM_DEBUG(
643  dbgs() << "use of linear IV would be UB if overflow occurred: ";
644  GEP->dump());
646  }
647  }
648  }
649  }
650  }
651 
653 }
654 
657  const TargetTransformInfo *TTI) {
658  SmallPtrSet<Instruction *, 8> IterationInstructions;
659  if (!findLoopComponents(FI.InnerLoop, IterationInstructions,
661  FI.InnerIncrement, FI.InnerBranch, SE, FI.Widened))
662  return false;
663  if (!findLoopComponents(FI.OuterLoop, IterationInstructions,
665  FI.OuterIncrement, FI.OuterBranch, SE, FI.Widened))
666  return false;
667 
668  // Both of the loop trip count values must be invariant in the outer loop
669  // (non-instructions are all inherently invariant).
671  LLVM_DEBUG(dbgs() << "inner loop trip count not invariant\n");
672  return false;
673  }
675  LLVM_DEBUG(dbgs() << "outer loop trip count not invariant\n");
676  return false;
677  }
678 
679  if (!checkPHIs(FI, TTI))
680  return false;
681 
682  // FIXME: it should be possible to handle different types correctly.
684  return false;
685 
686  if (!checkOuterLoopInsts(FI, IterationInstructions, TTI))
687  return false;
688 
689  // Find the values in the loop that can be replaced with the linearized
690  // induction variable, and check that there are no other uses of the inner
691  // or outer induction variable. If there were, we could still do this
692  // transformation, but we'd have to insert a div/mod to calculate the
693  // original IVs, so it wouldn't be profitable.
694  if (!checkIVUsers(FI))
695  return false;
696 
697  LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n");
698  return true;
699 }
700 
704  MemorySSAUpdater *MSSAU) {
705  Function *F = FI.OuterLoop->getHeader()->getParent();
706  LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
707  {
708  using namespace ore;
710  FI.InnerLoop->getHeader());
712  Remark << "Flattened into outer loop";
713  ORE.emit(Remark);
714  }
715 
716  Value *NewTripCount = BinaryOperator::CreateMul(
717  FI.InnerTripCount, FI.OuterTripCount, "flatten.tripcount",
719  LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
720  NewTripCount->dump());
721 
722  // Fix up PHI nodes that take values from the inner loop back-edge, which
723  // we are about to remove.
725 
726  // The old Phi will be optimised away later, but for now we can't leave
727  // leave it in an invalid state, so are updating them too.
728  for (PHINode *PHI : FI.InnerPHIsToTransform)
730 
731  // Modify the trip count of the outer loop to be the product of the two
732  // trip counts.
733  cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);
734 
735  // Replace the inner loop backedge with an unconditional branch to the exit.
736  BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
737  BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
738  InnerExitingBlock->getTerminator()->eraseFromParent();
739  BranchInst::Create(InnerExitBlock, InnerExitingBlock);
740 
741  // Update the DomTree and MemorySSA.
742  DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
743  if (MSSAU)
744  MSSAU->removeEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
745 
746  // Replace all uses of the polynomial calculated from the two induction
747  // variables with the one new one.
749  for (Value *V : FI.LinearIVUses) {
750  Value *OuterValue = FI.OuterInductionPHI;
751  if (FI.Widened)
752  OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
753  "flatten.trunciv");
754 
755  LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); dbgs() << "with: ";
756  OuterValue->dump());
757  V->replaceAllUsesWith(OuterValue);
758  }
759 
760  // Tell LoopInfo, SCEV and the pass manager that the inner loop has been
761  // deleted, and any information that have about the outer loop invalidated.
762  SE->forgetLoop(FI.OuterLoop);
763  SE->forgetLoop(FI.InnerLoop);
764  if (U)
766  LI->erase(FI.InnerLoop);
767 
768  // Increment statistic value.
769  NumFlattened++;
770 
771  return true;
772 }
773 
774 static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
776  const TargetTransformInfo *TTI) {
777  if (!WidenIV) {
778  LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n");
779  return false;
780  }
781 
782  LLVM_DEBUG(dbgs() << "Try widening the IVs\n");
784  auto &DL = M->getDataLayout();
785  auto *InnerType = FI.InnerInductionPHI->getType();
786  auto *OuterType = FI.OuterInductionPHI->getType();
787  unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits();
788  auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext());
789 
790  // If both induction types are less than the maximum legal integer width,
791  // promote both to the widest type available so we know calculating
792  // (OuterTripCount * InnerTripCount) as the new trip count is safe.
793  if (InnerType != OuterType ||
794  InnerType->getScalarSizeInBits() >= MaxLegalSize ||
795  MaxLegalType->getScalarSizeInBits() <
796  InnerType->getScalarSizeInBits() * 2) {
797  LLVM_DEBUG(dbgs() << "Can't widen the IV\n");
798  return false;
799  }
800 
801  SCEVExpander Rewriter(*SE, DL, "loopflatten");
803  unsigned ElimExt = 0;
804  unsigned Widened = 0;
805 
806  auto CreateWideIV = [&](WideIVInfo WideIV, bool &Deleted) -> bool {
807  PHINode *WidePhi =
808  createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts, ElimExt, Widened,
809  true /* HasGuards */, true /* UsePostIncrementRanges */);
810  if (!WidePhi)
811  return false;
812  LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
813  LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump());
815  return true;
816  };
817 
818  bool Deleted;
819  if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false}, Deleted))
820  return false;
821  // Add the narrow phi to list, so that it will be adjusted later when the
822  // the transformation is performed.
823  if (!Deleted)
825 
826  if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false}, Deleted))
827  return false;
828 
829  assert(Widened && "Widened IV expected");
830  FI.Widened = true;
831 
832  // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
835 
836  // After widening, rediscover all the loop components.
837  return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
838 }
839 
843  MemorySSAUpdater *MSSAU) {
844  LLVM_DEBUG(
845  dbgs() << "Loop flattening running on outer loop "
846  << FI.OuterLoop->getHeader()->getName() << " and inner loop "
847  << FI.InnerLoop->getHeader()->getName() << " in "
848  << FI.OuterLoop->getHeader()->getParent()->getName() << "\n");
849 
850  if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI))
851  return false;
852 
853  // Check if we can widen the induction variables to avoid overflow checks.
854  bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);
855 
856  // It can happen that after widening of the IV, flattening may not be
857  // possible/happening, e.g. when it is deemed unprofitable. So bail here if
858  // that is the case.
859  // TODO: IV widening without performing the actual flattening transformation
860  // is not ideal. While this codegen change should not matter much, it is an
861  // unnecessary change which is better to avoid. It's unlikely this happens
862  // often, because if it's unprofitibale after widening, it should be
863  // unprofitabe before widening as checked in the first round of checks. But
864  // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
865  // relaxed. Because this is making a code change (the IV widening, but not
866  // the flattening), we return true here.
867  if (FI.Widened && !CanFlatten)
868  return true;
869 
870  // If we have widened and can perform the transformation, do that here.
871  if (CanFlatten)
872  return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
873 
874  // Otherwise, if we haven't widened the IV, check if the new iteration
875  // variable might overflow. In this case, we need to version the loop, and
876  // select the original version at runtime if the iteration space is too
877  // large.
878  // TODO: We currently don't version the loop.
879  OverflowResult OR = checkOverflow(FI, DT, AC);
882  LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n");
883  return false;
884  } else if (OR == OverflowResult::MayOverflow) {
885  LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
886  return false;
887  }
888 
889  LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
890  return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
891 }
892 
895  MemorySSAUpdater *MSSAU) {
896  bool Changed = false;
897  for (Loop *InnerLoop : LN.getLoops()) {
898  auto *OuterLoop = InnerLoop->getParentLoop();
899  if (!OuterLoop)
900  continue;
901  FlattenInfo FI(OuterLoop, InnerLoop);
902  Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
903  }
904  return Changed;
905 }
906 
909  LPMUpdater &U) {
910 
911  bool Changed = false;
912 
914  if (AR.MSSA) {
915  MSSAU = MemorySSAUpdater(AR.MSSA);
916  if (VerifyMemorySSA)
917  AR.MSSA->verifyMemorySSA();
918  }
919 
920  // The loop flattening pass requires loops to be
921  // in simplified form, and also needs LCSSA. Running
922  // this pass will simplify all loops that contain inner loops,
923  // regardless of whether anything ends up being flattened.
924  Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U,
925  MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
926 
927  if (!Changed)
928  return PreservedAnalyses::all();
929 
930  if (AR.MSSA && VerifyMemorySSA)
931  AR.MSSA->verifyMemorySSA();
932 
933  auto PA = getLoopPassPreservedAnalyses();
934  if (AR.MSSA)
935  PA.preserve<MemorySSAAnalysis>();
936  return PA;
937 }
938 
939 namespace {
940 class LoopFlattenLegacyPass : public FunctionPass {
941 public:
942  static char ID; // Pass ID, replacement for typeid
943  LoopFlattenLegacyPass() : FunctionPass(ID) {
945  }
946 
947  // Possibly flatten loop L into its child.
948  bool runOnFunction(Function &F) override;
949 
950  void getAnalysisUsage(AnalysisUsage &AU) const override {
957  }
958 };
959 } // namespace
960 
962 INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
963  false, false)
966 INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
968 
970  return new LoopFlattenLegacyPass();
971 }
972 
974  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
975  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
976  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
977  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
978  auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
979  auto *TTI = &TTIP.getTTI(F);
980  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
981  auto *MSSA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
982 
984  if (MSSA)
985  MSSAU = MemorySSAUpdater(&MSSA->getMSSA());
986 
987  bool Changed = false;
988  for (Loop *L : *LI) {
989  auto LN = LoopNest::getLoopNest(*L, *SE);
990  Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr,
991  MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
992  }
993  return Changed;
994 }
checkOuterLoopInsts
static bool checkOuterLoopInsts(FlattenInfo &FI, SmallPtrSetImpl< Instruction * > &IterationInstructions, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:515
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", false, false) INITIALIZE_PASS_END(LoopFlattenLegacyPass
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
FlattenLoopPair
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI, LPMUpdater *U, MemorySSAUpdater *MSSAU)
Definition: LoopFlatten.cpp:840
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::createWideIV
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Definition: SimplifyIndVar.cpp:2042
llvm::Loop::getLatchCmpInst
ICmpInst * getLatchCmpInst() const
Get the latch condition instruction.
Definition: LoopInfo.cpp:170
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1906
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:53
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::OverflowResult::NeverOverflows
@ NeverOverflows
Never overflows.
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:81
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4576
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
llvm::computeOverflowForUnsignedMul
OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo=true)
Definition: ValueTracking.cpp:4706
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
ScalarEvolutionExpander.h
Scalar.h
flatten
loop flatten
Definition: LoopFlatten.cpp:966
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::Value::dump
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4823
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:630
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
FlattenInfo::InnerInductionPHI
PHINode * InnerInductionPHI
Definition: LoopFlatten.cpp:114
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::PHINode::removeIncomingValue
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Definition: Instructions.cpp:113
ValueTracking.h
Local.h
OptimizationRemarkEmitter.h
checkPHIs
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:430
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:144
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
FlattenInfo::OuterIncrement
BinaryOperator * OuterIncrement
Definition: LoopFlatten.cpp:129
LoopFlatten.h
ScalarEvolution.h
llvm::OverflowResult::AlwaysOverflowsLow
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
Module.h
findLoopComponents
static bool findLoopComponents(Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened)
Definition: LoopFlatten.cpp:351
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::Loop::isCanonical
bool isCanonical(ScalarEvolution &SE) const
Return true if the loop induction variable starts at zero and increments by one each time through the...
Definition: LoopInfo.cpp:407
FlattenInfo::checkOuterInductionPhiUsers
bool checkOuterInductionPhiUsers(SmallPtrSet< Value *, 4 > &ValidOuterPHIUses)
Definition: LoopFlatten.cpp:163
FlattenInfo::OuterInductionPHI
PHINode * OuterInductionPHI
Definition: LoopFlatten.cpp:115
llvm::Optional
Definition: APInt.h:33
llvm::SmallPtrSet< Value *, 4 >
FlattenInfo::OuterLoop
Loop * OuterLoop
Definition: LoopFlatten.cpp:111
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopFlatten.cpp:82
FlattenInfo::LinearIVUses
SmallPtrSet< Value *, 4 > LinearIVUses
Definition: LoopFlatten.cpp:124
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
FlattenInfo::OuterBranch
BranchInst * OuterBranch
Definition: LoopFlatten.cpp:132
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::WideIVInfo
Collect information about induction variables that are used by sign/zero extend operations.
Definition: SimplifyIndVar.h:64
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RecursivelyDeleteDeadPHINode
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:627
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::Optional::getPointer
constexpr const T * getPointer() const
Definition: Optional.h:277
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:283
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Flatten
bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, TargetTransformInfo *TTI, LPMUpdater *U, MemorySSAUpdater *MSSAU)
Definition: LoopFlatten.cpp:893
LoopDeletionResult::Deleted
@ Deleted
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
FlattenInfo::Widened
bool Widened
Definition: LoopFlatten.cpp:137
llvm::ScalarEvolution::getTripCountFromExitCount
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount, bool Extend=true)
Convert from an "exit count" (i.e.
Definition: ScalarEvolution.cpp:7620
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:998
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::OverflowResult::MayOverflow
@ MayOverflow
May or may not overflow.
llvm::User
Definition: User.h:44
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
llvm::PatternMatch::m_c_Add
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
Definition: PatternMatch.h:2243
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2849
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
FlattenInfo::isInnerLoopIncrement
bool isInnerLoopIncrement(User *U)
Definition: LoopFlatten.cpp:153
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:355
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::WideIVInfo::NarrowIV
PHINode * NarrowIV
Definition: SimplifyIndVar.h:65
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
LoopUtils.h
FlattenInfo::InnerBranch
BranchInst * InnerBranch
Definition: LoopFlatten.cpp:130
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:789
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:48
PatternMatch.h
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:869
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::OverflowResult
OverflowResult
Definition: ValueTracking.h:486
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3180
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4398
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:137
llvm::HighlightColor::Remark
@ Remark
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:475
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
FlattenInfo::OuterTripCount
Value * OuterTripCount
Definition: LoopFlatten.cpp:120
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
setLoopComponents
static bool setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment, SmallPtrSetImpl< Instruction * > &IterationInstructions)
Definition: LoopFlatten.cpp:268
llvm::isGuaranteedToExecuteForEveryIteration
bool isGuaranteedToExecuteForEveryIteration(const Instruction *I, const Loop *L)
Return true if this function can prove that the instruction I is executed for every iteration of the ...
Definition: ValueTracking.cpp:5339
llvm::PHINode::hasConstantValue
Value * hasConstantValue() const
If the specified PHI node always merges together the same value, return the value,...
Definition: Instructions.cpp:152
llvm::MemorySSAUpdater::removeEdge
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
Definition: MemorySSAUpdater.cpp:531
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
CanWidenIV
static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:774
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2514
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:577
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
AssumeNoOverflow
static cl::opt< bool > AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, cl::init(false), cl::desc("Assume that the product of the two iteration " "trip counts will never overflow"))
RepeatedInstructionThreshold
static cl::opt< unsigned > RepeatedInstructionThreshold("loop-flatten-cost-threshold", cl::Hidden, cl::init(2), cl::desc("Limit on the cost of instructions that can be repeated due to " "loop flattening"))
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3155
FlattenInfo::checkInnerInductionPhiUsers
bool checkInnerInductionPhiUsers(SmallPtrSet< Value *, 4 > &ValidOuterPHIUses)
Definition: LoopFlatten.cpp:235
I
#define I(x, y, z)
Definition: MD5.cpp:58
FlattenInfo::isNarrowInductionPhi
bool isNarrowInductionPhi(PHINode *Phi)
Definition: LoopFlatten.cpp:147
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
checkIVUsers
static bool checkIVUsers(FlattenInfo &FI)
Definition: LoopFlatten.cpp:582
FlattenInfo::isInnerLoopTest
bool isInnerLoopTest(User *U)
Definition: LoopFlatten.cpp:159
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
FlattenInfo::InnerIncrement
BinaryOperator * InnerIncrement
Definition: LoopFlatten.cpp:128
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
LoopNestAnalysis.h
llvm::LoopInfo::erase
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:875
DoFlattenLoopPair
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI, LPMUpdater *U, MemorySSAUpdater *MSSAU)
Definition: LoopFlatten.cpp:701
llvm::LPMUpdater::markLoopAsDeleted
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Definition: LoopPassManager.h:282
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LoopPassManager.h
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:948
FlattenInfo::FlattenInfo
FlattenInfo(Loop *OL, Loop *IL)
Definition: LoopFlatten.cpp:145
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3177
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
Compare
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
Definition: README_P9.txt:309
checkOverflow
static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT, AssumptionCache *AC)
Definition: LoopFlatten.cpp:606
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::BinaryOperator
Definition: InstrTypes.h:188
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:214
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:744
verifyTripCount
static bool verifyTripCount(Value *RHS, Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened)
Definition: LoopFlatten.cpp:284
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:217
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
FlattenInfo::NarrowInnerInductionPHI
PHINode * NarrowInnerInductionPHI
Definition: LoopFlatten.cpp:140
FlattenInfo::InnerTripCount
Value * InnerTripCount
Definition: LoopFlatten.cpp:119
FlattenInfo::NarrowOuterInductionPHI
PHINode * NarrowOuterInductionPHI
Definition: LoopFlatten.cpp:141
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
WidenIV
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
Definition: SimplifyIndVar.cpp:1128
LAM
LoopAnalysisManager LAM
Definition: PassBuilderBindings.cpp:58
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:8006
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:58
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
FlattenInfo::InnerPHIsToTransform
SmallPtrSet< PHINode *, 4 > InnerPHIsToTransform
Definition: LoopFlatten.cpp:135
FlattenInfo::matchLinearIVUser
bool matchLinearIVUser(User *U, Value *InnerTripCount, SmallPtrSet< Value *, 4 > &ValidOuterPHIUses)
Definition: LoopFlatten.cpp:192
Function.h
llvm::initializeLoopFlattenLegacyPassPass
void initializeLoopFlattenLegacyPassPass(PassRegistry &)
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::OverflowResult::AlwaysOverflowsHigh
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
llvm::PatternMatch::m_c_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
Definition: PatternMatch.h:2250
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:117
CanFlattenLoopPair
static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:655
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1595
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:780
Dominators.h
CreateMul
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:243
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
FlattenInfo
Definition: LoopFlatten.cpp:110
TargetTransformInfo.h
llvm::createLoopFlattenPass
FunctionPass * createLoopFlattenPass()
Definition: LoopFlatten.cpp:969
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2664
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallPtrSetImpl< Instruction * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::LoopFlattenPass::run
PreservedAnalyses run(LoopNest &LN, LoopAnalysisManager &LAM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopFlatten.cpp:907
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
FlattenInfo::isOuterLoopIncrement
bool isOuterLoopIncrement(User *U)
Definition: LoopFlatten.cpp:156
llvm::Loop::getInductionVariable
PHINode * getInductionVariable(ScalarEvolution &SE) const
Return the loop induction variable if found, else return nullptr.
Definition: LoopInfo.cpp:290
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3099
llvm::LoopNest::getLoopNest
static std::unique_ptr< LoopNest > getLoopNest(Loop &Root, ScalarEvolution &SE)
Construct a LoopNest object.
Definition: LoopNestAnalysis.cpp:47
raw_ostream.h
llvm::PatternMatch::m_Trunc
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:1621
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7876
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
FlattenInfo::InnerLoop
Loop * InnerLoop
Definition: LoopFlatten.cpp:112
loops
loop Flattens loops
Definition: LoopFlatten.cpp:966
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3192
SimplifyIndVar.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37