LLVM  14.0.0git
LoopFlatten.cpp
Go to the documentation of this file.
1 //===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass flattens pairs nested loops into a single loop.
10 //
11 // The intention is to optimise loop nests like this, which together access an
12 // array linearly:
13 // for (int i = 0; i < N; ++i)
14 // for (int j = 0; j < M; ++j)
15 // f(A[i*M+j]);
16 // into one loop:
17 // for (int i = 0; i < (N*M); ++i)
18 // f(A[i]);
19 //
20 // It can also flatten loops where the induction variables are not used in the
21 // loop. This is only worth doing if the induction variables are only used in an
22 // expression like i*M+j. If they had any other uses, we would have to insert a
23 // div/mod to reconstruct the original values, so this wouldn't be profitable.
24 //
25 // We also need to prove that N*M will not overflow.
26 //
27 //===----------------------------------------------------------------------===//
28 
30 
31 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/IR/Dominators.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/IRBuilder.h"
41 #include "llvm/IR/Module.h"
42 #include "llvm/IR/PatternMatch.h"
43 #include "llvm/IR/Verifier.h"
44 #include "llvm/InitializePasses.h"
45 #include "llvm/Pass.h"
46 #include "llvm/Support/Debug.h"
48 #include "llvm/Transforms/Scalar.h"
53 
54 using namespace llvm;
55 using namespace llvm::PatternMatch;
56 
57 #define DEBUG_TYPE "loop-flatten"
58 
59 STATISTIC(NumFlattened, "Number of loops flattened");
60 
62  "loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
63  cl::desc("Limit on the cost of instructions that can be repeated due to "
64  "loop flattening"));
65 
66 static cl::opt<bool>
67  AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden,
68  cl::init(false),
69  cl::desc("Assume that the product of the two iteration "
70  "trip counts will never overflow"));
71 
72 static cl::opt<bool>
73  WidenIV("loop-flatten-widen-iv", cl::Hidden,
74  cl::init(true),
75  cl::desc("Widen the loop induction variables, if possible, so "
76  "overflow checks won't reject flattening"));
77 
78 struct FlattenInfo {
79  Loop *OuterLoop = nullptr;
80  Loop *InnerLoop = nullptr;
81  // These PHINodes correspond to loop induction variables, which are expected
82  // to start at zero and increment by one on each loop.
83  PHINode *InnerInductionPHI = nullptr;
84  PHINode *OuterInductionPHI = nullptr;
85  Value *InnerTripCount = nullptr;
86  Value *OuterTripCount = nullptr;
87  BinaryOperator *InnerIncrement = nullptr;
88  BinaryOperator *OuterIncrement = nullptr;
89  BranchInst *InnerBranch = nullptr;
90  BranchInst *OuterBranch = nullptr;
93 
94  // Whether this holds the flatten info before or after widening.
95  bool Widened = false;
96 
97  // Holds the old/narrow induction phis, i.e. the Phis before IV widening has
98  // been applied. This bookkeeping is used so we can skip some checks on these
99  // phi nodes.
100  PHINode *NarrowInnerInductionPHI = nullptr;
101  PHINode *NarrowOuterInductionPHI = nullptr;
102 
103  FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {};
104 
106  // This can't be the narrow phi if we haven't widened the IV first.
107  if (!Widened)
108  return false;
109  return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
110  }
111 };
112 
113 static bool
114 setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
115  SmallPtrSetImpl<Instruction *> &IterationInstructions) {
116  TripCount = TC;
117  IterationInstructions.insert(Increment);
118  LLVM_DEBUG(dbgs() << "Found Increment: "; Increment->dump());
119  LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
120  LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
121  return true;
122 }
123 
124 // Finds the induction variable, increment and trip count for a simple loop that
125 // we can flatten.
126 static bool findLoopComponents(
127  Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions,
128  PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
129  BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
130  LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n");
131 
132  if (!L->isLoopSimplifyForm()) {
133  LLVM_DEBUG(dbgs() << "Loop is not in normal form\n");
134  return false;
135  }
136 
137  // Currently, to simplify the implementation, the Loop induction variable must
138  // start at zero and increment with a step size of one.
139  if (!L->isCanonical(*SE)) {
140  LLVM_DEBUG(dbgs() << "Loop is not canonical\n");
141  return false;
142  }
143 
144  // There must be exactly one exiting block, and it must be the same at the
145  // latch.
146  BasicBlock *Latch = L->getLoopLatch();
147  if (L->getExitingBlock() != Latch) {
148  LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n");
149  return false;
150  }
151 
152  // Find the induction PHI. If there is no induction PHI, we can't do the
153  // transformation. TODO: could other variables trigger this? Do we have to
154  // search for the best one?
155  InductionPHI = L->getInductionVariable(*SE);
156  if (!InductionPHI) {
157  LLVM_DEBUG(dbgs() << "Could not find induction PHI\n");
158  return false;
159  }
160  LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump());
161 
162  bool ContinueOnTrue = L->contains(Latch->getTerminator()->getSuccessor(0));
163  auto IsValidPredicate = [&](ICmpInst::Predicate Pred) {
164  if (ContinueOnTrue)
165  return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT;
166  else
167  return Pred == CmpInst::ICMP_EQ;
168  };
169 
170  // Find Compare and make sure it is valid. getLatchCmpInst checks that the
171  // back branch of the latch is conditional.
173  if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) ||
174  Compare->hasNUsesOrMore(2)) {
175  LLVM_DEBUG(dbgs() << "Could not find valid comparison\n");
176  return false;
177  }
178  BackBranch = cast<BranchInst>(Latch->getTerminator());
179  IterationInstructions.insert(BackBranch);
180  LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump());
181  IterationInstructions.insert(Compare);
182  LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump());
183 
184  // Find increment and trip count.
185  // There are exactly 2 incoming values to the induction phi; one from the
186  // pre-header and one from the latch. The incoming latch value is the
187  // increment variable.
188  Increment =
189  dyn_cast<BinaryOperator>(InductionPHI->getIncomingValueForBlock(Latch));
190  if (Increment->hasNUsesOrMore(3)) {
191  LLVM_DEBUG(dbgs() << "Could not find valid increment\n");
192  return false;
193  }
194  // The trip count is the RHS of the compare. If this doesn't match the trip
195  // count computed by SCEV then this is because the trip count variable
196  // has been widened so the types don't match, or because it is a constant and
197  // another transformation has changed the compare (e.g. icmp ult %inc,
198  // tripcount -> icmp ult %j, tripcount-1), or both.
199  Value *RHS = Compare->getOperand(1);
200  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
201  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
202  LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
203  return false;
204  }
205  // The use of the Extend=false flag on getTripCountFromExitCount was added
206  // during a refactoring to preserve existing behavior. However, there's
207  // nothing obvious in the surrounding code when handles the overflow case.
208  // FIXME: audit code to establish whether there's a latent bug here.
209  const SCEV *SCEVTripCount =
210  SE->getTripCountFromExitCount(BackedgeTakenCount, false);
211  const SCEV *SCEVRHS = SE->getSCEV(RHS);
212  if (SCEVRHS == SCEVTripCount)
213  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
214  ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
215  if (ConstantRHS) {
216  const SCEV *BackedgeTCExt = nullptr;
217  if (IsWidened) {
218  const SCEV *SCEVTripCountExt;
219  // Find the extended backedge taken count and extended trip count using
220  // SCEV. One of these should now match the RHS of the compare.
221  BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
222  SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
223  if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
224  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
225  return false;
226  }
227  }
228  // If the RHS of the compare is equal to the backedge taken count we need
229  // to add one to get the trip count.
230  if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
231  ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
232  Value *NewRHS = ConstantInt::get(
233  ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
234  return setLoopComponents(NewRHS, TripCount, Increment,
235  IterationInstructions);
236  }
237  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
238  }
239  // If the RHS isn't a constant then check that the reason it doesn't match
240  // the SCEV trip count is because the RHS is a ZExt or SExt instruction
241  // (and take the trip count to be the RHS).
242  if (!IsWidened) {
243  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
244  return false;
245  }
246  auto *TripCountInst = dyn_cast<Instruction>(RHS);
247  if (!TripCountInst) {
248  LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
249  return false;
250  }
251  if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
252  SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
253  LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
254  return false;
255  }
256  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
257 }
258 
259 static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
260  // All PHIs in the inner and outer headers must either be:
261  // - The induction PHI, which we are going to rewrite as one induction in
262  // the new loop. This is already checked by findLoopComponents.
263  // - An outer header PHI with all incoming values from outside the loop.
264  // LoopSimplify guarantees we have a pre-header, so we don't need to
265  // worry about that here.
266  // - Pairs of PHIs in the inner and outer headers, which implement a
267  // loop-carried dependency that will still be valid in the new loop. To
268  // be valid, this variable must be modified only in the inner loop.
269 
270  // The set of PHI nodes in the outer loop header that we know will still be
271  // valid after the transformation. These will not need to be modified (with
272  // the exception of the induction variable), but we do need to check that
273  // there are no unsafe PHI nodes.
274  SmallPtrSet<PHINode *, 4> SafeOuterPHIs;
275  SafeOuterPHIs.insert(FI.OuterInductionPHI);
276 
277  // Check that all PHI nodes in the inner loop header match one of the valid
278  // patterns.
279  for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) {
280  // The induction PHIs break these rules, and that's OK because we treat
281  // them specially when doing the transformation.
282  if (&InnerPHI == FI.InnerInductionPHI)
283  continue;
284  if (FI.isNarrowInductionPhi(&InnerPHI))
285  continue;
286 
287  // Each inner loop PHI node must have two incoming values/blocks - one
288  // from the pre-header, and one from the latch.
289  assert(InnerPHI.getNumIncomingValues() == 2);
290  Value *PreHeaderValue =
291  InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader());
292  Value *LatchValue =
293  InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch());
294 
295  // The incoming value from the outer loop must be the PHI node in the
296  // outer loop header, with no modifications made in the top of the outer
297  // loop.
298  PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
299  if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) {
300  LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n");
301  return false;
302  }
303 
304  // The other incoming value must come from the inner loop, without any
305  // modifications in the tail end of the outer loop. We are in LCSSA form,
306  // so this will actually be a PHI in the inner loop's exit block, which
307  // only uses values from inside the inner loop.
308  PHINode *LCSSAPHI = dyn_cast<PHINode>(
310  if (!LCSSAPHI) {
311  LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n");
312  return false;
313  }
314 
315  // The value used by the LCSSA PHI must be the same one that the inner
316  // loop's PHI uses.
317  if (LCSSAPHI->hasConstantValue() != LatchValue) {
318  LLVM_DEBUG(
319  dbgs() << "LCSSA PHI incoming value does not match latch value\n");
320  return false;
321  }
322 
323  LLVM_DEBUG(dbgs() << "PHI pair is safe:\n");
324  LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump());
325  LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump());
326  SafeOuterPHIs.insert(OuterPHI);
327  FI.InnerPHIsToTransform.insert(&InnerPHI);
328  }
329 
330  for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
331  if (FI.isNarrowInductionPhi(&OuterPHI))
332  continue;
333  if (!SafeOuterPHIs.count(&OuterPHI)) {
334  LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
335  return false;
336  }
337  }
338 
339  LLVM_DEBUG(dbgs() << "checkPHIs: OK\n");
340  return true;
341 }
342 
343 static bool
345  SmallPtrSetImpl<Instruction *> &IterationInstructions,
346  const TargetTransformInfo *TTI) {
347  // Check for instructions in the outer but not inner loop. If any of these
348  // have side-effects then this transformation is not legal, and if there is
349  // a significant amount of code here which can't be optimised out that it's
350  // not profitable (as these instructions would get executed for each
351  // iteration of the inner loop).
352  InstructionCost RepeatedInstrCost = 0;
353  for (auto *B : FI.OuterLoop->getBlocks()) {
354  if (FI.InnerLoop->contains(B))
355  continue;
356 
357  for (auto &I : *B) {
358  if (!isa<PHINode>(&I) && !I.isTerminator() &&
360  LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have "
361  "side effects: ";
362  I.dump());
363  return false;
364  }
365  // The execution count of the outer loop's iteration instructions
366  // (increment, compare and branch) will be increased, but the
367  // equivalent instructions will be removed from the inner loop, so
368  // they make a net difference of zero.
369  if (IterationInstructions.count(&I))
370  continue;
371  // The uncoditional branch to the inner loop's header will turn into
372  // a fall-through, so adds no cost.
373  BranchInst *Br = dyn_cast<BranchInst>(&I);
374  if (Br && Br->isUnconditional() &&
375  Br->getSuccessor(0) == FI.InnerLoop->getHeader())
376  continue;
377  // Multiplies of the outer iteration variable and inner iteration
378  // count will be optimised out.
381  continue;
382  InstructionCost Cost =
384  LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump());
385  RepeatedInstrCost += Cost;
386  }
387  }
388 
389  LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: "
390  << RepeatedInstrCost << "\n");
391  // Bail out if flattening the loops would cause instructions in the outer
392  // loop but not in the inner loop to be executed extra times.
393  if (RepeatedInstrCost > RepeatedInstructionThreshold) {
394  LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n");
395  return false;
396  }
397 
398  LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n");
399  return true;
400 }
401 
402 static bool checkIVUsers(FlattenInfo &FI) {
403  // We require all uses of both induction variables to match this pattern:
404  //
405  // (OuterPHI * InnerTripCount) + InnerPHI
406  //
407  // Any uses of the induction variables not matching that pattern would
408  // require a div/mod to reconstruct in the flattened loop, so the
409  // transformation wouldn't be profitable.
410 
411  Value *InnerTripCount = FI.InnerTripCount;
412  if (FI.Widened &&
413  (isa<SExtInst>(InnerTripCount) || isa<ZExtInst>(InnerTripCount)))
414  InnerTripCount = cast<Instruction>(InnerTripCount)->getOperand(0);
415 
416  // Check that all uses of the inner loop's induction variable match the
417  // expected pattern, recording the uses of the outer IV.
418  SmallPtrSet<Value *, 4> ValidOuterPHIUses;
419  for (User *U : FI.InnerInductionPHI->users()) {
420  if (U == FI.InnerIncrement)
421  continue;
422 
423  // After widening the IVs, a trunc instruction might have been introduced,
424  // so look through truncs.
425  if (isa<TruncInst>(U)) {
426  if (!U->hasOneUse())
427  return false;
428  U = *U->user_begin();
429  }
430 
431  // If the use is in the compare (which is also the condition of the inner
432  // branch) then the compare has been altered by another transformation e.g
433  // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
434  // a constant. Ignore this use as the compare gets removed later anyway.
435  if (U == FI.InnerBranch->getCondition())
436  continue;
437 
438  LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
439 
440  Value *MatchedMul = nullptr;
441  Value *MatchedItCount = nullptr;
442  bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
443  m_Value(MatchedMul))) &&
444  match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
445  m_Value(MatchedItCount)));
446 
447  // Matches the same pattern as above, except it also looks for truncs
448  // on the phi, which can be the result of widening the induction variables.
449  bool IsAddTrunc =
451  m_Value(MatchedMul))) &&
453  m_Value(MatchedItCount)));
454 
455  if (!MatchedItCount)
456  return false;
457  // Look through extends if the IV has been widened.
458  if (FI.Widened &&
459  (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
460  assert(MatchedItCount->getType() == FI.InnerInductionPHI->getType() &&
461  "Unexpected type mismatch in types after widening");
462  MatchedItCount = isa<SExtInst>(MatchedItCount)
463  ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
464  : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
465  }
466 
467  if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
468  LLVM_DEBUG(dbgs() << "Use is optimisable\n");
469  ValidOuterPHIUses.insert(MatchedMul);
470  FI.LinearIVUses.insert(U);
471  } else {
472  LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
473  return false;
474  }
475  }
476 
477  // Check that there are no uses of the outer IV other than the ones found
478  // as part of the pattern above.
479  for (User *U : FI.OuterInductionPHI->users()) {
480  if (U == FI.OuterIncrement)
481  continue;
482 
483  auto IsValidOuterPHIUses = [&] (User *U) -> bool {
484  LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
485  if (!ValidOuterPHIUses.count(U)) {
486  LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
487  return false;
488  }
489  LLVM_DEBUG(dbgs() << "Use is optimisable\n");
490  return true;
491  };
492 
493  if (auto *V = dyn_cast<TruncInst>(U)) {
494  for (auto *K : V->users()) {
495  if (!IsValidOuterPHIUses(K))
496  return false;
497  }
498  continue;
499  }
500 
501  if (!IsValidOuterPHIUses(U))
502  return false;
503  }
504 
505  LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
506  dbgs() << "Found " << FI.LinearIVUses.size()
507  << " value(s) that can be replaced:\n";
508  for (Value *V : FI.LinearIVUses) {
509  dbgs() << " ";
510  V->dump();
511  });
512  return true;
513 }
514 
515 // Return an OverflowResult dependant on if overflow of the multiplication of
516 // InnerTripCount and OuterTripCount can be assumed not to happen.
518  AssumptionCache *AC) {
519  Function *F = FI.OuterLoop->getHeader()->getParent();
520  const DataLayout &DL = F->getParent()->getDataLayout();
521 
522  // For debugging/testing.
523  if (AssumeNoOverflow)
525 
526  // Check if the multiply could not overflow due to known ranges of the
527  // input values.
529  FI.InnerTripCount, FI.OuterTripCount, DL, AC,
532  return OR;
533 
534  for (Value *V : FI.LinearIVUses) {
535  for (Value *U : V->users()) {
536  if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
537  for (Value *GEPUser : U->users()) {
538  Instruction *GEPUserInst = dyn_cast<Instruction>(GEPUser);
539  if (!isa<LoadInst>(GEPUserInst) &&
540  !(isa<StoreInst>(GEPUserInst) &&
541  GEP == GEPUserInst->getOperand(1)))
542  continue;
544  FI.InnerLoop))
545  continue;
546  // The IV is used as the operand of a GEP which dominates the loop
547  // latch, and the IV is at least as wide as the address space of the
548  // GEP. In this case, the GEP would wrap around the address space
549  // before the IV increment wraps, which would be UB.
550  if (GEP->isInBounds() &&
551  V->getType()->getIntegerBitWidth() >=
552  DL.getPointerTypeSizeInBits(GEP->getType())) {
553  LLVM_DEBUG(
554  dbgs() << "use of linear IV would be UB if overflow occurred: ";
555  GEP->dump());
557  }
558  }
559  }
560  }
561  }
562 
564 }
565 
568  const TargetTransformInfo *TTI) {
569  SmallPtrSet<Instruction *, 8> IterationInstructions;
570  if (!findLoopComponents(FI.InnerLoop, IterationInstructions,
572  FI.InnerIncrement, FI.InnerBranch, SE, FI.Widened))
573  return false;
574  if (!findLoopComponents(FI.OuterLoop, IterationInstructions,
576  FI.OuterIncrement, FI.OuterBranch, SE, FI.Widened))
577  return false;
578 
579  // Both of the loop trip count values must be invariant in the outer loop
580  // (non-instructions are all inherently invariant).
582  LLVM_DEBUG(dbgs() << "inner loop trip count not invariant\n");
583  return false;
584  }
586  LLVM_DEBUG(dbgs() << "outer loop trip count not invariant\n");
587  return false;
588  }
589 
590  if (!checkPHIs(FI, TTI))
591  return false;
592 
593  // FIXME: it should be possible to handle different types correctly.
595  return false;
596 
597  if (!checkOuterLoopInsts(FI, IterationInstructions, TTI))
598  return false;
599 
600  // Find the values in the loop that can be replaced with the linearized
601  // induction variable, and check that there are no other uses of the inner
602  // or outer induction variable. If there were, we could still do this
603  // transformation, but we'd have to insert a div/mod to calculate the
604  // original IVs, so it wouldn't be profitable.
605  if (!checkIVUsers(FI))
606  return false;
607 
608  LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n");
609  return true;
610 }
611 
614  const TargetTransformInfo *TTI, LPMUpdater *U) {
615  Function *F = FI.OuterLoop->getHeader()->getParent();
616  LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
617  {
618  using namespace ore;
620  FI.InnerLoop->getHeader());
622  Remark << "Flattened into outer loop";
623  ORE.emit(Remark);
624  }
625 
626  Value *NewTripCount = BinaryOperator::CreateMul(
627  FI.InnerTripCount, FI.OuterTripCount, "flatten.tripcount",
629  LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
630  NewTripCount->dump());
631 
632  // Fix up PHI nodes that take values from the inner loop back-edge, which
633  // we are about to remove.
635 
636  // The old Phi will be optimised away later, but for now we can't leave
637  // leave it in an invalid state, so are updating them too.
638  for (PHINode *PHI : FI.InnerPHIsToTransform)
640 
641  // Modify the trip count of the outer loop to be the product of the two
642  // trip counts.
643  cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);
644 
645  // Replace the inner loop backedge with an unconditional branch to the exit.
646  BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
647  BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
648  InnerExitingBlock->getTerminator()->eraseFromParent();
649  BranchInst::Create(InnerExitBlock, InnerExitingBlock);
650  DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
651 
652  // Replace all uses of the polynomial calculated from the two induction
653  // variables with the one new one.
655  for (Value *V : FI.LinearIVUses) {
656  Value *OuterValue = FI.OuterInductionPHI;
657  if (FI.Widened)
658  OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
659  "flatten.trunciv");
660 
661  LLVM_DEBUG(dbgs() << "Replacing: "; V->dump();
662  dbgs() << "with: "; OuterValue->dump());
663  V->replaceAllUsesWith(OuterValue);
664  }
665 
666  // Tell LoopInfo, SCEV and the pass manager that the inner loop has been
667  // deleted, and any information that have about the outer loop invalidated.
668  SE->forgetLoop(FI.OuterLoop);
669  SE->forgetLoop(FI.InnerLoop);
670  if (U)
672  LI->erase(FI.InnerLoop);
673 
674  // Increment statistic value.
675  NumFlattened++;
676 
677  return true;
678 }
679 
680 static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
682  const TargetTransformInfo *TTI) {
683  if (!WidenIV) {
684  LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n");
685  return false;
686  }
687 
688  LLVM_DEBUG(dbgs() << "Try widening the IVs\n");
690  auto &DL = M->getDataLayout();
691  auto *InnerType = FI.InnerInductionPHI->getType();
692  auto *OuterType = FI.OuterInductionPHI->getType();
693  unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits();
694  auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext());
695 
696  // If both induction types are less than the maximum legal integer width,
697  // promote both to the widest type available so we know calculating
698  // (OuterTripCount * InnerTripCount) as the new trip count is safe.
699  if (InnerType != OuterType ||
700  InnerType->getScalarSizeInBits() >= MaxLegalSize ||
701  MaxLegalType->getScalarSizeInBits() < InnerType->getScalarSizeInBits() * 2) {
702  LLVM_DEBUG(dbgs() << "Can't widen the IV\n");
703  return false;
704  }
705 
706  SCEVExpander Rewriter(*SE, DL, "loopflatten");
708  unsigned ElimExt = 0;
709  unsigned Widened = 0;
710 
711  auto CreateWideIV = [&] (WideIVInfo WideIV, bool &Deleted) -> bool {
712  PHINode *WidePhi = createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts,
713  ElimExt, Widened, true /* HasGuards */,
714  true /* UsePostIncrementRanges */);
715  if (!WidePhi)
716  return false;
717  LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
718  LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump());
720  return true;
721  };
722 
723  bool Deleted;
724  if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted))
725  return false;
726  // Add the narrow phi to list, so that it will be adjusted later when the
727  // the transformation is performed.
728  if (!Deleted)
730 
731  if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted))
732  return false;
733 
734  assert(Widened && "Widened IV expected");
735  FI.Widened = true;
736 
737  // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
740 
741  // After widening, rediscover all the loop components.
742  return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
743 }
744 
747  const TargetTransformInfo *TTI, LPMUpdater *U) {
748  LLVM_DEBUG(
749  dbgs() << "Loop flattening running on outer loop "
750  << FI.OuterLoop->getHeader()->getName() << " and inner loop "
751  << FI.InnerLoop->getHeader()->getName() << " in "
752  << FI.OuterLoop->getHeader()->getParent()->getName() << "\n");
753 
754  if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI))
755  return false;
756 
757  // Check if we can widen the induction variables to avoid overflow checks.
758  bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);
759 
760  // It can happen that after widening of the IV, flattening may not be
761  // possible/happening, e.g. when it is deemed unprofitable. So bail here if
762  // that is the case.
763  // TODO: IV widening without performing the actual flattening transformation
764  // is not ideal. While this codegen change should not matter much, it is an
765  // unnecessary change which is better to avoid. It's unlikely this happens
766  // often, because if it's unprofitibale after widening, it should be
767  // unprofitabe before widening as checked in the first round of checks. But
768  // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
769  // relaxed. Because this is making a code change (the IV widening, but not
770  // the flattening), we return true here.
771  if (FI.Widened && !CanFlatten)
772  return true;
773 
774  // If we have widened and can perform the transformation, do that here.
775  if (CanFlatten)
776  return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
777 
778  // Otherwise, if we haven't widened the IV, check if the new iteration
779  // variable might overflow. In this case, we need to version the loop, and
780  // select the original version at runtime if the iteration space is too
781  // large.
782  // TODO: We currently don't version the loop.
783  OverflowResult OR = checkOverflow(FI, DT, AC);
786  LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n");
787  return false;
788  } else if (OR == OverflowResult::MayOverflow) {
789  LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
790  return false;
791  }
792 
793  LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
794  return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
795 }
796 
799  bool Changed = false;
800  for (Loop *InnerLoop : LN.getLoops()) {
801  auto *OuterLoop = InnerLoop->getParentLoop();
802  if (!OuterLoop)
803  continue;
804  FlattenInfo FI(OuterLoop, InnerLoop);
805  Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
806  }
807  return Changed;
808 }
809 
812  LPMUpdater &U) {
813 
814  bool Changed = false;
815 
816  // The loop flattening pass requires loops to be
817  // in simplified form, and also needs LCSSA. Running
818  // this pass will simplify all loops that contain inner loops,
819  // regardless of whether anything ends up being flattened.
820  Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U);
821 
822  if (!Changed)
823  return PreservedAnalyses::all();
824 
826 }
827 
828 namespace {
829 class LoopFlattenLegacyPass : public FunctionPass {
830 public:
831  static char ID; // Pass ID, replacement for typeid
832  LoopFlattenLegacyPass() : FunctionPass(ID) {
834  }
835 
836  // Possibly flatten loop L into its child.
837  bool runOnFunction(Function &F) override;
838 
839  void getAnalysisUsage(AnalysisUsage &AU) const override {
845  }
846 };
847 } // namespace
848 
850 INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
851  false, false)
854 INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
856 
857 FunctionPass *llvm::createLoopFlattenPass() { return new LoopFlattenLegacyPass(); }
858 
860  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
861  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
862  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
863  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
864  auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
865  auto *TTI = &TTIP.getTTI(F);
866  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
867  bool Changed = false;
868  for (Loop *L : *LI) {
869  auto LN = LoopNest::getLoopNest(*L, *SE);
870  Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr);
871  }
872  return Changed;
873 }
checkOuterLoopInsts
static bool checkOuterLoopInsts(FlattenInfo &FI, SmallPtrSetImpl< Instruction * > &IterationInstructions, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:344
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", false, false) INITIALIZE_PASS_END(LoopFlattenLegacyPass
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
AssumptionCache.h
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::createWideIV
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Definition: SimplifyIndVar.cpp:2043
llvm::Loop::getLatchCmpInst
ICmpInst * getLatchCmpInst() const
Get the latch condition instruction.
Definition: LoopInfo.cpp:174
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:54
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:742
llvm::OverflowResult::NeverOverflows
@ NeverOverflows
Never overflows.
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:82
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:721
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4630
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
llvm::computeOverflowForUnsignedMul
OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo=true)
Definition: ValueTracking.cpp:4748
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
ScalarEvolutionExpander.h
Scalar.h
flatten
loop flatten
Definition: LoopFlatten.cpp:854
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::Value::dump
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4813
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:65
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:169
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:634
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
FlattenInfo::InnerInductionPHI
PHINode * InnerInductionPHI
Definition: LoopFlatten.cpp:83
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:743
llvm::PHINode::removeIncomingValue
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Definition: Instructions.cpp:113
ValueTracking.h
Local.h
OptimizationRemarkEmitter.h
checkPHIs
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:259
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:149
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
FlattenInfo::OuterIncrement
BinaryOperator * OuterIncrement
Definition: LoopFlatten.cpp:88
LoopFlatten.h
ScalarEvolution.h
llvm::OverflowResult::AlwaysOverflowsLow
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
Module.h
findLoopComponents
static bool findLoopComponents(Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened)
Definition: LoopFlatten.cpp:126
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::Loop::isCanonical
bool isCanonical(ScalarEvolution &SE) const
Return true if the loop induction variable starts at zero and increments by one each time through the...
Definition: LoopInfo.cpp:411
FlattenInfo::OuterInductionPHI
PHINode * OuterInductionPHI
Definition: LoopFlatten.cpp:84
llvm::SmallPtrSet< Value *, 4 >
FlattenInfo::OuterLoop
Loop * OuterLoop
Definition: LoopFlatten.cpp:79
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopFlatten.cpp:57
FlattenInfo::LinearIVUses
SmallPtrSet< Value *, 4 > LinearIVUses
Definition: LoopFlatten.cpp:91
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
FlattenInfo::OuterBranch
BranchInst * OuterBranch
Definition: LoopFlatten.cpp:90
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::WideIVInfo
Collect information about induction variables that are used by sign/zero extend operations.
Definition: SimplifyIndVar.h:65
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RecursivelyDeleteDeadPHINode
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:633
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
LoopDeletionResult::Deleted
@ Deleted
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
FlattenInfo::Widened
bool Widened
Definition: LoopFlatten.cpp:95
llvm::ScalarEvolution::getTripCountFromExitCount
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount, bool Extend=true)
Convert from an "exit count" (i.e.
Definition: ScalarEvolution.cpp:7231
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
Flatten
bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, TargetTransformInfo *TTI, LPMUpdater *U)
Definition: LoopFlatten.cpp:797
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::OverflowResult::MayOverflow
@ MayOverflow
May or may not overflow.
llvm::User
Definition: User.h:44
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
llvm::PatternMatch::m_c_Add
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
Definition: PatternMatch.h:2232
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2833
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::Instruction
Definition: Instruction.h:45
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::WideIVInfo::NarrowIV
PHINode * NarrowIV
Definition: SimplifyIndVar.h:66
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
LoopUtils.h
FlattenInfo::InnerBranch
BranchInst * InnerBranch
Definition: LoopFlatten.cpp:89
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:783
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
PatternMatch.h
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:866
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::OverflowResult
OverflowResult
Definition: ValueTracking.h:496
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3164
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4110
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
llvm::HighlightColor::Remark
@ Remark
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
FlattenInfo::OuterTripCount
Value * OuterTripCount
Definition: LoopFlatten.cpp:86
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
setLoopComponents
static bool setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment, SmallPtrSetImpl< Instruction * > &IterationInstructions)
Definition: LoopFlatten.cpp:114
llvm::isGuaranteedToExecuteForEveryIteration
bool isGuaranteedToExecuteForEveryIteration(const Instruction *I, const Loop *L)
Return true if this function can prove that the instruction I is executed for every iteration of the ...
Definition: ValueTracking.cpp:5373
llvm::PHINode::hasConstantValue
Value * hasConstantValue() const
If the specified PHI node always merges together the same value, return the value,...
Definition: Instructions.cpp:152
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1185
CanWidenIV
static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:680
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2428
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:578
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:263
AssumeNoOverflow
static cl::opt< bool > AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, cl::init(false), cl::desc("Assume that the product of the two iteration " "trip counts will never overflow"))
RepeatedInstructionThreshold
static cl::opt< unsigned > RepeatedInstructionThreshold("loop-flatten-cost-threshold", cl::Hidden, cl::init(2), cl::desc("Limit on the cost of instructions that can be repeated due to " "loop flattening"))
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3139
I
#define I(x, y, z)
Definition: MD5.cpp:59
FlattenInfo::isNarrowInductionPhi
bool isNarrowInductionPhi(PHINode *Phi)
Definition: LoopFlatten.cpp:105
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
checkIVUsers
static bool checkIVUsers(FlattenInfo &FI)
Definition: LoopFlatten.cpp:402
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
FlattenInfo::InnerIncrement
BinaryOperator * InnerIncrement
Definition: LoopFlatten.cpp:87
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopInfo::erase
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:879
llvm::LPMUpdater::markLoopAsDeleted
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Definition: LoopPassManager.h:283
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
FlattenInfo::FlattenInfo
FlattenInfo(Loop *OL, Loop *IL)
Definition: LoopFlatten.cpp:103
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3161
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
Compare
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
Definition: README_P9.txt:309
checkOverflow
static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT, AssumptionCache *AC)
Definition: LoopFlatten.cpp:517
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::BinaryOperator
Definition: InstrTypes.h:190
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:216
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:746
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:219
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
FlattenInfo::NarrowInnerInductionPHI
PHINode * NarrowInnerInductionPHI
Definition: LoopFlatten.cpp:100
FlattenInfo::InnerTripCount
Value * InnerTripCount
Definition: LoopFlatten.cpp:85
FlattenInfo::NarrowOuterInductionPHI
PHINode * NarrowOuterInductionPHI
Definition: LoopFlatten.cpp:101
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
FlattenLoopPair
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI, LPMUpdater *U)
Definition: LoopFlatten.cpp:745
WidenIV
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
Definition: SimplifyIndVar.cpp:1129
LAM
LoopAnalysisManager LAM
Definition: PassBuilderBindings.cpp:58
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
Verifier.h
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7622
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
FlattenInfo::InnerPHIsToTransform
SmallPtrSet< PHINode *, 4 > InnerPHIsToTransform
Definition: LoopFlatten.cpp:92
Function.h
llvm::initializeLoopFlattenLegacyPassPass
void initializeLoopFlattenLegacyPassPass(PassRegistry &)
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::OverflowResult::AlwaysOverflowsHigh
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
llvm::PatternMatch::m_c_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
Definition: PatternMatch.h:2239
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:109
CanFlattenLoopPair
static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Definition: LoopFlatten.cpp:566
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1575
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:777
Dominators.h
CreateMul
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:246
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
FlattenInfo
Definition: LoopFlatten.cpp:78
TargetTransformInfo.h
llvm::createLoopFlattenPass
FunctionPass * createLoopFlattenPass()
Definition: LoopFlatten.cpp:857
llvm::PHINode
Definition: Instructions.h:2648
DoFlattenLoopPair
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI, LPMUpdater *U)
Definition: LoopFlatten.cpp:612
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallPtrSetImpl< Instruction * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::LoopFlattenPass::run
PreservedAnalyses run(LoopNest &LN, LoopAnalysisManager &LAM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopFlatten.cpp:810
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::Loop::getInductionVariable
PHINode * getInductionVariable(ScalarEvolution &SE) const
Return the loop induction variable if found, else return nullptr.
Definition: LoopInfo.cpp:294
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3083
llvm::LoopNest::getLoopNest
static std::unique_ptr< LoopNest > getLoopNest(Loop &Root, ScalarEvolution &SE)
Construct a LoopNest object.
Definition: LoopNestAnalysis.cpp:48
raw_ostream.h
llvm::PatternMatch::m_Trunc
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:1618
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7492
InitializePasses.h
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
FlattenInfo::InnerLoop
Loop * InnerLoop
Definition: LoopFlatten.cpp:80
loops
loop Flattens loops
Definition: LoopFlatten.cpp:854
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3176
SimplifyIndVar.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38