LLVM  3.7.0
LoopRerollPass.cpp
Go to the documentation of this file.
1 //===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop reroller.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/LoopPass.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/Support/Debug.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "loop-reroll"
41 
42 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
43 
// Hidden command-line knob (default 2048). collectPossibleIVs() rejects any
// candidate induction variable whose constant step is >= this value.
44 static cl::opt<unsigned>
45 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
46  cl::desc("The maximum increment for loop rerolling"));
47 
// Hidden command-line knob (default 400) bounding the number of failed
// matches tolerated during fuzzy matching, per its description string.
// NOTE(review): the code that consumes this option is not visible in this
// part of the file.
48 static cl::opt<unsigned>
49 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
50  cl::Hidden,
51  cl::desc("The maximum number of failures to tolerate"
52  " during fuzzy matching. (default: 400)"));
53 
54 // This loop re-rolling transformation aims to transform loops like this:
55 //
56 // int foo(int a);
57 // void bar(int *x) {
58 // for (int i = 0; i < 500; i += 3) {
59 // foo(i);
60 // foo(i+1);
61 // foo(i+2);
62 // }
63 // }
64 //
65 // into a loop like this:
66 //
67 // void bar(int *x) {
68 // for (int i = 0; i < 500; ++i)
69 // foo(i);
70 // }
71 //
72 // It does this by looking for loops that, besides the latch code, are composed
73 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
74 // to the induction variable, and where each DAG is isomorphic to the DAG
75 // rooted at the induction variable (excepting the sub-DAGs which root the
76 // other induction-variable increments). In other words, we're looking for loop
77 // bodies of the form:
78 //
79 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
80 // f(%iv)
81 // %iv.1 = add %iv, 1 <-- a root increment
82 // f(%iv.1)
83 // %iv.2 = add %iv, 2 <-- a root increment
84 // f(%iv.2)
85 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
86 // f(%iv.scale_m_1)
87 // ...
88 // %iv.next = add %iv, scale
89 // %cmp = icmp(%iv, ...)
90 // br %cmp, header, exit
91 //
92 // where each f(i) is a set of instructions that, collectively, are a function
93 // only of i (and other loop-invariant values).
94 //
95 // As a special case, we can also reroll loops like this:
96 //
97 // int foo(int);
98 // void bar(int *x) {
99 // for (int i = 0; i < 500; ++i) {
100 // x[3*i] = foo(0);
101 // x[3*i+1] = foo(0);
102 // x[3*i+2] = foo(0);
103 // }
104 // }
105 //
106 // into this:
107 //
108 // void bar(int *x) {
109 // for (int i = 0; i < 1500; ++i)
110 // x[i] = foo(0);
111 // }
112 //
113 // in which case, we're looking for inputs like this:
114 //
115 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
116 // %scaled.iv = mul %iv, scale
117 // f(%scaled.iv)
118 // %scaled.iv.1 = add %scaled.iv, 1
119 // f(%scaled.iv.1)
120 // %scaled.iv.2 = add %scaled.iv, 2
121 // f(%scaled.iv.2)
122 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
123 // f(%scaled.iv.scale_m_1)
124 // ...
125 // %iv.next = add %iv, 1
126 // %cmp = icmp(%iv, ...)
127 // br %cmp, header, exit
128 
129 namespace {
131  /// The maximum number of iterations that we'll try and reroll. This
132  /// has to be less than 25 in order to fit into a SmallBitVector.
133  IL_MaxRerollIterations = 16,
134  /// The bitvector index used by loop induction variables and other
135  /// instructions that belong to all iterations.
136  IL_All,
137  IL_End
138  };
139 
140  class LoopReroll : public LoopPass {
141  public:
142  static char ID; // Pass ID, replacement for typeid
143  LoopReroll() : LoopPass(ID) {
145  }
146 
147  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
148 
149  void getAnalysisUsage(AnalysisUsage &AU) const override {
157  }
158 
159  protected:
160  AliasAnalysis *AA;
161  LoopInfo *LI;
162  ScalarEvolution *SE;
163  TargetLibraryInfo *TLI;
164  DominatorTree *DT;
165 
166  typedef SmallVector<Instruction *, 16> SmallInstructionVector;
167  typedef SmallSet<Instruction *, 16> SmallInstructionSet;
168 
169  // A chain of isomorphic instructions, identified by a single-use PHI,
170  // representing a reduction. Only the last value may be used outside the
171  // loop.
172  struct SimpleLoopReduction {
173  SimpleLoopReduction(Instruction *P, Loop *L)
174  : Valid(false), Instructions(1, P) {
175  assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
176  add(L);
177  }
178 
179  bool valid() const {
180  return Valid;
181  }
182 
183  Instruction *getPHI() const {
184  assert(Valid && "Using invalid reduction");
185  return Instructions.front();
186  }
187 
188  Instruction *getReducedValue() const {
189  assert(Valid && "Using invalid reduction");
190  return Instructions.back();
191  }
192 
193  Instruction *get(size_t i) const {
194  assert(Valid && "Using invalid reduction");
195  return Instructions[i+1];
196  }
197 
198  Instruction *operator [] (size_t i) const { return get(i); }
199 
200  // The size, ignoring the initial PHI.
201  size_t size() const {
202  assert(Valid && "Using invalid reduction");
203  return Instructions.size()-1;
204  }
205 
206  typedef SmallInstructionVector::iterator iterator;
207  typedef SmallInstructionVector::const_iterator const_iterator;
208 
209  iterator begin() {
210  assert(Valid && "Using invalid reduction");
211  return std::next(Instructions.begin());
212  }
213 
214  const_iterator begin() const {
215  assert(Valid && "Using invalid reduction");
216  return std::next(Instructions.begin());
217  }
218 
219  iterator end() { return Instructions.end(); }
220  const_iterator end() const { return Instructions.end(); }
221 
222  protected:
223  bool Valid;
224  SmallInstructionVector Instructions;
225 
226  void add(Loop *L);
227  };
228 
229  // The set of all reductions, and state tracking of possible reductions
230  // during loop instruction processing.
231  struct ReductionTracker {
232  typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector;
233 
234  // Add a new possible reduction.
235  void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
236 
237  // Setup to track possible reductions corresponding to the provided
238  // rerolling scale. Only reductions with a number of non-PHI instructions
239  // that is divisible by the scale are considered. Three instructions sets
240  // are filled in:
241  // - A set of all possible instructions in eligible reductions.
242  // - A set of all PHIs in eligible reductions
243  // - A set of all reduced values (last instructions) in eligible
244  // reductions.
245  void restrictToScale(uint64_t Scale,
246  SmallInstructionSet &PossibleRedSet,
247  SmallInstructionSet &PossibleRedPHISet,
248  SmallInstructionSet &PossibleRedLastSet) {
249  PossibleRedIdx.clear();
250  PossibleRedIter.clear();
251  Reds.clear();
252 
253  for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
254  if (PossibleReds[i].size() % Scale == 0) {
255  PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
256  PossibleRedPHISet.insert(PossibleReds[i].getPHI());
257 
258  PossibleRedSet.insert(PossibleReds[i].getPHI());
259  PossibleRedIdx[PossibleReds[i].getPHI()] = i;
260  for (Instruction *J : PossibleReds[i]) {
261  PossibleRedSet.insert(J);
262  PossibleRedIdx[J] = i;
263  }
264  }
265  }
266 
267  // The functions below are used while processing the loop instructions.
268 
269  // Are the two instructions both from reductions, and furthermore, from
270  // the same reduction?
271  bool isPairInSame(Instruction *J1, Instruction *J2) {
272  DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
273  if (J1I != PossibleRedIdx.end()) {
274  DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
275  if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
276  return true;
277  }
278 
279  return false;
280  }
281 
282  // The two provided instructions, the first from the base iteration, and
283  // the second from iteration i, form a matched pair. If these are part of
284  // a reduction, record that fact.
285  void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
286  if (PossibleRedIdx.count(J1)) {
287  assert(PossibleRedIdx.count(J2) &&
288  "Recording reduction vs. non-reduction instruction?");
289 
290  PossibleRedIter[J1] = 0;
291  PossibleRedIter[J2] = i;
292 
293  int Idx = PossibleRedIdx[J1];
294  assert(Idx == PossibleRedIdx[J2] &&
295  "Recording pair from different reductions?");
296  Reds.insert(Idx);
297  }
298  }
299 
300  // The functions below can be called after we've finished processing all
301  // instructions in the loop, and we know which reductions were selected.
302 
303  // Is the provided instruction the PHI of a reduction selected for
304  // rerolling?
305  bool isSelectedPHI(Instruction *J) {
306  if (!isa<PHINode>(J))
307  return false;
308 
309  for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
310  RI != RIE; ++RI) {
311  int i = *RI;
312  if (cast<Instruction>(J) == PossibleReds[i].getPHI())
313  return true;
314  }
315 
316  return false;
317  }
318 
319  bool validateSelected();
320  void replaceSelected();
321 
322  protected:
323  // The vector of all possible reductions (for any scale).
324  SmallReductionVector PossibleReds;
325 
326  DenseMap<Instruction *, int> PossibleRedIdx;
327  DenseMap<Instruction *, int> PossibleRedIter;
328  DenseSet<int> Reds;
329  };
330 
331  // A DAGRootSet models an induction variable being used in a rerollable
332  // loop. For example,
333  //
334  // x[i*3+0] = y1
335  // x[i*3+1] = y2
336  // x[i*3+2] = y3
337  //
338  // Base instruction -> i*3
339  // +---+----+
340  // / | \
341  // ST[y1] +1 +2 <-- Roots
342  // | |
343  // ST[y2] ST[y3]
344  //
345  // There may be multiple DAGRoots, for example:
346  //
347  // x[i*2+0] = ... (1)
348  // x[i*2+1] = ... (1)
349  // x[i*2+4] = ... (2)
350  // x[i*2+5] = ... (2)
351  // x[(i+1234)*2+5678] = ... (3)
352  // x[(i+1234)*2+5679] = ... (3)
353  //
354  // The loop will be rerolled by adding a new loop induction variable,
355  // one for the Base instruction in each DAGRootSet.
356  //
357  struct DAGRootSet {
  // The lowest-indexed instruction of this set. findRootsBase() takes it
  // from the first entry of the index-ordered map built by
  // collectPossibleRoots().
358  Instruction *BaseInst;
  // The other members of the set, appended by findRootsBase() in increasing
  // index order.
359  SmallInstructionVector Roots;
360  // The instructions between IV and BaseInst (but not including BaseInst).
361  SmallInstructionSet SubsumedInsts;
362  };
363 
364  // The set of all DAG roots, and state tracking of all roots
365  // for a particular induction variable.
// Drives rerolling for one induction variable in three stages, in order:
// findRoots(), validate(), replace().
// NOTE(review): the constructor's SE/AA parameters and the declarations of
// UsesTy and RootSets are referenced below but are not visible in this
// listing; confirm against the full file.
366  struct DAGRootTracker {
367  DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
369  TargetLibraryInfo *TLI)
370  : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {}
371 
372  /// Stage 1: Find all the DAG roots for the induction variable.
373  bool findRoots();
374  /// Stage 2: Validate if the found roots are valid.
375  bool validate(ReductionTracker &Reductions);
376  /// Stage 3: Assuming validate() returned true, perform the
377  /// replacement.
378  /// @param IterCount The maximum iteration count of L.
379  void replace(const SCEV *IterCount);
380 
381  protected:
383 
  // Root discovery helpers used by findRoots(). SubsumedInsts is taken by
  // value, so each call works on its own copy of the set.
384  bool findRootsRecursive(Instruction *IVU,
385  SmallInstructionSet SubsumedInsts);
386  bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
  // Fills Roots with the users of Base keyed by their constant offset.
387  bool collectPossibleRoots(Instruction *Base,
388  std::map<int64_t,Instruction*> &Roots);
389 
  // Helpers that populate Uses (instruction -> iterations it belongs to).
390  bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
391  void collectInLoopUserSet(const SmallInstructionVector &Roots,
392  const SmallInstructionSet &Exclude,
393  const SmallInstructionSet &Final,
395  void collectInLoopUserSet(Instruction *Root,
396  const SmallInstructionSet &Exclude,
397  const SmallInstructionSet &Final,
399 
  // Iteration and query helpers over Uses and the root sets.
400  UsesTy::iterator nextInstr(int Val, UsesTy &In,
401  const SmallInstructionSet &Exclude,
402  UsesTy::iterator *StartI=nullptr);
403  bool isBaseInst(Instruction *I);
404  bool isRootInst(Instruction *I);
405  bool instrDependsOn(Instruction *I,
406  UsesTy::iterator Start,
407  UsesTy::iterator End);
408 
409  LoopReroll *Parent;
410 
411  // Members of Parent, replicated here for brevity.
412  Loop *L;
413  ScalarEvolution *SE;
414  AliasAnalysis *AA;
415  TargetLibraryInfo *TLI;
416 
417  // The loop induction variable.
418  Instruction *IV;
  // Inc and Scale are not set by the constructor; findRoots() assigns them.
419  // Loop step amount.
420  uint64_t Inc;
421  // Loop reroll count; if Inc == 1, this records the scaling applied
422  // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
423  // If Inc is not 1, Scale = Inc.
424  uint64_t Scale;
425  // The roots themselves.
427  // All increment instructions for IV.
428  SmallInstructionVector LoopIncs;
429  // Map of all instructions in the loop (in order) to the iterations
430  // they are used in (or specially, IL_All for instructions
431  // used in the loop increment mechanism).
432  UsesTy Uses;
433  };
434 
435  void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
436  void collectPossibleReductions(Loop *L,
437  ReductionTracker &Reductions);
438  bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
439  ReductionTracker &Reductions);
440  };
441 }
442 
443 char LoopReroll::ID = 0;
444 INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
450 INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
451 
453  return new LoopReroll;
454 }
455 
456 // Returns true if the provided instruction is used outside the given loop.
457 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
458 // non-loop blocks to be outside the loop.
460  for (User *U : I->users()) {
461  if (!L->contains(cast<Instruction>(U)))
462  return true;
463  }
464  return false;
465 }
466 
467 // Collect the list of loop induction variables with respect to which it might
468 // be possible to reroll the loop.
469 void LoopReroll::collectPossibleIVs(Loop *L,
470  SmallInstructionVector &PossibleIVs) {
471  BasicBlock *Header = L->getHeader();
472  for (BasicBlock::iterator I = Header->begin(),
473  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
474  if (!isa<PHINode>(I))
475  continue;
476  if (!I->getType()->isIntegerTy())
477  continue;
478 
479  if (const SCEVAddRecExpr *PHISCEV =
480  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) {
481  if (PHISCEV->getLoop() != L)
482  continue;
483  if (!PHISCEV->isAffine())
484  continue;
485  if (const SCEVConstant *IncSCEV =
486  dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
487  if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
488  continue;
489  if (IncSCEV->getValue()->uge(MaxInc))
490  continue;
491 
492  DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
493  *PHISCEV << "\n");
494  PossibleIVs.push_back(I);
495  }
496  }
497  }
498 }
499 
500 // Add the remainder of the reduction-variable chain to the instruction vector
501 // (the initial PHINode has already been added). If successful, the object is
502 // marked as valid.
// NOTE(review): the definition line of this function is not visible in this
// listing; the body below uses Valid, Instructions and a Loop *L, matching
// the SimpleLoopReduction::add(Loop *L) declaration.
504  assert(!Valid && "Cannot add to an already-valid chain");
505 
506  // The reduction variable must be a chain of single-use instructions
507  // (including the PHI), except for the last value (which is used by the PHI
508  // and also outside the loop).
509  Instruction *C = Instructions.front();
 // A PHI without users cannot start a chain; leave the object invalid.
510  if (C->user_empty())
511  return;
512 
 // Walk forward, each time stepping to the first user of the current
 // instruction. Single-use instructions are appended to the chain; the walk
 // stops at the first instruction that does not have exactly one use.
 // NOTE(review): the PHI's own use count is not checked here, only its first
 // user is followed.
513  do {
514  C = cast<Instruction>(*C->user_begin());
515  if (C->hasOneUse()) {
 // Every interior link must be a binary operator...
516  if (!C->isBinaryOp())
517  return;
518 
 // ...and, except for the first link after the PHI, the same operation as
 // the link before it.
519  if (!(isa<PHINode>(Instructions.back()) ||
520  C->isSameOperationAs(Instructions.back())))
521  return;
522 
523  Instructions.push_back(C);
524  }
525  } while (C->hasOneUse());
526 
 // C was not appended by the loop. It is accepted as the final link only if
 // at least one link follows the PHI, C performs the same operation as that
 // last link, and C has uses.
527  if (Instructions.size() < 2 ||
528  !C->isSameOperationAs(Instructions.back()) ||
529  C->use_empty())
530  return;
531 
532  // C is now the (potential) last instruction in the reduction chain.
533  for (User *U : C->users()) {
534  // The only in-loop user can be the initial PHI.
535  if (L->contains(cast<Instruction>(U)))
536  if (cast<Instruction>(U) != Instructions.front())
537  return;
538  }
539 
 // Success: record the final link and mark the chain usable.
540  Instructions.push_back(C);
541  Valid = true;
542 }
543 
544 // Collect the vector of possible reduction variables.
545 void LoopReroll::collectPossibleReductions(Loop *L,
546  ReductionTracker &Reductions) {
547  BasicBlock *Header = L->getHeader();
548  for (BasicBlock::iterator I = Header->begin(),
549  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
550  if (!isa<PHINode>(I))
551  continue;
552  if (!I->getType()->isSingleValueType())
553  continue;
554 
555  SimpleLoopReduction SLR(I, L);
556  if (!SLR.valid())
557  continue;
558 
559  DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " <<
560  SLR.size() << " chained instructions)\n");
561  Reductions.addSLR(SLR);
562  }
563 }
564 
565 // Collect the set of all users of the provided root instruction. This set of
566 // users contains not only the direct users of the root instruction, but also
567 // all users of those users, and so on. There are two exceptions:
568 //
569 // 1. Instructions in the set of excluded instructions are never added to the
570 // use set (even if they are users). This is used, for example, to exclude
571 // including root increments in the use set of the primary IV.
572 //
573 // 2. Instructions in the set of final instructions are added to the use set
574 // if they are users, but their users are not added. This is used, for
575 // example, to prevent a reduction update from forcing all later reduction
576 // updates into the use set.
// Worklist-based implementation of the single-root overload. Root itself is
// inserted into Users along with everything reached from it.
// NOTE(review): the final parameter line (the declaration of Users) is not
// visible in this listing.
577 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
578  Instruction *Root, const SmallInstructionSet &Exclude,
579  const SmallInstructionSet &Final,
581  SmallInstructionVector Queue(1, Root);
582  while (!Queue.empty()) {
583  Instruction *I = Queue.pop_back_val();
 // Already recorded: nothing more to do for this instruction.
584  if (!Users.insert(I).second)
585  continue;
586 
 // 'Final' instructions are recorded but their users are not followed. Only
 // the loop over uses is guarded by this test; the feeder scan further down
 // runs for every instruction.
587  if (!Final.count(I))
588  for (Use &U : I->uses()) {
589  Instruction *User = cast<Instruction>(U.getUser());
590  if (PHINode *PN = dyn_cast<PHINode>(User)) {
591  // Ignore "wrap-around" uses to PHIs of this loop's header.
592  if (PN->getIncomingBlock(U) == L->getHeader())
593  continue;
594  }
595 
 // Follow only users that are inside the loop and not excluded.
596  if (L->contains(User) && !Exclude.count(User)) {
597  Queue.push_back(User);
598  }
599  }
600 
601  // We also want to collect single-user "feeder" values.
602  for (User::op_iterator OI = I->op_begin(),
603  OIE = I->op_end(); OI != OIE; ++OI) {
604  if (Instruction *Op = dyn_cast<Instruction>(*OI))
 // An operand qualifies when it has one use, lies in the loop, and is in
 // neither the Exclude nor the Final set.
605  if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
606  !Final.count(Op))
607  Queue.push_back(Op);
608  }
609  }
610 }
611 
612 // Collect all of the users of all of the provided root instructions (combined
613 // into a single set).
614 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
615  const SmallInstructionVector &Roots,
616  const SmallInstructionSet &Exclude,
617  const SmallInstructionSet &Final,
618  DenseSet<Instruction *> &Users) {
619  for (SmallInstructionVector::const_iterator I = Roots.begin(),
620  IE = Roots.end(); I != IE; ++I)
621  collectInLoopUserSet(*I, Exclude, Final, Users);
622 }
623 
625  if (LoadInst *LI = dyn_cast<LoadInst>(I))
626  return LI->isSimple();
627  if (StoreInst *SI = dyn_cast<StoreInst>(I))
628  return SI->isSimple();
629  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
630  return !MI->isVolatile();
631  return false;
632 }
633 
634 /// Return true if IVU is a "simple" arithmetic operation.
635 /// This is used for narrowing the search space for DAGRoots; only arithmetic
636 /// and GEPs can be part of a DAGRoot.
637 static bool isSimpleArithmeticOp(User *IVU) {
638  if (Instruction *I = dyn_cast<Instruction>(IVU)) {
639  switch (I->getOpcode()) {
640  default: return false;
641  case Instruction::Add:
642  case Instruction::Sub:
643  case Instruction::Mul:
644  case Instruction::Shl:
645  case Instruction::AShr:
646  case Instruction::LShr:
647  case Instruction::GetElementPtr:
648  case Instruction::Trunc:
649  case Instruction::ZExt:
650  case Instruction::SExt:
651  return true;
652  }
653  }
654  return false;
655 }
656 
// Returns true when BO is an Add instruction that has IV itself among its
// users, i.e. an add that feeds back into the induction PHI.
// NOTE(review): the declaration of BO is not visible in this listing;
// presumably it is U cast to a BinaryOperator - confirm.
657 static bool isLoopIncrement(User *U, Instruction *IV) {
659  if (!BO || BO->getOpcode() != Instruction::Add)
660  return false;
661 
 // Look for IV among the users of the add.
662  for (auto *UU : BO->users()) {
663  PHINode *PN = dyn_cast<PHINode>(UU);
664  if (PN && PN == IV)
665  return true;
666  }
667  return false;
668 }
669 
670 bool LoopReroll::DAGRootTracker::
671 collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
672  SmallInstructionVector BaseUsers;
673 
674  for (auto *I : Base->users()) {
675  ConstantInt *CI = nullptr;
676 
677  if (isLoopIncrement(I, IV)) {
678  LoopIncs.push_back(cast<Instruction>(I));
679  continue;
680  }
681 
682  // The root nodes must be either GEPs, ORs or ADDs.
683  if (auto *BO = dyn_cast<BinaryOperator>(I)) {
684  if (BO->getOpcode() == Instruction::Add ||
685  BO->getOpcode() == Instruction::Or)
686  CI = dyn_cast<ConstantInt>(BO->getOperand(1));
687  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
688  Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
689  CI = dyn_cast<ConstantInt>(LastOperand);
690  }
691 
692  if (!CI) {
693  if (Instruction *II = dyn_cast<Instruction>(I)) {
694  BaseUsers.push_back(II);
695  continue;
696  } else {
697  DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I << "\n");
698  return false;
699  }
700  }
701 
702  int64_t V = CI->getValue().getSExtValue();
703  if (Roots.find(V) != Roots.end())
704  // No duplicates, please.
705  return false;
706 
707  // FIXME: Add support for negative values.
708  if (V < 0) {
709  DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n");
710  return false;
711  }
712 
713  Roots[V] = cast<Instruction>(I);
714  }
715 
716  if (Roots.empty())
717  return false;
718 
719  // If we found non-loop-inc, non-root users of Base, assume they are
720  // for the zeroth root index. This is because "add %a, 0" gets optimized
721  // away.
722  if (BaseUsers.size()) {
723  if (Roots.find(0) != Roots.end()) {
724  DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
725  return false;
726  }
727  Roots[0] = Base;
728  }
729 
730  // Calculate the number of users of the base, or lowest indexed, iteration.
731  unsigned NumBaseUses = BaseUsers.size();
732  if (NumBaseUses == 0)
733  NumBaseUses = Roots.begin()->second->getNumUses();
734 
735  // Check that every node has the same number of users.
736  for (auto &KV : Roots) {
737  if (KV.first == 0)
738  continue;
739  if (KV.second->getNumUses() != NumBaseUses) {
740  DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
741  << "#Base=" << NumBaseUses << ", #Root=" <<
742  KV.second->getNumUses() << "\n");
743  return false;
744  }
745  }
746 
747  return true;
748 }
749 
// Starting from I, search the chain of users for an instruction that can
// serve as the base of a root set. SubsumedInsts is taken by value, so the
// insertions made here are seen by deeper calls but not by the caller.
750 bool LoopReroll::DAGRootTracker::
751 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
752  // Does the user look like it could be part of a root set?
753  // All its users must be simple arithmetic ops.
754  if (I->getNumUses() > IL_MaxRerollIterations)
755  return false;
756 
 // A multiply or PHI other than IV itself is tried as a base first; if that
 // succeeds the search along this path is finished.
757  if ((I->getOpcode() == Instruction::Mul ||
758  I->getOpcode() == Instruction::PHI) &&
759  I != IV &&
760  findRootsBase(I, SubsumedInsts))
761  return true;
762 
 // Otherwise I lies between IV and whatever base is found further down.
763  SubsumedInsts.insert(I);
764 
 // NOTE(review): a line inside this loop is not visible in this listing;
 // presumably it declares a loop-local 'I' derived from V that shadows the
 // parameter for the rest of the loop body - confirm against the full file.
765  for (User *V : I->users()) {
 // Known loop increments are skipped rather than searched.
767  if (std::find(LoopIncs.begin(), LoopIncs.end(), I) != LoopIncs.end())
768  continue;
769 
 // Any user that is not a simple arithmetic op, or below which the search
 // fails, makes the whole search fail.
770  if (!I || !isSimpleArithmeticOp(I) ||
771  !findRootsRecursive(I, SubsumedInsts))
772  return false;
773  }
774  return true;
775 }
776 
777 bool LoopReroll::DAGRootTracker::
778 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
779 
780  // The base instruction needs to be a multiply so
781  // that we can erase it.
782  if (IVU->getOpcode() != Instruction::Mul &&
783  IVU->getOpcode() != Instruction::PHI)
784  return false;
785 
786  std::map<int64_t, Instruction*> V;
787  if (!collectPossibleRoots(IVU, V))
788  return false;
789 
790  // If we didn't get a root for index zero, then IVU must be
791  // subsumed.
792  if (V.find(0) == V.end())
793  SubsumedInsts.insert(IVU);
794 
795  // Partition the vector into monotonically increasing indexes.
796  DAGRootSet DRS;
797  DRS.BaseInst = nullptr;
798 
799  for (auto &KV : V) {
800  if (!DRS.BaseInst) {
801  DRS.BaseInst = KV.second;
802  DRS.SubsumedInsts = SubsumedInsts;
803  } else if (DRS.Roots.empty()) {
804  DRS.Roots.push_back(KV.second);
805  } else if (V.find(KV.first - 1) != V.end()) {
806  DRS.Roots.push_back(KV.second);
807  } else {
808  // Linear sequence terminated.
809  RootSets.push_back(DRS);
810  DRS.BaseInst = KV.second;
811  DRS.SubsumedInsts = SubsumedInsts;
812  DRS.Roots.clear();
813  }
814  }
815  RootSets.push_back(DRS);
816 
817  return true;
818 }
819 
820 bool LoopReroll::DAGRootTracker::findRoots() {
821 
822  const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
823  Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
824  getValue()->getZExtValue();
825 
826  assert(RootSets.empty() && "Unclean state!");
827  if (Inc == 1) {
828  for (auto *IVU : IV->users()) {
829  if (isLoopIncrement(IVU, IV))
830  LoopIncs.push_back(cast<Instruction>(IVU));
831  }
832  if (!findRootsRecursive(IV, SmallInstructionSet()))
833  return false;
834  LoopIncs.push_back(IV);
835  } else {
836  if (!findRootsBase(IV, SmallInstructionSet()))
837  return false;
838  }
839 
840  // Ensure all sets have the same size.
841  if (RootSets.empty()) {
842  DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
843  return false;
844  }
845  for (auto &V : RootSets) {
846  if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
847  DEBUG(dbgs()
848  << "LRR: Aborting because not all root sets have the same size\n");
849  return false;
850  }
851  }
852 
853  // And ensure all loop iterations are consecutive. We rely on std::map
854  // providing ordered traversal.
855  for (auto &V : RootSets) {
856  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(V.BaseInst));
857  if (!ADR)
858  return false;
859 
860  // Consider a DAGRootSet with N-1 roots (so N different values including
861  // BaseInst).
862  // Define d = Roots[0] - BaseInst, which should be the same as
863  // Roots[I] - Roots[I-1] for all I in [1..N).
864  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
865  // loop iteration J.
866  //
867  // Now, For the loop iterations to be consecutive:
868  // D = d * N
869 
870  unsigned N = V.Roots.size() + 1;
871  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR);
872  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
873  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) {
874  DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n");
875  return false;
876  }
877  }
878  Scale = RootSets[0].Roots.size() + 1;
879 
880  if (Scale > IL_MaxRerollIterations) {
881  DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
882  << "#Found=" << Scale << ", #Max=" << IL_MaxRerollIterations
883  << "\n");
884  return false;
885  }
886 
887  DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale << "\n");
888 
889  return true;
890 }
891 
// Fills Uses with, for each instruction, the iteration(s) it belongs to:
// bit 0 for the base iteration, bit k for the k-th root of a set, and IL_All
// for subsumed instructions and the loop-increment machinery. Returns false
// if a root's use set differs in size from its base's use set.
// NOTE(review): the declarations of the local sets VBase and V are not
// visible in this listing.
892 bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
893  // Populate the MapVector with all instructions in the block, in order first,
894  // so we can iterate over the contents later in perfect order.
 // Only the instructions of the loop header block are pre-populated here.
895  for (auto &I : *L->getHeader()) {
896  Uses[&I].resize(IL_End);
897  }
898 
 // While collecting per-iteration users, never walk into the roots, the
 // bases, the subsumed instructions or the loop increments.
899  SmallInstructionSet Exclude;
900  for (auto &DRS : RootSets) {
901  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
902  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
903  Exclude.insert(DRS.BaseInst);
904  }
905  Exclude.insert(LoopIncs.begin(), LoopIncs.end());
906 
907  for (auto &DRS : RootSets) {
 // Everything reached from the base belongs to iteration 0.
909  collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
910  for (auto *I : VBase) {
911  Uses[I].set(0);
912  }
913 
 // Roots are numbered from 1 in the order they appear in the set.
914  unsigned Idx = 1;
915  for (auto *Root : DRS.Roots) {
917  collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
918 
919  // While we're here, check the use sets are the same size.
920  if (V.size() != VBase.size()) {
921  DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
922  return false;
923  }
924 
925  for (auto *I : V) {
926  Uses[I].set(Idx);
927  }
928  ++Idx;
929  }
930 
931  // Make sure our subsumed instructions are remembered too.
932  for (auto *I : DRS.SubsumedInsts) {
933  Uses[I].set(IL_All);
934  }
935  }
936 
937  // Make sure the loop increments are also accounted for.
938 
 // Rebuild the exclusion set without LoopIncs, since the increments are the
 // starting points of the next collection.
939  Exclude.clear();
940  for (auto &DRS : RootSets) {
941  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
942  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
943  Exclude.insert(DRS.BaseInst);
944  }
945 
947  collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
948  for (auto *I : V) {
949  Uses[I].set(IL_All);
950  }
951 
952  return true;
953 
954 }
955 
956 /// Get the next instruction in "In" that is a member of set Val.
957 /// Start searching from StartI, and do not return anything in Exclude.
958 /// If StartI is not given, start from In.begin().
// Returns In.end() when no further entry qualifies. The entry at the start
// position itself is a candidate; the scan does not skip past it first.
// NOTE(review): the line carrying the return type is not visible in this
// listing; the declaration in DAGRootTracker gives UsesTy::iterator.
960 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
961  const SmallInstructionSet &Exclude,
962  UsesTy::iterator *StartI) {
963  UsesTy::iterator I = StartI ? *StartI : In.begin();
 // Skip entries whose bit 'Val' is clear or whose instruction is excluded.
964  while (I != In.end() && (I->second.test(Val) == 0 ||
965  Exclude.count(I->first) != 0))
966  ++I;
967  return I;
968 }
969 
970 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
971  for (auto &DRS : RootSets) {
972  if (DRS.BaseInst == I)
973  return true;
974  }
975  return false;
976 }
977 
978 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
979  for (auto &DRS : RootSets) {
980  if (std::find(DRS.Roots.begin(), DRS.Roots.end(), I) != DRS.Roots.end())
981  return true;
982  }
983  return false;
984 }
985 
986 /// Return true if instruction I depends on any instruction between
987 /// Start and End.
988 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
989  UsesTy::iterator Start,
990  UsesTy::iterator End) {
991  for (auto *U : I->users()) {
992  for (auto It = Start; It != End; ++It)
993  if (U == It->first)
994  return true;
995  }
996  return false;
997 }
998 
999 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1000  // We now need to check for equivalence of the use graph of each root with
1001  // that of the primary induction variable (excluding the roots). Our goal
1002  // here is not to solve the full graph isomorphism problem, but rather to
1003  // catch common cases without a lot of work. As a result, we will assume
1004  // that the relative order of the instructions in each unrolled iteration
1005  // is the same (although we will not make an assumption about how the
1006  // different iterations are intermixed). Note that while the order must be
1007  // the same, the instructions may not be in the same basic block.
1008 
1009  // An array of just the possible reductions for this scale factor. When we
1010  // collect the set of all users of some root instructions, these reduction
1011  // instructions are treated as 'final' (their uses are not considered).
1012  // This is important because we don't want the root use set to search down
1013  // the reduction chain.
1014  SmallInstructionSet PossibleRedSet;
1015  SmallInstructionSet PossibleRedLastSet;
1016  SmallInstructionSet PossibleRedPHISet;
1017  Reductions.restrictToScale(Scale, PossibleRedSet,
1018  PossibleRedPHISet, PossibleRedLastSet);
1019 
1020  // Populate "Uses" with where each instruction is used.
1021  if (!collectUsedInstructions(PossibleRedSet))
1022  return false;
1023 
1024  // Make sure we mark the reduction PHIs as used in all iterations.
1025  for (auto *I : PossibleRedPHISet) {
1026  Uses[I].set(IL_All);
1027  }
1028 
1029  // Make sure all instructions in the loop are in one and only one
1030  // set.
1031  for (auto &KV : Uses) {
1032  if (KV.second.count() != 1) {
1033  DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1034  << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1035  return false;
1036  }
1037  }
1038 
1039  DEBUG(
1040  for (auto &KV : Uses) {
1041  dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1042  }
1043  );
1044 
1045  for (unsigned Iter = 1; Iter < Scale; ++Iter) {
1046  // In addition to regular aliasing information, we need to look for
1047  // instructions from later (future) iterations that have side effects
1048  // preventing us from reordering them past other instructions with side
1049  // effects.
1050  bool FutureSideEffects = false;
1051  AliasSetTracker AST(*AA);
1052  // The map between instructions in f(%iv.(i+1)) and f(%iv).
1054 
1055  // Compare iteration Iter to the base.
1056  SmallInstructionSet Visited;
1057  auto BaseIt = nextInstr(0, Uses, Visited);
1058  auto RootIt = nextInstr(Iter, Uses, Visited);
1059  auto LastRootIt = Uses.begin();
1060 
1061  while (BaseIt != Uses.end() && RootIt != Uses.end()) {
1062  Instruction *BaseInst = BaseIt->first;
1063  Instruction *RootInst = RootIt->first;
1064 
1065  // Skip over the IV or root instructions; only match their users.
1066  bool Continue = false;
1067  if (isBaseInst(BaseInst)) {
1068  Visited.insert(BaseInst);
1069  BaseIt = nextInstr(0, Uses, Visited);
1070  Continue = true;
1071  }
1072  if (isRootInst(RootInst)) {
1073  LastRootIt = RootIt;
1074  Visited.insert(RootInst);
1075  RootIt = nextInstr(Iter, Uses, Visited);
1076  Continue = true;
1077  }
1078  if (Continue) continue;
1079 
1080  if (!BaseInst->isSameOperationAs(RootInst)) {
1081  // Last chance saloon. We don't try and solve the full isomorphism
1082  // problem, but try and at least catch the case where two instructions
1083  // *of different types* are round the wrong way. We won't be able to
1084  // efficiently tell, given two ADD instructions, which way around we
1085  // should match them, but given an ADD and a SUB, we can at least infer
1086  // which one is which.
1087  //
1088  // This should allow us to deal with a greater subset of the isomorphism
1089  // problem. It does however change a linear algorithm into a quadratic
1090  // one, so limit the number of probes we do.
1091  auto TryIt = RootIt;
1092  unsigned N = NumToleratedFailedMatches;
1093  while (TryIt != Uses.end() &&
1094  !BaseInst->isSameOperationAs(TryIt->first) &&
1095  N--) {
1096  ++TryIt;
1097  TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1098  }
1099 
1100  if (TryIt == Uses.end() || TryIt == RootIt ||
1101  instrDependsOn(TryIt->first, RootIt, TryIt)) {
1102  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1103  " vs. " << *RootInst << "\n");
1104  return false;
1105  }
1106 
1107  RootIt = TryIt;
1108  RootInst = TryIt->first;
1109  }
1110 
1111  // All instructions between the last root and this root
1112  // may belong to some other iteration. If they belong to a
1113  // future iteration, then they're dangerous to alias with.
1114  //
1115  // Note that because we allow a limited amount of flexibility in the order
1116  // that we visit nodes, LastRootIt might be *before* RootIt, in which
1117  // case we've already checked this set of instructions so we shouldn't
1118  // do anything.
1119  for (; LastRootIt < RootIt; ++LastRootIt) {
1120  Instruction *I = LastRootIt->first;
1121  if (LastRootIt->second.find_first() < (int)Iter)
1122  continue;
1123  if (I->mayWriteToMemory())
1124  AST.add(I);
1125  // Note: This is specifically guarded by a check on isa<PHINode>,
1126  // which while a valid (somewhat arbitrary) micro-optimization, is
1127  // needed because otherwise isSafeToSpeculativelyExecute returns
1128  // false on PHI nodes.
1129  if (!isa<PHINode>(I) && !isSimpleLoadStore(I) &&
1131  // Intervening instructions cause side effects.
1132  FutureSideEffects = true;
1133  }
1134 
1135  // Make sure that this instruction, which is in the use set of this
1136  // root instruction, does not also belong to the base set or the set of
1137  // some other root instruction.
1138  if (RootIt->second.count() > 1) {
1139  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1140  " vs. " << *RootInst << " (prev. case overlap)\n");
1141  return false;
1142  }
1143 
1144  // Make sure that we don't alias with any instruction in the alias set
1145  // tracker. If we do, then we depend on a future iteration, and we
1146  // can't reroll.
1147  if (RootInst->mayReadFromMemory())
1148  for (auto &K : AST) {
1149  if (K.aliasesUnknownInst(RootInst, *AA)) {
1150  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1151  " vs. " << *RootInst << " (depends on future store)\n");
1152  return false;
1153  }
1154  }
1155 
1156  // If we've past an instruction from a future iteration that may have
1157  // side effects, and this instruction might also, then we can't reorder
1158  // them, and this matching fails. As an exception, we allow the alias
1159  // set tracker to handle regular (simple) load/store dependencies.
1160  if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) &&
1161  !isSafeToSpeculativelyExecute(BaseInst)) ||
1162  (!isSimpleLoadStore(RootInst) &&
1163  !isSafeToSpeculativelyExecute(RootInst)))) {
1164  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1165  " vs. " << *RootInst <<
1166  " (side effects prevent reordering)\n");
1167  return false;
1168  }
1169 
1170  // For instructions that are part of a reduction, if the operation is
1171  // associative, then don't bother matching the operands (because we
1172  // already know that the instructions are isomorphic, and the order
1173  // within the iteration does not matter). For non-associative reductions,
1174  // we do need to match the operands, because we need to reject
1175  // out-of-order instructions within an iteration!
1176  // For example (assume floating-point addition), we need to reject this:
1177  // x += a[i]; x += b[i];
1178  // x += a[i+1]; x += b[i+1];
1179  // x += b[i+2]; x += a[i+2];
1180  bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1181 
1182  if (!(InReduction && BaseInst->isAssociative())) {
1183  bool Swapped = false, SomeOpMatched = false;
1184  for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
1185  Value *Op2 = RootInst->getOperand(j);
1186 
1187  // If this is part of a reduction (and the operation is not
1188  // associatve), then we match all operands, but not those that are
1189  // part of the reduction.
1190  if (InReduction)
1191  if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1192  if (Reductions.isPairInSame(RootInst, Op2I))
1193  continue;
1194 
1195  DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1196  if (BMI != BaseMap.end()) {
1197  Op2 = BMI->second;
1198  } else {
1199  for (auto &DRS : RootSets) {
1200  if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1201  Op2 = DRS.BaseInst;
1202  break;
1203  }
1204  }
1205  }
1206 
1207  if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
1208  // If we've not already decided to swap the matched operands, and
1209  // we've not already matched our first operand (note that we could
1210  // have skipped matching the first operand because it is part of a
1211  // reduction above), and the instruction is commutative, then try
1212  // the swapped match.
1213  if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1214  BaseInst->getOperand(!j) == Op2) {
1215  Swapped = true;
1216  } else {
1217  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1218  << " vs. " << *RootInst << " (operand " << j << ")\n");
1219  return false;
1220  }
1221  }
1222 
1223  SomeOpMatched = true;
1224  }
1225  }
1226 
1227  if ((!PossibleRedLastSet.count(BaseInst) &&
1228  hasUsesOutsideLoop(BaseInst, L)) ||
1229  (!PossibleRedLastSet.count(RootInst) &&
1230  hasUsesOutsideLoop(RootInst, L))) {
1231  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1232  " vs. " << *RootInst << " (uses outside loop)\n");
1233  return false;
1234  }
1235 
1236  Reductions.recordPair(BaseInst, RootInst, Iter);
1237  BaseMap.insert(std::make_pair(RootInst, BaseInst));
1238 
1239  LastRootIt = RootIt;
1240  Visited.insert(BaseInst);
1241  Visited.insert(RootInst);
1242  BaseIt = nextInstr(0, Uses, Visited);
1243  RootIt = nextInstr(Iter, Uses, Visited);
1244  }
1245  assert (BaseIt == Uses.end() && RootIt == Uses.end() &&
1246  "Mismatched set sizes!");
1247  }
1248 
1249  DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
1250  *IV << "\n");
1251 
1252  return true;
1253 }
1254 
1255 void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
1256  BasicBlock *Header = L->getHeader();
1257  // Remove instructions associated with non-base iterations.
1258  for (BasicBlock::reverse_iterator J = Header->rbegin();
1259  J != Header->rend();) {
1260  unsigned I = Uses[&*J].find_first();
1261  if (I > 0 && I < IL_All) {
1262  Instruction *D = &*J;
1263  DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
1264  D->eraseFromParent();
1265  continue;
1266  }
1267 
1268  ++J;
1269  }
1270  const DataLayout &DL = Header->getModule()->getDataLayout();
1271 
1272  // We need to create a new induction variable for each different BaseInst.
1273  for (auto &DRS : RootSets) {
1274  // Insert the new induction variable.
1275  const SCEVAddRecExpr *RealIVSCEV =
1276  cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
1277  const SCEV *Start = RealIVSCEV->getStart();
1278  const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>
1279  (SE->getAddRecExpr(Start,
1280  SE->getConstant(RealIVSCEV->getType(), 1),
1281  L, SCEV::FlagAnyWrap));
1282  { // Limit the lifetime of SCEVExpander.
1283  SCEVExpander Expander(*SE, DL, "reroll");
1284  Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
1285 
1286  for (auto &KV : Uses) {
1287  if (KV.second.find_first() == 0)
1288  KV.first->replaceUsesOfWith(DRS.BaseInst, NewIV);
1289  }
1290 
1291  if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
1292  // FIXME: Why do we need this check?
1293  if (Uses[BI].find_first() == IL_All) {
1294  const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1295 
1296  // Iteration count SCEV minus 1
1297  const SCEV *ICMinus1SCEV =
1298  SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
1299 
1300  Value *ICMinus1; // Iteration count minus 1
1301  if (isa<SCEVConstant>(ICMinus1SCEV)) {
1302  ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
1303  } else {
1304  BasicBlock *Preheader = L->getLoopPreheader();
1305  if (!Preheader)
1306  Preheader = InsertPreheaderForLoop(L, Parent);
1307 
1308  ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
1309  Preheader->getTerminator());
1310  }
1311 
1312  Value *Cond =
1313  new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
1314  BI->setCondition(Cond);
1315 
1316  if (BI->getSuccessor(1) != Header)
1317  BI->swapSuccessors();
1318  }
1319  }
1320  }
1321  }
1322 
1323  SimplifyInstructionsInBlock(Header, TLI);
1324  DeleteDeadPHIs(Header, TLI);
1325 }
1326 
1327 // Validate the selected reductions. All iterations must have an isomorphic
1328 // part of the reduction chain and, for non-associative reductions, the chain
1329 // entries must appear in order.
1330 bool LoopReroll::ReductionTracker::validateSelected() {
1331  // For a non-associative reduction, the chain entries must appear in order.
1332  for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
1333  RI != RIE; ++RI) {
1334  int i = *RI;
1335  int PrevIter = 0, BaseCount = 0, Count = 0;
1336  for (Instruction *J : PossibleReds[i]) {
1337  // Note that all instructions in the chain must have been found because
1338  // all instructions in the function must have been assigned to some
1339  // iteration.
1340  int Iter = PossibleRedIter[J];
1341  if (Iter != PrevIter && Iter != PrevIter + 1 &&
1342  !PossibleReds[i].getReducedValue()->isAssociative()) {
1343  DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
1344  J << "\n");
1345  return false;
1346  }
1347 
1348  if (Iter != PrevIter) {
1349  if (Count != BaseCount) {
1350  DEBUG(dbgs() << "LRR: Iteration " << PrevIter <<
1351  " reduction use count " << Count <<
1352  " is not equal to the base use count " <<
1353  BaseCount << "\n");
1354  return false;
1355  }
1356 
1357  Count = 0;
1358  }
1359 
1360  ++Count;
1361  if (Iter == 0)
1362  ++BaseCount;
1363 
1364  PrevIter = Iter;
1365  }
1366  }
1367 
1368  return true;
1369 }
1370 
1371 // For all selected reductions, remove all parts except those in the first
1372 // iteration (and the PHI). Replace outside uses of the reduced value with uses
1373 // of the first-iteration reduced value (in other words, reroll the selected
1374 // reductions).
1375 void LoopReroll::ReductionTracker::replaceSelected() {
1376  // Fixup reductions to refer to the last instruction associated with the
1377  // first iteration (not the last).
1378  for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
1379  RI != RIE; ++RI) {
1380  int i = *RI;
1381  int j = 0;
1382  for (int e = PossibleReds[i].size(); j != e; ++j)
1383  if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1384  --j;
1385  break;
1386  }
1387 
1388  // Replace users with the new end-of-chain value.
1389  SmallInstructionVector Users;
1390  for (User *U : PossibleReds[i].getReducedValue()->users()) {
1391  Users.push_back(cast<Instruction>(U));
1392  }
1393 
1394  for (SmallInstructionVector::iterator J = Users.begin(),
1395  JE = Users.end(); J != JE; ++J)
1396  (*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1397  PossibleReds[i][j]);
1398  }
1399 }
1400 
1401 // Reroll the provided loop with respect to the provided induction variable.
1402 // Generally, we're looking for a loop like this:
1403 //
1404 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
1405 // f(%iv)
1406 // %iv.1 = add %iv, 1 <-- a root increment
1407 // f(%iv.1)
1408 // %iv.2 = add %iv, 2 <-- a root increment
1409 // f(%iv.2)
1410 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
1411 // f(%iv.scale_m_1)
1412 // ...
1413 // %iv.next = add %iv, scale
1414 // %cmp = icmp(%iv, ...)
1415 // br %cmp, header, exit
1416 //
1417 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1418 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1419 // be intermixed with eachother. The restriction imposed by this algorithm is
1420 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1421 // etc. be the same.
1422 //
1423 // First, we collect the use set of %iv, excluding the other increment roots.
1424 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1425 // times, having collected the use set of f(%iv.(i+1)), during which we:
1426 // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1427 // the next unmatched instruction in f(%iv.(i+1)).
1428 // - Ensure that both matched instructions don't have any external users
1429 // (with the exception of last-in-chain reduction instructions).
1430 // - Track the (aliasing) write set, and other side effects, of all
1431 // instructions that belong to future iterations that come before the matched
1432 // instructions. If the matched instructions read from that write set, then
1433 // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1434 // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1435 // if any of these future instructions had side effects (could not be
1436 // speculatively executed), and so do the matched instructions, when we
1437 // cannot reorder those side-effect-producing instructions, and rerolling
1438 // fails.
1439 //
1440 // Finally, we make sure that all loop instructions are either loop increment
1441 // roots, belong to simple latch code, parts of validated reductions, part of
1442 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1443 // have been validated), then we reroll the loop.
1444 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1445  const SCEV *IterCount,
1446  ReductionTracker &Reductions) {
1447  DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI);
1448 
1449  if (!DAGRoots.findRoots())
1450  return false;
1451  DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
1452  *IV << "\n");
1453 
1454  if (!DAGRoots.validate(Reductions))
1455  return false;
1456  if (!Reductions.validateSelected())
1457  return false;
1458  // At this point, we've validated the rerolling, and we're committed to
1459  // making changes!
1460 
1461  Reductions.replaceSelected();
1462  DAGRoots.replace(IterCount);
1463 
1464  ++NumRerolledLoops;
1465  return true;
1466 }
1467 
1468 bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
1469  if (skipOptnoneFunction(L))
1470  return false;
1471 
1472  AA = &getAnalysis<AliasAnalysis>();
1473  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1474  SE = &getAnalysis<ScalarEvolution>();
1475  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1476  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1477 
1478  BasicBlock *Header = L->getHeader();
1479  DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
1480  "] Loop %" << Header->getName() << " (" <<
1481  L->getNumBlocks() << " block(s))\n");
1482 
1483  bool Changed = false;
1484 
1485  // For now, we'll handle only single BB loops.
1486  if (L->getNumBlocks() > 1)
1487  return Changed;
1488 
1489  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
1490  return Changed;
1491 
1492  const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
1493  const SCEV *IterCount =
1494  SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1));
1495  DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
1496 
1497  // First, we need to find the induction variable with respect to which we can
1498  // reroll (there may be several possible options).
1499  SmallInstructionVector PossibleIVs;
1500  collectPossibleIVs(L, PossibleIVs);
1501 
1502  if (PossibleIVs.empty()) {
1503  DEBUG(dbgs() << "LRR: No possible IVs found\n");
1504  return Changed;
1505  }
1506 
1507  ReductionTracker Reductions;
1508  collectPossibleReductions(L, Reductions);
1509 
1510  // For each possible IV, collect the associated possible set of 'root' nodes
1511  // (i+1, i+2, etc.).
1512  for (SmallInstructionVector::iterator I = PossibleIVs.begin(),
1513  IE = PossibleIVs.end(); I != IE; ++I)
1514  if (reroll(*I, L, Header, IterCount, Reductions)) {
1515  Changed = true;
1516  break;
1517  }
1518 
1519  return Changed;
1520 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:82
VectorType::iterator iterator
Definition: MapVector.h:39
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
evaluateAtIteration - Return the value of this chain of recurrences at the specified iteration number...
iplist< Instruction >::iterator eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing basic block and deletes it...
Definition: Instruction.cpp:70
void push_back(const T &Elt)
Definition: SmallVector.h:222
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:104
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:240
Pass * createLoopRerollPass()
static bool isSimpleLoadStore(Instruction *I)
iterator_range< use_iterator > uses()
Definition: Value.h:283
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
STATISTIC(NumFunctions,"Total number of functions")
size_type size() const
Definition: DenseSet.h:53
DenseSet - This implements a dense probed hash-table based set.
Definition: DenseSet.h:39
unsigned getNumOperands() const
Definition: User.h:138
ScalarEvolution - This class is the main scalar evolution driver.
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:232
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:32
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:111
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
SimplifyInstructionsInBlock - Scan the specified basic block and try to simplify any instructions in ...
Definition: Local.cpp:422
void initializeLoopRerollPass(PassRegistry &)
reverse_iterator rend()
Definition: BasicBlock.h:238
LoadInst - an instruction for reading from memory.
Definition: Instructions.h:177
reverse_iterator rbegin()
Definition: BasicBlock.h:236
Hexagon Common GEP
iv Induction Variable Users
Definition: IVUsers.cpp:43
op_iterator op_begin()
Definition: User.h:183
BlockT * getHeader() const
Definition: LoopInfo.h:96
const SCEV * getStart() const
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:188
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:231
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:169
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:70
static cl::opt< unsigned > MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, cl::desc("The maximum increment for loop rerolling"))
BasicBlock * InsertPreheaderForLoop(Loop *L, Pass *P)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:106
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
A Use represents the edge between a Value definition and its users.
Definition: Use.h:69
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
#define false
Definition: ConvertUTF.c:65
bool mayReadFromMemory() const
mayReadFromMemory - Return true if this instruction may read memory.
SCEVAddRecExpr - This node represents a polynomial recurrence on the trip count of the specified loop...
bool isAssociative() const
isAssociative - Return true if the instruction is associative:
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array Y and places the result in dest...
Definition: APInt.cpp:238
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
DeleteDeadPHIs - Examine each PHI in the given block and delete it if it is dead. ...
StoreInst - an instruction for storing to memory.
Definition: Instructions.h:316
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:67
InstListType::reverse_iterator reverse_iterator
Definition: BasicBlock.h:95
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:240
BlockT * getLoopPreheader() const
getLoopPreheader - If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:108
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
Type * getType() const
getType - Return the LLVM type of this SCEV expression.
BranchInst - Conditional or Unconditional Branch instruction.
const SCEV * getOperand(unsigned i) const
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:32
#define H(x, y, z)
Definition: MD5.cpp:53
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1895
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:185
bool contains(const LoopT *L) const
contains - Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:105
This instruction compares its operands according to the predicate given to the constructor.
Value * getOperand(unsigned i) const
Definition: User.h:118
static bool isLoopIncrement(User *U, Instruction *IV)
bool isCommutative() const
isCommutative - Return true if the instruction is commutative:
Definition: Instruction.h:327
#define INITIALIZE_AG_DEPENDENCY(depName)
Definition: PassSupport.h:72
bool mayWriteToMemory() const
mayWriteToMemory - Return true if this instruction may modify memory.
BinaryOps getOpcode() const
Definition: InstrTypes.h:323
IterationLimits
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:147
MemIntrinsic - This is the common base class for memset/memcpy/memmove.
This is the shared class of boolean and integer constants.
Definition: Constants.h:47
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
Provides information about what library functions are available for the current target.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:67
static cl::opt< unsigned > NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate"" during fuzzy matching. (default: 400)"))
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
iterator_range< user_iterator > users()
Definition: Value.h:300
This class uses information about analyze scalars to rewrite expressions in canonical form...
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
static bool isSimpleArithmeticOp(User *IVU)
Return true if IVU is a "simple" arithmetic operation.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:372
SCEV - This class represents an analyzed expression in the program.
unsigned getNumBlocks() const
getNumBlocks - Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:145
bool isBinaryOp() const
Definition: Instruction.h:116
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:311
iterator find(const KeyT &Val)
Definition: DenseMap.h:124
void size_t size
bool use_empty() const
Definition: Value.h:275
user_iterator user_begin()
Definition: Value.h:294
LLVM Value Representation.
Definition: Value.h:69
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:112
bool user_empty() const
Definition: Value.h:290
#define DEBUG(X)
Definition: Debug.h:92
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:737
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
isSafeToSpeculativelyExecute - Return true if the instruction does not have any effects besides calcu...
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:203
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one...
iterator getFirstInsertionPt()
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:194
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:134
loops
Definition: LoopInfo.cpp:696
SCEVConstant - This class represents a constant integer value.