LLVM  4.0.0
LoopRerollPass.cpp
Go to the documentation of this file.
1 //===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop reroller.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/LoopPass.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/Support/Debug.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "loop-reroll"
41 
42 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
43 
44 static cl::opt<unsigned>
45 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
46  cl::desc("The maximum increment for loop rerolling"));
47 
48 static cl::opt<unsigned>
49 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
50  cl::Hidden,
51  cl::desc("The maximum number of failures to tolerate"
52  " during fuzzy matching. (default: 400)"));
53 
54 // This loop re-rolling transformation aims to transform loops like this:
55 //
56 // int foo(int a);
57 // void bar(int *x) {
58 // for (int i = 0; i < 500; i += 3) {
59 // foo(i);
60 // foo(i+1);
61 // foo(i+2);
62 // }
63 // }
64 //
65 // into a loop like this:
66 //
67 // void bar(int *x) {
68 // for (int i = 0; i < 500; ++i)
69 // foo(i);
70 // }
71 //
72 // It does this by looking for loops that, besides the latch code, are composed
73 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
74 // to the induction variable, and where each DAG is isomorphic to the DAG
75 // rooted at the induction variable (excepting the sub-DAGs which root the
76 // other induction-variable increments). In other words, we're looking for loop
77 // bodies of the form:
78 //
79 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
80 // f(%iv)
81 // %iv.1 = add %iv, 1 <-- a root increment
82 // f(%iv.1)
83 // %iv.2 = add %iv, 2 <-- a root increment
84 // f(%iv.2)
85 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
86 // f(%iv.scale_m_1)
87 // ...
88 // %iv.next = add %iv, scale
89 // %cmp = icmp(%iv, ...)
90 // br %cmp, header, exit
91 //
92 // where each f(i) is a set of instructions that, collectively, are a function
93 // only of i (and other loop-invariant values).
94 //
95 // As a special case, we can also reroll loops like this:
96 //
97 // int foo(int);
98 // void bar(int *x) {
99 // for (int i = 0; i < 500; ++i) {
100 // x[3*i] = foo(0);
101 // x[3*i+1] = foo(0);
102 // x[3*i+2] = foo(0);
103 // }
104 // }
105 //
106 // into this:
107 //
108 // void bar(int *x) {
109 // for (int i = 0; i < 1500; ++i)
110 // x[i] = foo(0);
111 // }
112 //
113 // in which case, we're looking for inputs like this:
114 //
115 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
116 // %scaled.iv = mul %iv, scale
117 // f(%scaled.iv)
118 // %scaled.iv.1 = add %scaled.iv, 1
119 // f(%scaled.iv.1)
120 // %scaled.iv.2 = add %scaled.iv, 2
121 // f(%scaled.iv.2)
122 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
123 // f(%scaled.iv.scale_m_1)
124 // ...
125 // %iv.next = add %iv, 1
126 // %cmp = icmp(%iv, ...)
127 // br %cmp, header, exit
128 
129 namespace {
131  /// The maximum number of iterations that we'll try and reroll.
132  IL_MaxRerollIterations = 32,
133  /// The bitvector index used by loop induction variables and other
134  /// instructions that belong to all iterations.
135  IL_All,
136  IL_End
137  };
138 
139  class LoopReroll : public LoopPass {
140  public:
141  static char ID; // Pass ID, replacement for typeid
142  LoopReroll() : LoopPass(ID) {
144  }
145 
146  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
147 
148  void getAnalysisUsage(AnalysisUsage &AU) const override {
151  }
152 
153  protected:
154  AliasAnalysis *AA;
155  LoopInfo *LI;
156  ScalarEvolution *SE;
157  TargetLibraryInfo *TLI;
158  DominatorTree *DT;
159  bool PreserveLCSSA;
160 
161  typedef SmallVector<Instruction *, 16> SmallInstructionVector;
162  typedef SmallSet<Instruction *, 16> SmallInstructionSet;
163 
164  // Map between induction variable and its increment
// For loops with multiple induction variables, remember the one that is used
// only to control the loop.
168  Instruction *LoopControlIV;
169 
170  // A chain of isomorphic instructions, identified by a single-use PHI
171  // representing a reduction. Only the last value may be used outside the
172  // loop.
173  struct SimpleLoopReduction {
174  SimpleLoopReduction(Instruction *P, Loop *L)
175  : Valid(false), Instructions(1, P) {
176  assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
177  add(L);
178  }
179 
180  bool valid() const {
181  return Valid;
182  }
183 
184  Instruction *getPHI() const {
185  assert(Valid && "Using invalid reduction");
186  return Instructions.front();
187  }
188 
189  Instruction *getReducedValue() const {
190  assert(Valid && "Using invalid reduction");
191  return Instructions.back();
192  }
193 
194  Instruction *get(size_t i) const {
195  assert(Valid && "Using invalid reduction");
196  return Instructions[i+1];
197  }
198 
199  Instruction *operator [] (size_t i) const { return get(i); }
200 
201  // The size, ignoring the initial PHI.
202  size_t size() const {
203  assert(Valid && "Using invalid reduction");
204  return Instructions.size()-1;
205  }
206 
207  typedef SmallInstructionVector::iterator iterator;
208  typedef SmallInstructionVector::const_iterator const_iterator;
209 
210  iterator begin() {
211  assert(Valid && "Using invalid reduction");
212  return std::next(Instructions.begin());
213  }
214 
215  const_iterator begin() const {
216  assert(Valid && "Using invalid reduction");
217  return std::next(Instructions.begin());
218  }
219 
220  iterator end() { return Instructions.end(); }
221  const_iterator end() const { return Instructions.end(); }
222 
223  protected:
224  bool Valid;
225  SmallInstructionVector Instructions;
226 
227  void add(Loop *L);
228  };
229 
230  // The set of all reductions, and state tracking of possible reductions
231  // during loop instruction processing.
232  struct ReductionTracker {
233  typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector;
234 
235  // Add a new possible reduction.
236  void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
237 
238  // Setup to track possible reductions corresponding to the provided
239  // rerolling scale. Only reductions with a number of non-PHI instructions
240  // that is divisible by the scale are considered. Three instructions sets
241  // are filled in:
242  // - A set of all possible instructions in eligible reductions.
243  // - A set of all PHIs in eligible reductions
244  // - A set of all reduced values (last instructions) in eligible
245  // reductions.
246  void restrictToScale(uint64_t Scale,
247  SmallInstructionSet &PossibleRedSet,
248  SmallInstructionSet &PossibleRedPHISet,
249  SmallInstructionSet &PossibleRedLastSet) {
250  PossibleRedIdx.clear();
251  PossibleRedIter.clear();
252  Reds.clear();
253 
254  for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
255  if (PossibleReds[i].size() % Scale == 0) {
256  PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
257  PossibleRedPHISet.insert(PossibleReds[i].getPHI());
258 
259  PossibleRedSet.insert(PossibleReds[i].getPHI());
260  PossibleRedIdx[PossibleReds[i].getPHI()] = i;
261  for (Instruction *J : PossibleReds[i]) {
262  PossibleRedSet.insert(J);
263  PossibleRedIdx[J] = i;
264  }
265  }
266  }
267 
268  // The functions below are used while processing the loop instructions.
269 
270  // Are the two instructions both from reductions, and furthermore, from
271  // the same reduction?
272  bool isPairInSame(Instruction *J1, Instruction *J2) {
273  DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
274  if (J1I != PossibleRedIdx.end()) {
275  DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
276  if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
277  return true;
278  }
279 
280  return false;
281  }
282 
283  // The two provided instructions, the first from the base iteration, and
284  // the second from iteration i, form a matched pair. If these are part of
285  // a reduction, record that fact.
286  void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
287  if (PossibleRedIdx.count(J1)) {
288  assert(PossibleRedIdx.count(J2) &&
289  "Recording reduction vs. non-reduction instruction?");
290 
291  PossibleRedIter[J1] = 0;
292  PossibleRedIter[J2] = i;
293 
294  int Idx = PossibleRedIdx[J1];
295  assert(Idx == PossibleRedIdx[J2] &&
296  "Recording pair from different reductions?");
297  Reds.insert(Idx);
298  }
299  }
300 
301  // The functions below can be called after we've finished processing all
302  // instructions in the loop, and we know which reductions were selected.
303 
304  bool validateSelected();
305  void replaceSelected();
306 
307  protected:
308  // The vector of all possible reductions (for any scale).
309  SmallReductionVector PossibleReds;
310 
311  DenseMap<Instruction *, int> PossibleRedIdx;
312  DenseMap<Instruction *, int> PossibleRedIter;
313  DenseSet<int> Reds;
314  };
315 
316  // A DAGRootSet models an induction variable being used in a rerollable
317  // loop. For example,
318  //
319  // x[i*3+0] = y1
320  // x[i*3+1] = y2
321  // x[i*3+2] = y3
322  //
323  // Base instruction -> i*3
324  // +---+----+
325  // / | \
326  // ST[y1] +1 +2 <-- Roots
327  // | |
328  // ST[y2] ST[y3]
329  //
330  // There may be multiple DAGRoots, for example:
331  //
332  // x[i*2+0] = ... (1)
333  // x[i*2+1] = ... (1)
334  // x[i*2+4] = ... (2)
335  // x[i*2+5] = ... (2)
336  // x[(i+1234)*2+5678] = ... (3)
337  // x[(i+1234)*2+5679] = ... (3)
338  //
339  // The loop will be rerolled by adding a new loop induction variable,
340  // one for the Base instruction in each DAGRootSet.
341  //
342  struct DAGRootSet {
343  Instruction *BaseInst;
344  SmallInstructionVector Roots;
345  // The instructions between IV and BaseInst (but not including BaseInst).
346  SmallInstructionSet SubsumedInsts;
347  };
348 
349  // The set of all DAG roots, and state tracking of all roots
350  // for a particular induction variable.
351  struct DAGRootTracker {
352  DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
355  bool PreserveLCSSA,
357  Instruction *LoopCtrlIV)
358  : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
359  PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
360  LoopControlIV(LoopCtrlIV) {}
361 
362  /// Stage 1: Find all the DAG roots for the induction variable.
363  bool findRoots();
364  /// Stage 2: Validate if the found roots are valid.
365  bool validate(ReductionTracker &Reductions);
366  /// Stage 3: Assuming validate() returned true, perform the
367  /// replacement.
368  /// @param IterCount The maximum iteration count of L.
369  void replace(const SCEV *IterCount);
370 
371  protected:
372  typedef MapVector<Instruction*, BitVector> UsesTy;
373 
374  void findRootsRecursive(Instruction *IVU,
375  SmallInstructionSet SubsumedInsts);
376  bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
377  bool collectPossibleRoots(Instruction *Base,
378  std::map<int64_t,Instruction*> &Roots);
379  bool validateRootSet(DAGRootSet &DRS);
380 
381  bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
382  void collectInLoopUserSet(const SmallInstructionVector &Roots,
383  const SmallInstructionSet &Exclude,
384  const SmallInstructionSet &Final,
386  void collectInLoopUserSet(Instruction *Root,
387  const SmallInstructionSet &Exclude,
388  const SmallInstructionSet &Final,
390 
391  UsesTy::iterator nextInstr(int Val, UsesTy &In,
392  const SmallInstructionSet &Exclude,
393  UsesTy::iterator *StartI=nullptr);
394  bool isBaseInst(Instruction *I);
395  bool isRootInst(Instruction *I);
396  bool instrDependsOn(Instruction *I,
397  UsesTy::iterator Start,
398  UsesTy::iterator End);
399  void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
400  void updateNonLoopCtrlIncr();
401 
402  LoopReroll *Parent;
403 
404  // Members of Parent, replicated here for brevity.
405  Loop *L;
406  ScalarEvolution *SE;
407  AliasAnalysis *AA;
408  TargetLibraryInfo *TLI;
409  DominatorTree *DT;
410  LoopInfo *LI;
411  bool PreserveLCSSA;
412 
413  // The loop induction variable.
414  Instruction *IV;
415  // Loop step amount.
416  int64_t Inc;
417  // Loop reroll count; if Inc == 1, this records the scaling applied
418  // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
419  // If Inc is not 1, Scale = Inc.
420  uint64_t Scale;
421  // The roots themselves.
423  // All increment instructions for IV.
424  SmallInstructionVector LoopIncs;
425  // Map of all instructions in the loop (in order) to the iterations
426  // they are used in (or specially, IL_All for instructions
427  // used in the loop increment mechanism).
428  UsesTy Uses;
429  // Map between induction variable and its increment
431  Instruction *LoopControlIV;
432  };
433 
434  // Check if it is a compare-like instruction whose user is a branch
435  bool isCompareUsedByBranch(Instruction *I) {
436  auto *TI = I->getParent()->getTerminator();
437  if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
438  return false;
439  return I->hasOneUse() && TI->getOperand(0) == I;
440  };
441 
442  bool isLoopControlIV(Loop *L, Instruction *IV);
443  void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
444  void collectPossibleReductions(Loop *L,
445  ReductionTracker &Reductions);
446  bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
447  ReductionTracker &Reductions);
448  };
449 }
450 
451 char LoopReroll::ID = 0;
452 INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
455 INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
456 
458  return new LoopReroll;
459 }
460 
461 // Returns true if the provided instruction is used outside the given loop.
462 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
463 // non-loop blocks to be outside the loop.
464 static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
465  for (User *U : I->users()) {
466  if (!L->contains(cast<Instruction>(U)))
467  return true;
468  }
469  return false;
470 }
471 
473  const SCEV *SCEVExpr,
474  Instruction &IV) {
475  const SCEVMulExpr *MulSCEV = dyn_cast<SCEVMulExpr>(SCEVExpr);
476 
477  // If StepRecurrence of a SCEVExpr is a constant (c1 * c2, c2 = sizeof(ptr)),
478  // Return c1.
479  if (!MulSCEV && IV.getType()->isPointerTy())
480  if (const SCEVConstant *IncSCEV = dyn_cast<SCEVConstant>(SCEVExpr)) {
481  const PointerType *PTy = cast<PointerType>(IV.getType());
482  Type *ElTy = PTy->getElementType();
483  const SCEV *SizeOfExpr =
484  SE->getSizeOfExpr(SE->getEffectiveSCEVType(IV.getType()), ElTy);
485  if (IncSCEV->getValue()->getValue().isNegative()) {
486  const SCEV *NewSCEV =
487  SE->getUDivExpr(SE->getNegativeSCEV(SCEVExpr), SizeOfExpr);
488  return dyn_cast<SCEVConstant>(SE->getNegativeSCEV(NewSCEV));
489  } else {
490  return dyn_cast<SCEVConstant>(SE->getUDivExpr(SCEVExpr, SizeOfExpr));
491  }
492  }
493 
494  if (!MulSCEV)
495  return nullptr;
496 
497  // If StepRecurrence of a SCEVExpr is a c * sizeof(x), where c is constant,
498  // Return c.
499  const SCEVConstant *CIncSCEV = nullptr;
500  for (const SCEV *Operand : MulSCEV->operands()) {
501  if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Operand)) {
502  CIncSCEV = Constant;
503  } else if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Operand)) {
504  Type *AllocTy;
505  if (!Unknown->isSizeOf(AllocTy))
506  break;
507  } else {
508  return nullptr;
509  }
510  }
511  return CIncSCEV;
512 }
513 
514 // Check if an IV is only used to control the loop. There are two cases:
515 // 1. It only has one use which is loop increment, and the increment is only
516 // used by comparison and the PHI (could has sext with nsw in between), and the
517 // comparison is only used by branch.
518 // 2. It is used by loop increment and the comparison, the loop increment is
519 // only used by the PHI, and the comparison is used only by the branch.
520 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
521  unsigned IVUses = IV->getNumUses();
522  if (IVUses != 2 && IVUses != 1)
523  return false;
524 
525  for (auto *User : IV->users()) {
526  int32_t IncOrCmpUses = User->getNumUses();
527  bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
528 
529  // User can only have one or two uses.
530  if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
531  return false;
532 
533  // Case 1
534  if (IVUses == 1) {
535  // The only user must be the loop increment.
536  // The loop increment must have two uses.
537  if (IsCompInst || IncOrCmpUses != 2)
538  return false;
539  }
540 
541  // Case 2
542  if (IVUses == 2 && IncOrCmpUses != 1)
543  return false;
544 
545  // The users of the IV must be a binary operation or a comparison
546  if (auto *BO = dyn_cast<BinaryOperator>(User)) {
547  if (BO->getOpcode() == Instruction::Add) {
548  // Loop Increment
549  // User of Loop Increment should be either PHI or CMP
550  for (auto *UU : User->users()) {
551  if (PHINode *PN = dyn_cast<PHINode>(UU)) {
552  if (PN != IV)
553  return false;
554  }
555  // Must be a CMP or an ext (of a value with nsw) then CMP
556  else {
557  Instruction *UUser = dyn_cast<Instruction>(UU);
558  // Skip SExt if we are extending an nsw value
559  // TODO: Allow ZExt too
560  if (BO->hasNoSignedWrap() && UUser && UUser->getNumUses() == 1 &&
561  isa<SExtInst>(UUser))
562  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
563  if (!isCompareUsedByBranch(UUser))
564  return false;
565  }
566  }
567  } else
568  return false;
569  // Compare : can only have one use, and must be branch
570  } else if (!IsCompInst)
571  return false;
572  }
573  return true;
574 }
575 
576 // Collect the list of loop induction variables with respect to which it might
577 // be possible to reroll the loop.
578 void LoopReroll::collectPossibleIVs(Loop *L,
579  SmallInstructionVector &PossibleIVs) {
580  BasicBlock *Header = L->getHeader();
581  for (BasicBlock::iterator I = Header->begin(),
582  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
583  if (!isa<PHINode>(I))
584  continue;
585  if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
586  continue;
587 
588  if (const SCEVAddRecExpr *PHISCEV =
589  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
590  if (PHISCEV->getLoop() != L)
591  continue;
592  if (!PHISCEV->isAffine())
593  continue;
594  const SCEVConstant *IncSCEV = nullptr;
595  if (I->getType()->isPointerTy())
596  IncSCEV =
597  getIncrmentFactorSCEV(SE, PHISCEV->getStepRecurrence(*SE), *I);
598  else
599  IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
600  if (IncSCEV) {
601  const APInt &AInt = IncSCEV->getValue()->getValue().abs();
602  if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
603  continue;
604  IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
605  DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
606  << "\n");
607 
608  if (isLoopControlIV(L, &*I)) {
609  assert(!LoopControlIV && "Found two loop control only IV");
610  LoopControlIV = &(*I);
611  DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
612  << *PHISCEV << "\n");
613  } else
614  PossibleIVs.push_back(&*I);
615  }
616  }
617  }
618 }
619 
620 // Add the remainder of the reduction-variable chain to the instruction vector
621 // (the initial PHINode has already been added). If successful, the object is
622 // marked as valid.
624  assert(!Valid && "Cannot add to an already-valid chain");
625 
626  // The reduction variable must be a chain of single-use instructions
627  // (including the PHI), except for the last value (which is used by the PHI
628  // and also outside the loop).
629  Instruction *C = Instructions.front();
630  if (C->user_empty())
631  return;
632 
633  do {
634  C = cast<Instruction>(*C->user_begin());
635  if (C->hasOneUse()) {
636  if (!C->isBinaryOp())
637  return;
638 
639  if (!(isa<PHINode>(Instructions.back()) ||
640  C->isSameOperationAs(Instructions.back())))
641  return;
642 
643  Instructions.push_back(C);
644  }
645  } while (C->hasOneUse());
646 
647  if (Instructions.size() < 2 ||
648  !C->isSameOperationAs(Instructions.back()) ||
649  C->use_empty())
650  return;
651 
652  // C is now the (potential) last instruction in the reduction chain.
653  for (User *U : C->users()) {
654  // The only in-loop user can be the initial PHI.
655  if (L->contains(cast<Instruction>(U)))
656  if (cast<Instruction>(U) != Instructions.front())
657  return;
658  }
659 
660  Instructions.push_back(C);
661  Valid = true;
662 }
663 
664 // Collect the vector of possible reduction variables.
665 void LoopReroll::collectPossibleReductions(Loop *L,
666  ReductionTracker &Reductions) {
667  BasicBlock *Header = L->getHeader();
668  for (BasicBlock::iterator I = Header->begin(),
669  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
670  if (!isa<PHINode>(I))
671  continue;
672  if (!I->getType()->isSingleValueType())
673  continue;
674 
675  SimpleLoopReduction SLR(&*I, L);
676  if (!SLR.valid())
677  continue;
678 
679  DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " <<
680  SLR.size() << " chained instructions)\n");
681  Reductions.addSLR(SLR);
682  }
683 }
684 
685 // Collect the set of all users of the provided root instruction. This set of
686 // users contains not only the direct users of the root instruction, but also
687 // all users of those users, and so on. There are two exceptions:
688 //
689 // 1. Instructions in the set of excluded instructions are never added to the
690 // use set (even if they are users). This is used, for example, to exclude
691 // including root increments in the use set of the primary IV.
692 //
693 // 2. Instructions in the set of final instructions are added to the use set
694 // if they are users, but their users are not added. This is used, for
695 // example, to prevent a reduction update from forcing all later reduction
696 // updates into the use set.
697 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
698  Instruction *Root, const SmallInstructionSet &Exclude,
699  const SmallInstructionSet &Final,
701  SmallInstructionVector Queue(1, Root);
702  while (!Queue.empty()) {
703  Instruction *I = Queue.pop_back_val();
704  if (!Users.insert(I).second)
705  continue;
706 
707  if (!Final.count(I))
708  for (Use &U : I->uses()) {
709  Instruction *User = cast<Instruction>(U.getUser());
710  if (PHINode *PN = dyn_cast<PHINode>(User)) {
711  // Ignore "wrap-around" uses to PHIs of this loop's header.
712  if (PN->getIncomingBlock(U) == L->getHeader())
713  continue;
714  }
715 
716  if (L->contains(User) && !Exclude.count(User)) {
717  Queue.push_back(User);
718  }
719  }
720 
721  // We also want to collect single-user "feeder" values.
722  for (User::op_iterator OI = I->op_begin(),
723  OIE = I->op_end(); OI != OIE; ++OI) {
724  if (Instruction *Op = dyn_cast<Instruction>(*OI))
725  if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
726  !Final.count(Op))
727  Queue.push_back(Op);
728  }
729  }
730 }
731 
732 // Collect all of the users of all of the provided root instructions (combined
733 // into a single set).
734 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
735  const SmallInstructionVector &Roots,
736  const SmallInstructionSet &Exclude,
737  const SmallInstructionSet &Final,
738  DenseSet<Instruction *> &Users) {
739  for (Instruction *Root : Roots)
740  collectInLoopUserSet(Root, Exclude, Final, Users);
741 }
742 
744  if (LoadInst *LI = dyn_cast<LoadInst>(I))
745  return LI->isUnordered();
746  if (StoreInst *SI = dyn_cast<StoreInst>(I))
747  return SI->isUnordered();
748  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
749  return !MI->isVolatile();
750  return false;
751 }
752 
753 /// Return true if IVU is a "simple" arithmetic operation.
754 /// This is used for narrowing the search space for DAGRoots; only arithmetic
755 /// and GEPs can be part of a DAGRoot.
756 static bool isSimpleArithmeticOp(User *IVU) {
757  if (Instruction *I = dyn_cast<Instruction>(IVU)) {
758  switch (I->getOpcode()) {
759  default: return false;
760  case Instruction::Add:
761  case Instruction::Sub:
762  case Instruction::Mul:
763  case Instruction::Shl:
764  case Instruction::AShr:
765  case Instruction::LShr:
766  case Instruction::GetElementPtr:
767  case Instruction::Trunc:
768  case Instruction::ZExt:
769  case Instruction::SExt:
770  return true;
771  }
772  }
773  return false;
774 }
775 
776 static bool isLoopIncrement(User *U, Instruction *IV) {
778 
779  if ((BO && BO->getOpcode() != Instruction::Add) ||
780  (!BO && !isa<GetElementPtrInst>(U)))
781  return false;
782 
783  for (auto *UU : U->users()) {
784  PHINode *PN = dyn_cast<PHINode>(UU);
785  if (PN && PN == IV)
786  return true;
787  }
788  return false;
789 }
790 
791 bool LoopReroll::DAGRootTracker::
792 collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
793  SmallInstructionVector BaseUsers;
794 
795  for (auto *I : Base->users()) {
796  ConstantInt *CI = nullptr;
797 
798  if (isLoopIncrement(I, IV)) {
799  LoopIncs.push_back(cast<Instruction>(I));
800  continue;
801  }
802 
803  // The root nodes must be either GEPs, ORs or ADDs.
804  if (auto *BO = dyn_cast<BinaryOperator>(I)) {
805  if (BO->getOpcode() == Instruction::Add ||
806  BO->getOpcode() == Instruction::Or)
807  CI = dyn_cast<ConstantInt>(BO->getOperand(1));
808  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
809  Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
810  CI = dyn_cast<ConstantInt>(LastOperand);
811  }
812 
813  if (!CI) {
814  if (Instruction *II = dyn_cast<Instruction>(I)) {
815  BaseUsers.push_back(II);
816  continue;
817  } else {
818  DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I << "\n");
819  return false;
820  }
821  }
822 
823  int64_t V = std::abs(CI->getValue().getSExtValue());
824  if (Roots.find(V) != Roots.end())
825  // No duplicates, please.
826  return false;
827 
828  Roots[V] = cast<Instruction>(I);
829  }
830 
831  // Make sure we have at least two roots.
832  if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
833  return false;
834 
835  // If we found non-loop-inc, non-root users of Base, assume they are
836  // for the zeroth root index. This is because "add %a, 0" gets optimized
837  // away.
838  if (BaseUsers.size()) {
839  if (Roots.find(0) != Roots.end()) {
840  DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
841  return false;
842  }
843  Roots[0] = Base;
844  }
845 
846  // Calculate the number of users of the base, or lowest indexed, iteration.
847  unsigned NumBaseUses = BaseUsers.size();
848  if (NumBaseUses == 0)
849  NumBaseUses = Roots.begin()->second->getNumUses();
850 
851  // Check that every node has the same number of users.
852  for (auto &KV : Roots) {
853  if (KV.first == 0)
854  continue;
855  if (KV.second->getNumUses() != NumBaseUses) {
856  DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
857  << "#Base=" << NumBaseUses << ", #Root=" <<
858  KV.second->getNumUses() << "\n");
859  return false;
860  }
861  }
862 
863  return true;
864 }
865 
866 void LoopReroll::DAGRootTracker::
867 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
868  // Does the user look like it could be part of a root set?
869  // All its users must be simple arithmetic ops.
870  if (I->getNumUses() > IL_MaxRerollIterations)
871  return;
872 
873  if (I != IV && findRootsBase(I, SubsumedInsts))
874  return;
875 
876  SubsumedInsts.insert(I);
877 
878  for (User *V : I->users()) {
879  Instruction *I = cast<Instruction>(V);
880  if (is_contained(LoopIncs, I))
881  continue;
882 
883  if (!isSimpleArithmeticOp(I))
884  continue;
885 
886  // The recursive call makes a copy of SubsumedInsts.
887  findRootsRecursive(I, SubsumedInsts);
888  }
889 }
890 
891 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
892  if (DRS.Roots.empty())
893  return false;
894 
895  // Consider a DAGRootSet with N-1 roots (so N different values including
896  // BaseInst).
897  // Define d = Roots[0] - BaseInst, which should be the same as
898  // Roots[I] - Roots[I-1] for all I in [1..N).
899  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
900  // loop iteration J.
901  //
902  // Now, For the loop iterations to be consecutive:
903  // D = d * N
904  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
905  if (!ADR)
906  return false;
907  unsigned N = DRS.Roots.size() + 1;
908  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
909  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
910  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
911  return false;
912 
913  return true;
914 }
915 
916 bool LoopReroll::DAGRootTracker::
917 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
918  // The base of a RootSet must be an AddRec, so it can be erased.
919  const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
920  if (!IVU_ADR || IVU_ADR->getLoop() != L)
921  return false;
922 
923  std::map<int64_t, Instruction*> V;
924  if (!collectPossibleRoots(IVU, V))
925  return false;
926 
927  // If we didn't get a root for index zero, then IVU must be
928  // subsumed.
929  if (V.find(0) == V.end())
930  SubsumedInsts.insert(IVU);
931 
932  // Partition the vector into monotonically increasing indexes.
933  DAGRootSet DRS;
934  DRS.BaseInst = nullptr;
935 
936  SmallVector<DAGRootSet, 16> PotentialRootSets;
937 
938  for (auto &KV : V) {
939  if (!DRS.BaseInst) {
940  DRS.BaseInst = KV.second;
941  DRS.SubsumedInsts = SubsumedInsts;
942  } else if (DRS.Roots.empty()) {
943  DRS.Roots.push_back(KV.second);
944  } else if (V.find(KV.first - 1) != V.end()) {
945  DRS.Roots.push_back(KV.second);
946  } else {
947  // Linear sequence terminated.
948  if (!validateRootSet(DRS))
949  return false;
950 
951  // Construct a new DAGRootSet with the next sequence.
952  PotentialRootSets.push_back(DRS);
953  DRS.BaseInst = KV.second;
954  DRS.Roots.clear();
955  }
956  }
957 
958  if (!validateRootSet(DRS))
959  return false;
960 
961  PotentialRootSets.push_back(DRS);
962 
963  RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
964 
965  return true;
966 }
967 
968 bool LoopReroll::DAGRootTracker::findRoots() {
969  Inc = IVToIncMap[IV];
970 
971  assert(RootSets.empty() && "Unclean state!");
972  if (std::abs(Inc) == 1) {
973  for (auto *IVU : IV->users()) {
974  if (isLoopIncrement(IVU, IV))
975  LoopIncs.push_back(cast<Instruction>(IVU));
976  }
977  findRootsRecursive(IV, SmallInstructionSet());
978  LoopIncs.push_back(IV);
979  } else {
980  if (!findRootsBase(IV, SmallInstructionSet()))
981  return false;
982  }
983 
984  // Ensure all sets have the same size.
985  if (RootSets.empty()) {
986  DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
987  return false;
988  }
989  for (auto &V : RootSets) {
990  if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
991  DEBUG(dbgs()
992  << "LRR: Aborting because not all root sets have the same size\n");
993  return false;
994  }
995  }
996 
997  Scale = RootSets[0].Roots.size() + 1;
998 
999  if (Scale > IL_MaxRerollIterations) {
1000  DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
1001  << "#Found=" << Scale << ", #Max=" << IL_MaxRerollIterations
1002  << "\n");
1003  return false;
1004  }
1005 
1006  DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale << "\n");
1007 
1008  return true;
1009 }
1010 
1011 bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
1012  // Populate the MapVector with all instructions in the block, in order first,
1013  // so we can iterate over the contents later in perfect order.
1014  for (auto &I : *L->getHeader()) {
1015  Uses[&I].resize(IL_End);
1016  }
1017 
1018  SmallInstructionSet Exclude;
1019  for (auto &DRS : RootSets) {
1020  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1021  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1022  Exclude.insert(DRS.BaseInst);
1023  }
1024  Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1025 
1026  for (auto &DRS : RootSets) {
1027  DenseSet<Instruction*> VBase;
1028  collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1029  for (auto *I : VBase) {
1030  Uses[I].set(0);
1031  }
1032 
1033  unsigned Idx = 1;
1034  for (auto *Root : DRS.Roots) {
1036  collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1037 
1038  // While we're here, check the use sets are the same size.
1039  if (V.size() != VBase.size()) {
1040  DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
1041  return false;
1042  }
1043 
1044  for (auto *I : V) {
1045  Uses[I].set(Idx);
1046  }
1047  ++Idx;
1048  }
1049 
1050  // Make sure our subsumed instructions are remembered too.
1051  for (auto *I : DRS.SubsumedInsts) {
1052  Uses[I].set(IL_All);
1053  }
1054  }
1055 
1056  // Make sure the loop increments are also accounted for.
1057 
1058  Exclude.clear();
1059  for (auto &DRS : RootSets) {
1060  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1061  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1062  Exclude.insert(DRS.BaseInst);
1063  }
1064 
1066  collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1067  for (auto *I : V) {
1068  Uses[I].set(IL_All);
1069  }
1070 
1071  return true;
1072 
1073 }
1074 
1075 /// Get the next instruction in "In" that is a member of set Val.
1076 /// Start searching from StartI, and do not return anything in Exclude.
1077 /// If StartI is not given, start from In.begin().
1079 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
1080  const SmallInstructionSet &Exclude,
1081  UsesTy::iterator *StartI) {
1082  UsesTy::iterator I = StartI ? *StartI : In.begin();
1083  while (I != In.end() && (I->second.test(Val) == 0 ||
1084  Exclude.count(I->first) != 0))
1085  ++I;
1086  return I;
1087 }
1088 
1089 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
1090  for (auto &DRS : RootSets) {
1091  if (DRS.BaseInst == I)
1092  return true;
1093  }
1094  return false;
1095 }
1096 
1097 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
1098  for (auto &DRS : RootSets) {
1099  if (is_contained(DRS.Roots, I))
1100  return true;
1101  }
1102  return false;
1103 }
1104 
1105 /// Return true if instruction I depends on any instruction between
1106 /// Start and End.
1107 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
1108  UsesTy::iterator Start,
1109  UsesTy::iterator End) {
1110  for (auto *U : I->users()) {
1111  for (auto It = Start; It != End; ++It)
1112  if (U == It->first)
1113  return true;
1114  }
1115  return false;
1116 }
1117 
1118 static bool isIgnorableInst(const Instruction *I) {
1119  if (isa<DbgInfoIntrinsic>(I))
1120  return true;
1121  const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
1122  if (!II)
1123  return false;
1124  switch (II->getIntrinsicID()) {
1125  default:
1126  return false;
1127  case llvm::Intrinsic::annotation:
1128  case Intrinsic::ptr_annotation:
1129  case Intrinsic::var_annotation:
1130  // TODO: the following intrinsics may also be whitelisted:
1131  // lifetime_start, lifetime_end, invariant_start, invariant_end
1132  return true;
1133  }
1134  return false;
1135 }
1136 
1137 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1138  // We now need to check for equivalence of the use graph of each root with
1139  // that of the primary induction variable (excluding the roots). Our goal
1140  // here is not to solve the full graph isomorphism problem, but rather to
1141  // catch common cases without a lot of work. As a result, we will assume
1142  // that the relative order of the instructions in each unrolled iteration
1143  // is the same (although we will not make an assumption about how the
1144  // different iterations are intermixed). Note that while the order must be
1145  // the same, the instructions may not be in the same basic block.
1146 
1147  // An array of just the possible reductions for this scale factor. When we
1148  // collect the set of all users of some root instructions, these reduction
1149  // instructions are treated as 'final' (their uses are not considered).
1150  // This is important because we don't want the root use set to search down
1151  // the reduction chain.
1152  SmallInstructionSet PossibleRedSet;
1153  SmallInstructionSet PossibleRedLastSet;
1154  SmallInstructionSet PossibleRedPHISet;
1155  Reductions.restrictToScale(Scale, PossibleRedSet,
1156  PossibleRedPHISet, PossibleRedLastSet);
1157 
1158  // Populate "Uses" with where each instruction is used.
1159  if (!collectUsedInstructions(PossibleRedSet))
1160  return false;
1161 
1162  // Make sure we mark the reduction PHIs as used in all iterations.
1163  for (auto *I : PossibleRedPHISet) {
1164  Uses[I].set(IL_All);
1165  }
1166 
1167  // Make sure we mark loop-control-only PHIs as used in all iterations. See
1168  // comment above LoopReroll::isLoopControlIV for more information.
1169  BasicBlock *Header = L->getHeader();
1170  if (LoopControlIV && LoopControlIV != IV) {
1171  for (auto *U : LoopControlIV->users()) {
1172  Instruction *IVUser = dyn_cast<Instruction>(U);
1173  // IVUser could be loop increment or compare
1174  Uses[IVUser].set(IL_All);
1175  for (auto *UU : IVUser->users()) {
1176  Instruction *UUser = dyn_cast<Instruction>(UU);
1177  // UUser could be compare, PHI or branch
1178  Uses[UUser].set(IL_All);
1179  // Skip SExt
1180  if (isa<SExtInst>(UUser)) {
1181  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
1182  Uses[UUser].set(IL_All);
1183  }
1184  // Is UUser a compare instruction?
1185  if (UU->hasOneUse()) {
1186  Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1187  if (BI == cast<BranchInst>(Header->getTerminator()))
1188  Uses[BI].set(IL_All);
1189  }
1190  }
1191  }
1192  }
1193 
1194  // Make sure all instructions in the loop are in one and only one
1195  // set.
1196  for (auto &KV : Uses) {
1197  if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
1198  DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1199  << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1200  return false;
1201  }
1202  }
1203 
1204  DEBUG(
1205  for (auto &KV : Uses) {
1206  dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1207  }
1208  );
1209 
1210  for (unsigned Iter = 1; Iter < Scale; ++Iter) {
1211  // In addition to regular aliasing information, we need to look for
1212  // instructions from later (future) iterations that have side effects
1213  // preventing us from reordering them past other instructions with side
1214  // effects.
1215  bool FutureSideEffects = false;
1216  AliasSetTracker AST(*AA);
1217  // The map between instructions in f(%iv.(i+1)) and f(%iv).
1219 
1220  // Compare iteration Iter to the base.
1221  SmallInstructionSet Visited;
1222  auto BaseIt = nextInstr(0, Uses, Visited);
1223  auto RootIt = nextInstr(Iter, Uses, Visited);
1224  auto LastRootIt = Uses.begin();
1225 
1226  while (BaseIt != Uses.end() && RootIt != Uses.end()) {
1227  Instruction *BaseInst = BaseIt->first;
1228  Instruction *RootInst = RootIt->first;
1229 
1230  // Skip over the IV or root instructions; only match their users.
1231  bool Continue = false;
1232  if (isBaseInst(BaseInst)) {
1233  Visited.insert(BaseInst);
1234  BaseIt = nextInstr(0, Uses, Visited);
1235  Continue = true;
1236  }
1237  if (isRootInst(RootInst)) {
1238  LastRootIt = RootIt;
1239  Visited.insert(RootInst);
1240  RootIt = nextInstr(Iter, Uses, Visited);
1241  Continue = true;
1242  }
1243  if (Continue) continue;
1244 
1245  if (!BaseInst->isSameOperationAs(RootInst)) {
1246  // Last chance saloon. We don't try and solve the full isomorphism
1247  // problem, but try and at least catch the case where two instructions
1248  // *of different types* are round the wrong way. We won't be able to
1249  // efficiently tell, given two ADD instructions, which way around we
1250  // should match them, but given an ADD and a SUB, we can at least infer
1251  // which one is which.
1252  //
1253  // This should allow us to deal with a greater subset of the isomorphism
1254  // problem. It does however change a linear algorithm into a quadratic
1255  // one, so limit the number of probes we do.
1256  auto TryIt = RootIt;
1257  unsigned N = NumToleratedFailedMatches;
1258  while (TryIt != Uses.end() &&
1259  !BaseInst->isSameOperationAs(TryIt->first) &&
1260  N--) {
1261  ++TryIt;
1262  TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1263  }
1264 
1265  if (TryIt == Uses.end() || TryIt == RootIt ||
1266  instrDependsOn(TryIt->first, RootIt, TryIt)) {
1267  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1268  " vs. " << *RootInst << "\n");
1269  return false;
1270  }
1271 
1272  RootIt = TryIt;
1273  RootInst = TryIt->first;
1274  }
1275 
1276  // All instructions between the last root and this root
1277  // may belong to some other iteration. If they belong to a
1278  // future iteration, then they're dangerous to alias with.
1279  //
1280  // Note that because we allow a limited amount of flexibility in the order
1281  // that we visit nodes, LastRootIt might be *before* RootIt, in which
1282  // case we've already checked this set of instructions so we shouldn't
1283  // do anything.
1284  for (; LastRootIt < RootIt; ++LastRootIt) {
1285  Instruction *I = LastRootIt->first;
1286  if (LastRootIt->second.find_first() < (int)Iter)
1287  continue;
1288  if (I->mayWriteToMemory())
1289  AST.add(I);
1290  // Note: This is specifically guarded by a check on isa<PHINode>,
1291  // which while a valid (somewhat arbitrary) micro-optimization, is
1292  // needed because otherwise isSafeToSpeculativelyExecute returns
1293  // false on PHI nodes.
1294  if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
1296  // Intervening instructions cause side effects.
1297  FutureSideEffects = true;
1298  }
1299 
1300  // Make sure that this instruction, which is in the use set of this
1301  // root instruction, does not also belong to the base set or the set of
1302  // some other root instruction.
1303  if (RootIt->second.count() > 1) {
1304  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1305  " vs. " << *RootInst << " (prev. case overlap)\n");
1306  return false;
1307  }
1308 
1309  // Make sure that we don't alias with any instruction in the alias set
1310  // tracker. If we do, then we depend on a future iteration, and we
1311  // can't reroll.
1312  if (RootInst->mayReadFromMemory())
1313  for (auto &K : AST) {
1314  if (K.aliasesUnknownInst(RootInst, *AA)) {
1315  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1316  " vs. " << *RootInst << " (depends on future store)\n");
1317  return false;
1318  }
1319  }
1320 
1321  // If we've past an instruction from a future iteration that may have
1322  // side effects, and this instruction might also, then we can't reorder
1323  // them, and this matching fails. As an exception, we allow the alias
1324  // set tracker to handle regular (unordered) load/store dependencies.
1325  if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
1326  !isSafeToSpeculativelyExecute(BaseInst)) ||
1327  (!isUnorderedLoadStore(RootInst) &&
1328  !isSafeToSpeculativelyExecute(RootInst)))) {
1329  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1330  " vs. " << *RootInst <<
1331  " (side effects prevent reordering)\n");
1332  return false;
1333  }
1334 
1335  // For instructions that are part of a reduction, if the operation is
1336  // associative, then don't bother matching the operands (because we
1337  // already know that the instructions are isomorphic, and the order
1338  // within the iteration does not matter). For non-associative reductions,
1339  // we do need to match the operands, because we need to reject
1340  // out-of-order instructions within an iteration!
1341  // For example (assume floating-point addition), we need to reject this:
1342  // x += a[i]; x += b[i];
1343  // x += a[i+1]; x += b[i+1];
1344  // x += b[i+2]; x += a[i+2];
1345  bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1346 
1347  if (!(InReduction && BaseInst->isAssociative())) {
1348  bool Swapped = false, SomeOpMatched = false;
1349  for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
1350  Value *Op2 = RootInst->getOperand(j);
1351 
1352  // If this is part of a reduction (and the operation is not
1353  // associatve), then we match all operands, but not those that are
1354  // part of the reduction.
1355  if (InReduction)
1356  if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1357  if (Reductions.isPairInSame(RootInst, Op2I))
1358  continue;
1359 
1360  DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1361  if (BMI != BaseMap.end()) {
1362  Op2 = BMI->second;
1363  } else {
1364  for (auto &DRS : RootSets) {
1365  if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1366  Op2 = DRS.BaseInst;
1367  break;
1368  }
1369  }
1370  }
1371 
1372  if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
1373  // If we've not already decided to swap the matched operands, and
1374  // we've not already matched our first operand (note that we could
1375  // have skipped matching the first operand because it is part of a
1376  // reduction above), and the instruction is commutative, then try
1377  // the swapped match.
1378  if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1379  BaseInst->getOperand(!j) == Op2) {
1380  Swapped = true;
1381  } else {
1382  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1383  << " vs. " << *RootInst << " (operand " << j << ")\n");
1384  return false;
1385  }
1386  }
1387 
1388  SomeOpMatched = true;
1389  }
1390  }
1391 
1392  if ((!PossibleRedLastSet.count(BaseInst) &&
1393  hasUsesOutsideLoop(BaseInst, L)) ||
1394  (!PossibleRedLastSet.count(RootInst) &&
1395  hasUsesOutsideLoop(RootInst, L))) {
1396  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1397  " vs. " << *RootInst << " (uses outside loop)\n");
1398  return false;
1399  }
1400 
1401  Reductions.recordPair(BaseInst, RootInst, Iter);
1402  BaseMap.insert(std::make_pair(RootInst, BaseInst));
1403 
1404  LastRootIt = RootIt;
1405  Visited.insert(BaseInst);
1406  Visited.insert(RootInst);
1407  BaseIt = nextInstr(0, Uses, Visited);
1408  RootIt = nextInstr(Iter, Uses, Visited);
1409  }
1410  assert (BaseIt == Uses.end() && RootIt == Uses.end() &&
1411  "Mismatched set sizes!");
1412  }
1413 
1414  DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
1415  *IV << "\n");
1416 
1417  return true;
1418 }
1419 
1420 void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
1421  BasicBlock *Header = L->getHeader();
1422  // Remove instructions associated with non-base iterations.
1423  for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend();
1424  J != JE;) {
1425  unsigned I = Uses[&*J].find_first();
1426  if (I > 0 && I < IL_All) {
1427  DEBUG(dbgs() << "LRR: removing: " << *J << "\n");
1428  J++->eraseFromParent();
1429  continue;
1430  }
1431 
1432  ++J;
1433  }
1434 
1435  bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
1436 
1437  if (HasTwoIVs) {
1438  updateNonLoopCtrlIncr();
1439  replaceIV(LoopControlIV, LoopControlIV, IterCount);
1440  } else
1441  // We need to create a new induction variable for each different BaseInst.
1442  for (auto &DRS : RootSets)
1443  // Insert the new induction variable.
1444  replaceIV(DRS.BaseInst, IV, IterCount);
1445 
1446  SimplifyInstructionsInBlock(Header, TLI);
1447  DeleteDeadPHIs(Header, TLI);
1448 }
1449 
1450 // For non-loop-control IVs, we only need to update the last increment
1451 // with right amount, then we are done.
1452 void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
1453  const SCEV *NewInc = nullptr;
1454  for (auto *LoopInc : LoopIncs) {
1456  const SCEVConstant *COp = nullptr;
1457  if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
1458  COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1459  } else {
1460  COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
1461  if (!COp)
1462  COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1463  }
1464 
1465  assert(COp && "Didn't find constant operand of LoopInc!\n");
1466 
1467  const APInt &AInt = COp->getValue()->getValue();
1468  const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
1469  if (AInt.isNegative()) {
1470  NewInc = SE->getNegativeSCEV(COp);
1471  NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
1472  NewInc = SE->getNegativeSCEV(NewInc);
1473  } else
1474  NewInc = SE->getUDivExpr(COp, ScaleSCEV);
1475 
1476  LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
1477  }
1478 }
1479 
1480 void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
1481  Instruction *InstIV,
1482  const SCEV *IterCount) {
1483  BasicBlock *Header = L->getHeader();
1484  int64_t Inc = IVToIncMap[InstIV];
1485  bool NeedNewIV = InstIV == LoopControlIV;
1486  bool Negative = !NeedNewIV && Inc < 0;
1487 
1488  const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
1489  const SCEV *Start = RealIVSCEV->getStart();
1490 
1491  if (NeedNewIV)
1492  Start = SE->getConstant(Start->getType(), 0);
1493 
1494  const SCEV *SizeOfExpr = nullptr;
1495  const SCEV *IncrExpr =
1496  SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
1497  if (auto *PTy = dyn_cast<PointerType>(Inst->getType())) {
1498  Type *ElTy = PTy->getElementType();
1499  SizeOfExpr =
1500  SE->getSizeOfExpr(SE->getEffectiveSCEVType(Inst->getType()), ElTy);
1501  IncrExpr = SE->getMulExpr(IncrExpr, SizeOfExpr);
1502  }
1503  const SCEV *NewIVSCEV =
1504  SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1505 
1506  { // Limit the lifetime of SCEVExpander.
1507  const DataLayout &DL = Header->getModule()->getDataLayout();
1508  SCEVExpander Expander(*SE, DL, "reroll");
1509  Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1510  Header->getFirstNonPHIOrDbg());
1511 
1512  for (auto &KV : Uses)
1513  if (KV.second.find_first() == 0)
1514  KV.first->replaceUsesOfWith(Inst, NewIV);
1515 
1516  if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
1517  // FIXME: Why do we need this check?
1518  if (Uses[BI].find_first() == IL_All) {
1519  const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1520 
1521  if (NeedNewIV)
1522  ICSCEV = SE->getMulExpr(IterCount,
1523  SE->getConstant(IterCount->getType(), Scale));
1524 
1525  // Iteration count SCEV minus or plus 1
1526  const SCEV *MinusPlus1SCEV =
1527  SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
1528  if (Inst->getType()->isPointerTy()) {
1529  assert(SizeOfExpr && "SizeOfExpr is not initialized");
1530  MinusPlus1SCEV = SE->getMulExpr(MinusPlus1SCEV, SizeOfExpr);
1531  }
1532 
1533  const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV);
1534  // Iteration count minus 1
1535  Instruction *InsertPtr = nullptr;
1536  if (isa<SCEVConstant>(ICMinusPlus1SCEV)) {
1537  InsertPtr = BI;
1538  } else {
1539  BasicBlock *Preheader = L->getLoopPreheader();
1540  if (!Preheader)
1541  Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
1542  InsertPtr = Preheader->getTerminator();
1543  }
1544 
1545  if (!isa<PointerType>(NewIV->getType()) && NeedNewIV &&
1546  (SE->getTypeSizeInBits(NewIV->getType()) <
1547  SE->getTypeSizeInBits(ICMinusPlus1SCEV->getType()))) {
1548  IRBuilder<> Builder(BI);
1549  Builder.SetCurrentDebugLocation(BI->getDebugLoc());
1550  NewIV = Builder.CreateSExt(NewIV, ICMinusPlus1SCEV->getType());
1551  }
1552  Value *ICMinusPlus1 = Expander.expandCodeFor(
1553  ICMinusPlus1SCEV, NewIV->getType(), InsertPtr);
1554 
1555  Value *Cond =
1556  new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond");
1557  BI->setCondition(Cond);
1558 
1559  if (BI->getSuccessor(1) != Header)
1560  BI->swapSuccessors();
1561  }
1562  }
1563  }
1564 }
1565 
1566 // Validate the selected reductions. All iterations must have an isomorphic
1567 // part of the reduction chain and, for non-associative reductions, the chain
1568 // entries must appear in order.
1569 bool LoopReroll::ReductionTracker::validateSelected() {
1570  // For a non-associative reduction, the chain entries must appear in order.
1571  for (int i : Reds) {
1572  int PrevIter = 0, BaseCount = 0, Count = 0;
1573  for (Instruction *J : PossibleReds[i]) {
1574  // Note that all instructions in the chain must have been found because
1575  // all instructions in the function must have been assigned to some
1576  // iteration.
1577  int Iter = PossibleRedIter[J];
1578  if (Iter != PrevIter && Iter != PrevIter + 1 &&
1579  !PossibleReds[i].getReducedValue()->isAssociative()) {
1580  DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
1581  J << "\n");
1582  return false;
1583  }
1584 
1585  if (Iter != PrevIter) {
1586  if (Count != BaseCount) {
1587  DEBUG(dbgs() << "LRR: Iteration " << PrevIter <<
1588  " reduction use count " << Count <<
1589  " is not equal to the base use count " <<
1590  BaseCount << "\n");
1591  return false;
1592  }
1593 
1594  Count = 0;
1595  }
1596 
1597  ++Count;
1598  if (Iter == 0)
1599  ++BaseCount;
1600 
1601  PrevIter = Iter;
1602  }
1603  }
1604 
1605  return true;
1606 }
1607 
1608 // For all selected reductions, remove all parts except those in the first
1609 // iteration (and the PHI). Replace outside uses of the reduced value with uses
1610 // of the first-iteration reduced value (in other words, reroll the selected
1611 // reductions).
1612 void LoopReroll::ReductionTracker::replaceSelected() {
1613  // Fixup reductions to refer to the last instruction associated with the
1614  // first iteration (not the last).
1615  for (int i : Reds) {
1616  int j = 0;
1617  for (int e = PossibleReds[i].size(); j != e; ++j)
1618  if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1619  --j;
1620  break;
1621  }
1622 
1623  // Replace users with the new end-of-chain value.
1624  SmallInstructionVector Users;
1625  for (User *U : PossibleReds[i].getReducedValue()->users()) {
1626  Users.push_back(cast<Instruction>(U));
1627  }
1628 
1629  for (Instruction *User : Users)
1630  User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1631  PossibleReds[i][j]);
1632  }
1633 }
1634 
1635 // Reroll the provided loop with respect to the provided induction variable.
1636 // Generally, we're looking for a loop like this:
1637 //
1638 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
1639 // f(%iv)
1640 // %iv.1 = add %iv, 1 <-- a root increment
1641 // f(%iv.1)
1642 // %iv.2 = add %iv, 2 <-- a root increment
1643 // f(%iv.2)
1644 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
1645 // f(%iv.scale_m_1)
1646 // ...
1647 // %iv.next = add %iv, scale
1648 // %cmp = icmp(%iv, ...)
1649 // br %cmp, header, exit
1650 //
1651 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1652 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1653 // be intermixed with eachother. The restriction imposed by this algorithm is
1654 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1655 // etc. be the same.
1656 //
1657 // First, we collect the use set of %iv, excluding the other increment roots.
1658 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1659 // times, having collected the use set of f(%iv.(i+1)), during which we:
1660 // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1661 // the next unmatched instruction in f(%iv.(i+1)).
1662 // - Ensure that both matched instructions don't have any external users
1663 // (with the exception of last-in-chain reduction instructions).
1664 // - Track the (aliasing) write set, and other side effects, of all
1665 // instructions that belong to future iterations that come before the matched
1666 // instructions. If the matched instructions read from that write set, then
1667 // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1668 // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1669 // if any of these future instructions had side effects (could not be
1670 // speculatively executed), and so do the matched instructions, when we
1671 // cannot reorder those side-effect-producing instructions, and rerolling
1672 // fails.
1673 //
1674 // Finally, we make sure that all loop instructions are either loop increment
1675 // roots, belong to simple latch code, parts of validated reductions, part of
1676 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1677 // have been validated), then we reroll the loop.
1678 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1679  const SCEV *IterCount,
1680  ReductionTracker &Reductions) {
1681  DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1682  IVToIncMap, LoopControlIV);
1683 
1684  if (!DAGRoots.findRoots())
1685  return false;
1686  DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
1687  *IV << "\n");
1688 
1689  if (!DAGRoots.validate(Reductions))
1690  return false;
1691  if (!Reductions.validateSelected())
1692  return false;
1693  // At this point, we've validated the rerolling, and we're committed to
1694  // making changes!
1695 
1696  Reductions.replaceSelected();
1697  DAGRoots.replace(IterCount);
1698 
1699  ++NumRerolledLoops;
1700  return true;
1701 }
1702 
1703 bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
1704  if (skipLoop(L))
1705  return false;
1706 
1707  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1708  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1709  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1710  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1711  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1712  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1713 
1714  BasicBlock *Header = L->getHeader();
1715  DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
1716  "] Loop %" << Header->getName() << " (" <<
1717  L->getNumBlocks() << " block(s))\n");
1718 
1719  // For now, we'll handle only single BB loops.
1720  if (L->getNumBlocks() > 1)
1721  return false;
1722 
1723  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
1724  return false;
1725 
1726  const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
1727  const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
1728  DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
1729  DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
1730 
1731  // First, we need to find the induction variable with respect to which we can
1732  // reroll (there may be several possible options).
1733  SmallInstructionVector PossibleIVs;
1734  IVToIncMap.clear();
1735  LoopControlIV = nullptr;
1736  collectPossibleIVs(L, PossibleIVs);
1737 
1738  if (PossibleIVs.empty()) {
1739  DEBUG(dbgs() << "LRR: No possible IVs found\n");
1740  return false;
1741  }
1742 
1743  ReductionTracker Reductions;
1744  collectPossibleReductions(L, Reductions);
1745  bool Changed = false;
1746 
1747  // For each possible IV, collect the associated possible set of 'root' nodes
1748  // (i+1, i+2, etc.).
1749  for (Instruction *PossibleIV : PossibleIVs)
1750  if (reroll(PossibleIV, L, Header, IterCount, Reductions)) {
1751  Changed = true;
1752  break;
1753  }
1754  DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
1755 
1756  // Trip count of L has changed so SE must be re-evaluated.
1757  if (Changed)
1758  SE->forgetLoop(L);
1759 
1760  return Changed;
1761 }
MachineLoop * L
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
VectorType::iterator iterator
Definition: MapVector.h:40
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
void push_back(const T &Elt)
Definition: SmallVector.h:211
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:241
Pass * createLoopRerollPass()
iterator_range< use_iterator > uses()
Definition: Value.h:326
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
STATISTIC(NumFunctions,"Total number of functions")
size_t i
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
Implements a dense probed hash-table based set.
Definition: DenseSet.h:202
unsigned getNumOperands() const
Definition: User.h:167
The main scalar evolution driver.
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:233
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:32
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:100
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:489
void initializeLoopRerollPass(PassRegistry &)
reverse_iterator rend()
Definition: BasicBlock.h:235
An instruction for reading from memory.
Definition: Instructions.h:164
reverse_iterator rbegin()
Definition: BasicBlock.h:233
Hexagon Common GEP
iv Induction Variable Users
Definition: IVUsers.cpp:51
Type * getElementType() const
Definition: DerivedTypes.h:462
op_iterator op_begin()
Definition: User.h:205
BlockT * getHeader() const
Definition: LoopInfo.h:102
const SCEV * getStart() const
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:228
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:239
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:324
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:172
Instruction * getFirstNonPHIOrDbg()
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic...
Definition: BasicBlock.cpp:187
static bool isIgnorableInst(const Instruction *I)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
static cl::opt< unsigned > MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, cl::desc("The maximum increment for loop rerolling"))
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:143
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:116
static bool isUnorderedLoadStore(Instruction *I)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:588
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
This node represents multiplication of some number of SCEVs.
static const SCEVConstant * getIncrmentFactorSCEV(ScalarEvolution *SE, const SCEV *SCEVExpr, Instruction &IV)
bool mayReadFromMemory() const
Return true if this instruction may read memory.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAssociative() const
Return true if the instruction is associative:
Function Alias Analysis false
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array y and places the result in dest...
Definition: APInt.cpp:239
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
An instruction for storing to memory.
Definition: Instructions.h:300
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:96
Class to represent pointers.
Definition: DerivedTypes.h:443
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:24
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:830
#define P(N)
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:241
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:109
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
Type * getType() const
Return the LLVM type of this SCEV expression.
Conditional or Unconditional Branch instruction.
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1321
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1947
char & LCSSAID
Definition: LCSSA.cpp:379
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:259
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:207
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:109
This instruction compares its operands according to the predicate given to the constructor.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
static const unsigned End
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1119
Value * getOperand(unsigned i) const
Definition: User.h:145
static bool isLoopIncrement(User *U, Instruction *IV)
bool isCommutative() const
Return true if the instruction is commutative:
Definition: Instruction.h:385
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:213
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
BinaryOps getOpcode() const
Definition: InstrTypes.h:541
IterationLimits
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:88
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
Provides information about what library functions are available for the current target.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:50
static cl::opt< unsigned > NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate"" during fuzzy matching. (default: 400)"))
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:198
ConstantInt * getValue() const
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Class for arbitrary precision integers.
Definition: APInt.h:77
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
iterator_range< user_iterator > users()
Definition: Value.h:370
This class uses information about analyzed scalars to rewrite expressions in canonical form...
static bool isSimpleArithmeticOp(User *IVU)
Return true if IVU is a "simple" arithmetic operation.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:384
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
iv users
Definition: IVUsers.cpp:51
This class represents an analyzed expression in the program.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:148
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
bool isBinaryOp() const
Definition: Instruction.h:115
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:383
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1099
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:938
iterator find(const KeyT &Val)
Definition: DenseMap.h:127
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
bool use_empty() const
Definition: Value.h:299
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:346
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM Value Representation.
Definition: Value.h:71
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:111
bool user_empty() const
Definition: Value.h:335
#define DEBUG(X)
Definition: Debug.h:100
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
IRTranslator LLVM IR MI
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one...
iterator getFirstInsertionPt()
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:209
APInt abs() const
Get the absolute value.
Definition: APInt.h:1559
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:137
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:162
const BasicBlock * getParent() const
Definition: Instruction.h:62
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
loops
Definition: LoopInfo.cpp:709
This class represents a constant integer value.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:783