LLVM  6.0.0svn
LoopRerollPass.cpp
Go to the documentation of this file.
1 //===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop reroller.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/BitVector.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/MapVector.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/LoopPass.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Dominators.h"
37 #include "llvm/IR/IRBuilder.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Use.h"
46 #include "llvm/IR/User.h"
47 #include "llvm/IR/Value.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/Debug.h"
53 #include "llvm/Transforms/Scalar.h"
57 #include <cassert>
58 #include <cstddef>
59 #include <cstdint>
60 #include <cstdlib>
61 #include <iterator>
62 #include <map>
63 #include <utility>
64 
65 using namespace llvm;
66 
67 #define DEBUG_TYPE "loop-reroll"
68 
69 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
70 
71 static cl::opt<unsigned>
72 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
73  cl::desc("The maximum increment for loop rerolling"));
74 
75 static cl::opt<unsigned>
76 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
77  cl::Hidden,
78  cl::desc("The maximum number of failures to tolerate"
79  " during fuzzy matching. (default: 400)"));
80 
81 // This loop re-rolling transformation aims to transform loops like this:
82 //
83 // int foo(int a);
84 // void bar(int *x) {
85 // for (int i = 0; i < 500; i += 3) {
86 // foo(i);
87 // foo(i+1);
88 // foo(i+2);
89 // }
90 // }
91 //
92 // into a loop like this:
93 //
94 // void bar(int *x) {
95 // for (int i = 0; i < 500; ++i)
96 // foo(i);
97 // }
98 //
99 // It does this by looking for loops that, besides the latch code, are composed
100 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
101 // to the induction variable, and where each DAG is isomorphic to the DAG
102 // rooted at the induction variable (excepting the sub-DAGs which root the
103 // other induction-variable increments). In other words, we're looking for loop
104 // bodies of the form:
105 //
106 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
107 // f(%iv)
108 // %iv.1 = add %iv, 1 <-- a root increment
109 // f(%iv.1)
110 // %iv.2 = add %iv, 2 <-- a root increment
111 // f(%iv.2)
112 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
113 // f(%iv.scale_m_1)
114 // ...
115 // %iv.next = add %iv, scale
116 // %cmp = icmp(%iv, ...)
117 // br %cmp, header, exit
118 //
119 // where each f(i) is a set of instructions that, collectively, are a function
120 // only of i (and other loop-invariant values).
121 //
122 // As a special case, we can also reroll loops like this:
123 //
124 // int foo(int);
125 // void bar(int *x) {
126 // for (int i = 0; i < 500; ++i) {
127 // x[3*i] = foo(0);
128 // x[3*i+1] = foo(0);
129 // x[3*i+2] = foo(0);
130 // }
131 // }
132 //
133 // into this:
134 //
135 // void bar(int *x) {
136 // for (int i = 0; i < 1500; ++i)
137 // x[i] = foo(0);
138 // }
139 //
140 // in which case, we're looking for inputs like this:
141 //
142 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
143 // %scaled.iv = mul %iv, scale
144 // f(%scaled.iv)
145 // %scaled.iv.1 = add %scaled.iv, 1
146 // f(%scaled.iv.1)
147 // %scaled.iv.2 = add %scaled.iv, 2
148 // f(%scaled.iv.2)
149 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
150 // f(%scaled.iv.scale_m_1)
151 // ...
152 // %iv.next = add %iv, 1
153 // %cmp = icmp(%iv, ...)
154 // br %cmp, header, exit
155 
156 namespace {
157 
159  /// The maximum number of iterations that we'll try and reroll.
160  IL_MaxRerollIterations = 32,
161  /// The bitvector index used by loop induction variables and other
162  /// instructions that belong to all iterations.
163  IL_All,
164  IL_End
165  };
166 
167  class LoopReroll : public LoopPass {
168  public:
169  static char ID; // Pass ID, replacement for typeid
170 
171  LoopReroll() : LoopPass(ID) {
173  }
174 
175  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
176 
177  void getAnalysisUsage(AnalysisUsage &AU) const override {
180  }
181 
182  protected:
183  AliasAnalysis *AA;
184  LoopInfo *LI;
185  ScalarEvolution *SE;
186  TargetLibraryInfo *TLI;
187  DominatorTree *DT;
188  bool PreserveLCSSA;
189 
190  using SmallInstructionVector = SmallVector<Instruction *, 16>;
191  using SmallInstructionSet = SmallSet<Instruction *, 16>;
192 
193  // Map between induction variable and its increment
195 
196  // For loop with multiple induction variable, remember the one used only to
197  // control the loop.
198  Instruction *LoopControlIV;
199 
200  // A chain of isomorphic instructions, identified by a single-use PHI
201  // representing a reduction. Only the last value may be used outside the
202  // loop.
203  struct SimpleLoopReduction {
204  SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
205  assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
206  add(L);
207  }
208 
209  bool valid() const {
210  return Valid;
211  }
212 
213  Instruction *getPHI() const {
214  assert(Valid && "Using invalid reduction");
215  return Instructions.front();
216  }
217 
218  Instruction *getReducedValue() const {
219  assert(Valid && "Using invalid reduction");
220  return Instructions.back();
221  }
222 
223  Instruction *get(size_t i) const {
224  assert(Valid && "Using invalid reduction");
225  return Instructions[i+1];
226  }
227 
228  Instruction *operator [] (size_t i) const { return get(i); }
229 
230  // The size, ignoring the initial PHI.
231  size_t size() const {
232  assert(Valid && "Using invalid reduction");
233  return Instructions.size()-1;
234  }
235 
236  using iterator = SmallInstructionVector::iterator;
237  using const_iterator = SmallInstructionVector::const_iterator;
238 
239  iterator begin() {
240  assert(Valid && "Using invalid reduction");
241  return std::next(Instructions.begin());
242  }
243 
244  const_iterator begin() const {
245  assert(Valid && "Using invalid reduction");
246  return std::next(Instructions.begin());
247  }
248 
249  iterator end() { return Instructions.end(); }
250  const_iterator end() const { return Instructions.end(); }
251 
252  protected:
253  bool Valid = false;
254  SmallInstructionVector Instructions;
255 
256  void add(Loop *L);
257  };
258 
259  // The set of all reductions, and state tracking of possible reductions
260  // during loop instruction processing.
261  struct ReductionTracker {
262  using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
263 
264  // Add a new possible reduction.
265  void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
266 
267  // Setup to track possible reductions corresponding to the provided
268  // rerolling scale. Only reductions with a number of non-PHI instructions
269  // that is divisible by the scale are considered. Three instructions sets
270  // are filled in:
271  // - A set of all possible instructions in eligible reductions.
272  // - A set of all PHIs in eligible reductions
273  // - A set of all reduced values (last instructions) in eligible
274  // reductions.
275  void restrictToScale(uint64_t Scale,
276  SmallInstructionSet &PossibleRedSet,
277  SmallInstructionSet &PossibleRedPHISet,
278  SmallInstructionSet &PossibleRedLastSet) {
279  PossibleRedIdx.clear();
280  PossibleRedIter.clear();
281  Reds.clear();
282 
283  for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
284  if (PossibleReds[i].size() % Scale == 0) {
285  PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
286  PossibleRedPHISet.insert(PossibleReds[i].getPHI());
287 
288  PossibleRedSet.insert(PossibleReds[i].getPHI());
289  PossibleRedIdx[PossibleReds[i].getPHI()] = i;
290  for (Instruction *J : PossibleReds[i]) {
291  PossibleRedSet.insert(J);
292  PossibleRedIdx[J] = i;
293  }
294  }
295  }
296 
297  // The functions below are used while processing the loop instructions.
298 
299  // Are the two instructions both from reductions, and furthermore, from
300  // the same reduction?
301  bool isPairInSame(Instruction *J1, Instruction *J2) {
302  DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
303  if (J1I != PossibleRedIdx.end()) {
304  DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
305  if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
306  return true;
307  }
308 
309  return false;
310  }
311 
312  // The two provided instructions, the first from the base iteration, and
313  // the second from iteration i, form a matched pair. If these are part of
314  // a reduction, record that fact.
315  void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
316  if (PossibleRedIdx.count(J1)) {
317  assert(PossibleRedIdx.count(J2) &&
318  "Recording reduction vs. non-reduction instruction?");
319 
320  PossibleRedIter[J1] = 0;
321  PossibleRedIter[J2] = i;
322 
323  int Idx = PossibleRedIdx[J1];
324  assert(Idx == PossibleRedIdx[J2] &&
325  "Recording pair from different reductions?");
326  Reds.insert(Idx);
327  }
328  }
329 
330  // The functions below can be called after we've finished processing all
331  // instructions in the loop, and we know which reductions were selected.
332 
333  bool validateSelected();
334  void replaceSelected();
335 
336  protected:
337  // The vector of all possible reductions (for any scale).
338  SmallReductionVector PossibleReds;
339 
340  DenseMap<Instruction *, int> PossibleRedIdx;
341  DenseMap<Instruction *, int> PossibleRedIter;
342  DenseSet<int> Reds;
343  };
344 
345  // A DAGRootSet models an induction variable being used in a rerollable
346  // loop. For example,
347  //
348  // x[i*3+0] = y1
349  // x[i*3+1] = y2
350  // x[i*3+2] = y3
351  //
352  // Base instruction -> i*3
353  // +---+----+
354  // / | \
355  // ST[y1] +1 +2 <-- Roots
356  // | |
357  // ST[y2] ST[y3]
358  //
359  // There may be multiple DAGRoots, for example:
360  //
361  // x[i*2+0] = ... (1)
362  // x[i*2+1] = ... (1)
363  // x[i*2+4] = ... (2)
364  // x[i*2+5] = ... (2)
365  // x[(i+1234)*2+5678] = ... (3)
366  // x[(i+1234)*2+5679] = ... (3)
367  //
368  // The loop will be rerolled by adding a new loop induction variable,
369  // one for the Base instruction in each DAGRootSet.
370  //
371  struct DAGRootSet {
372  Instruction *BaseInst;
373  SmallInstructionVector Roots;
374 
375  // The instructions between IV and BaseInst (but not including BaseInst).
376  SmallInstructionSet SubsumedInsts;
377  };
378 
379  // The set of all DAG roots, and state tracking of all roots
380  // for a particular induction variable.
381  struct DAGRootTracker {
382  DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
385  bool PreserveLCSSA,
387  Instruction *LoopCtrlIV)
388  : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
389  PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
390  LoopControlIV(LoopCtrlIV) {}
391 
392  /// Stage 1: Find all the DAG roots for the induction variable.
393  bool findRoots();
394 
395  /// Stage 2: Validate if the found roots are valid.
396  bool validate(ReductionTracker &Reductions);
397 
398  /// Stage 3: Assuming validate() returned true, perform the
399  /// replacement.
400  /// @param IterCount The maximum iteration count of L.
401  void replace(const SCEV *IterCount);
402 
403  protected:
405 
406  void findRootsRecursive(Instruction *IVU,
407  SmallInstructionSet SubsumedInsts);
408  bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
409  bool collectPossibleRoots(Instruction *Base,
410  std::map<int64_t,Instruction*> &Roots);
411  bool validateRootSet(DAGRootSet &DRS);
412 
413  bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
414  void collectInLoopUserSet(const SmallInstructionVector &Roots,
415  const SmallInstructionSet &Exclude,
416  const SmallInstructionSet &Final,
418  void collectInLoopUserSet(Instruction *Root,
419  const SmallInstructionSet &Exclude,
420  const SmallInstructionSet &Final,
421  DenseSet<Instruction *> &Users);
422 
423  UsesTy::iterator nextInstr(int Val, UsesTy &In,
424  const SmallInstructionSet &Exclude,
425  UsesTy::iterator *StartI=nullptr);
426  bool isBaseInst(Instruction *I);
427  bool isRootInst(Instruction *I);
428  bool instrDependsOn(Instruction *I,
429  UsesTy::iterator Start,
430  UsesTy::iterator End);
431  void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
432  void updateNonLoopCtrlIncr();
433 
434  LoopReroll *Parent;
435 
436  // Members of Parent, replicated here for brevity.
437  Loop *L;
438  ScalarEvolution *SE;
439  AliasAnalysis *AA;
440  TargetLibraryInfo *TLI;
441  DominatorTree *DT;
442  LoopInfo *LI;
443  bool PreserveLCSSA;
444 
445  // The loop induction variable.
446  Instruction *IV;
447 
448  // Loop step amount.
449  int64_t Inc;
450 
451  // Loop reroll count; if Inc == 1, this records the scaling applied
452  // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
453  // If Inc is not 1, Scale = Inc.
454  uint64_t Scale;
455 
456  // The roots themselves.
458 
459  // All increment instructions for IV.
460  SmallInstructionVector LoopIncs;
461 
462  // Map of all instructions in the loop (in order) to the iterations
463  // they are used in (or specially, IL_All for instructions
464  // used in the loop increment mechanism).
465  UsesTy Uses;
466 
467  // Map between induction variable and its increment
469 
470  Instruction *LoopControlIV;
471  };
472 
473  // Check if it is a compare-like instruction whose user is a branch
474  bool isCompareUsedByBranch(Instruction *I) {
475  auto *TI = I->getParent()->getTerminator();
476  if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
477  return false;
478  return I->hasOneUse() && TI->getOperand(0) == I;
479  };
480 
481  bool isLoopControlIV(Loop *L, Instruction *IV);
482  void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
483  void collectPossibleReductions(Loop *L,
484  ReductionTracker &Reductions);
485  bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
486  ReductionTracker &Reductions);
487  };
488 
489 } // end anonymous namespace
490 
491 char LoopReroll::ID = 0;
492 
493 INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
496 INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
497 
499  return new LoopReroll;
500 }
501 
502 // Returns true if the provided instruction is used outside the given loop.
503 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
504 // non-loop blocks to be outside the loop.
506  for (User *U : I->users()) {
507  if (!L->contains(cast<Instruction>(U)))
508  return true;
509  }
510  return false;
511 }
512 
514  const SCEV *SCEVExpr,
515  Instruction &IV) {
516  const SCEVMulExpr *MulSCEV = dyn_cast<SCEVMulExpr>(SCEVExpr);
517 
518  // If StepRecurrence of a SCEVExpr is a constant (c1 * c2, c2 = sizeof(ptr)),
519  // Return c1.
520  if (!MulSCEV && IV.getType()->isPointerTy())
521  if (const SCEVConstant *IncSCEV = dyn_cast<SCEVConstant>(SCEVExpr)) {
522  const PointerType *PTy = cast<PointerType>(IV.getType());
523  Type *ElTy = PTy->getElementType();
524  const SCEV *SizeOfExpr =
525  SE->getSizeOfExpr(SE->getEffectiveSCEVType(IV.getType()), ElTy);
526  if (IncSCEV->getValue()->getValue().isNegative()) {
527  const SCEV *NewSCEV =
528  SE->getUDivExpr(SE->getNegativeSCEV(SCEVExpr), SizeOfExpr);
529  return dyn_cast<SCEVConstant>(SE->getNegativeSCEV(NewSCEV));
530  } else {
531  return dyn_cast<SCEVConstant>(SE->getUDivExpr(SCEVExpr, SizeOfExpr));
532  }
533  }
534 
535  if (!MulSCEV)
536  return nullptr;
537 
538  // If StepRecurrence of a SCEVExpr is a c * sizeof(x), where c is constant,
539  // Return c.
540  const SCEVConstant *CIncSCEV = nullptr;
541  for (const SCEV *Operand : MulSCEV->operands()) {
542  if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Operand)) {
543  CIncSCEV = Constant;
544  } else if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Operand)) {
545  Type *AllocTy;
546  if (!Unknown->isSizeOf(AllocTy))
547  break;
548  } else {
549  return nullptr;
550  }
551  }
552  return CIncSCEV;
553 }
554 
555 // Check if an IV is only used to control the loop. There are two cases:
556 // 1. It only has one use which is loop increment, and the increment is only
557 // used by comparison and the PHI (could has sext with nsw in between), and the
558 // comparison is only used by branch.
559 // 2. It is used by loop increment and the comparison, the loop increment is
560 // only used by the PHI, and the comparison is used only by the branch.
561 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
562  unsigned IVUses = IV->getNumUses();
563  if (IVUses != 2 && IVUses != 1)
564  return false;
565 
566  for (auto *User : IV->users()) {
567  int32_t IncOrCmpUses = User->getNumUses();
568  bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
569 
570  // User can only have one or two uses.
571  if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
572  return false;
573 
574  // Case 1
575  if (IVUses == 1) {
576  // The only user must be the loop increment.
577  // The loop increment must have two uses.
578  if (IsCompInst || IncOrCmpUses != 2)
579  return false;
580  }
581 
582  // Case 2
583  if (IVUses == 2 && IncOrCmpUses != 1)
584  return false;
585 
586  // The users of the IV must be a binary operation or a comparison
587  if (auto *BO = dyn_cast<BinaryOperator>(User)) {
588  if (BO->getOpcode() == Instruction::Add) {
589  // Loop Increment
590  // User of Loop Increment should be either PHI or CMP
591  for (auto *UU : User->users()) {
592  if (PHINode *PN = dyn_cast<PHINode>(UU)) {
593  if (PN != IV)
594  return false;
595  }
596  // Must be a CMP or an ext (of a value with nsw) then CMP
597  else {
598  Instruction *UUser = dyn_cast<Instruction>(UU);
599  // Skip SExt if we are extending an nsw value
600  // TODO: Allow ZExt too
601  if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
602  isa<SExtInst>(UUser))
603  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
604  if (!isCompareUsedByBranch(UUser))
605  return false;
606  }
607  }
608  } else
609  return false;
610  // Compare : can only have one use, and must be branch
611  } else if (!IsCompInst)
612  return false;
613  }
614  return true;
615 }
616 
617 // Collect the list of loop induction variables with respect to which it might
618 // be possible to reroll the loop.
619 void LoopReroll::collectPossibleIVs(Loop *L,
620  SmallInstructionVector &PossibleIVs) {
621  BasicBlock *Header = L->getHeader();
622  for (BasicBlock::iterator I = Header->begin(),
623  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
624  if (!isa<PHINode>(I))
625  continue;
626  if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
627  continue;
628 
629  if (const SCEVAddRecExpr *PHISCEV =
630  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
631  if (PHISCEV->getLoop() != L)
632  continue;
633  if (!PHISCEV->isAffine())
634  continue;
635  const SCEVConstant *IncSCEV = nullptr;
636  if (I->getType()->isPointerTy())
637  IncSCEV =
638  getIncrmentFactorSCEV(SE, PHISCEV->getStepRecurrence(*SE), *I);
639  else
640  IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
641  if (IncSCEV) {
642  const APInt &AInt = IncSCEV->getValue()->getValue().abs();
643  if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
644  continue;
645  IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
646  DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
647  << "\n");
648 
649  if (isLoopControlIV(L, &*I)) {
650  assert(!LoopControlIV && "Found two loop control only IV");
651  LoopControlIV = &(*I);
652  DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
653  << *PHISCEV << "\n");
654  } else
655  PossibleIVs.push_back(&*I);
656  }
657  }
658  }
659 }
660 
661 // Add the remainder of the reduction-variable chain to the instruction vector
662 // (the initial PHINode has already been added). If successful, the object is
663 // marked as valid.
665  assert(!Valid && "Cannot add to an already-valid chain");
666 
667  // The reduction variable must be a chain of single-use instructions
668  // (including the PHI), except for the last value (which is used by the PHI
669  // and also outside the loop).
670  Instruction *C = Instructions.front();
671  if (C->user_empty())
672  return;
673 
674  do {
675  C = cast<Instruction>(*C->user_begin());
676  if (C->hasOneUse()) {
677  if (!C->isBinaryOp())
678  return;
679 
680  if (!(isa<PHINode>(Instructions.back()) ||
681  C->isSameOperationAs(Instructions.back())))
682  return;
683 
684  Instructions.push_back(C);
685  }
686  } while (C->hasOneUse());
687 
688  if (Instructions.size() < 2 ||
689  !C->isSameOperationAs(Instructions.back()) ||
690  C->use_empty())
691  return;
692 
693  // C is now the (potential) last instruction in the reduction chain.
694  for (User *U : C->users()) {
695  // The only in-loop user can be the initial PHI.
696  if (L->contains(cast<Instruction>(U)))
697  if (cast<Instruction>(U) != Instructions.front())
698  return;
699  }
700 
701  Instructions.push_back(C);
702  Valid = true;
703 }
704 
705 // Collect the vector of possible reduction variables.
706 void LoopReroll::collectPossibleReductions(Loop *L,
707  ReductionTracker &Reductions) {
708  BasicBlock *Header = L->getHeader();
709  for (BasicBlock::iterator I = Header->begin(),
710  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
711  if (!isa<PHINode>(I))
712  continue;
713  if (!I->getType()->isSingleValueType())
714  continue;
715 
716  SimpleLoopReduction SLR(&*I, L);
717  if (!SLR.valid())
718  continue;
719 
720  DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " <<
721  SLR.size() << " chained instructions)\n");
722  Reductions.addSLR(SLR);
723  }
724 }
725 
726 // Collect the set of all users of the provided root instruction. This set of
727 // users contains not only the direct users of the root instruction, but also
728 // all users of those users, and so on. There are two exceptions:
729 //
730 // 1. Instructions in the set of excluded instructions are never added to the
731 // use set (even if they are users). This is used, for example, to exclude
732 // including root increments in the use set of the primary IV.
733 //
734 // 2. Instructions in the set of final instructions are added to the use set
735 // if they are users, but their users are not added. This is used, for
736 // example, to prevent a reduction update from forcing all later reduction
737 // updates into the use set.
738 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
739  Instruction *Root, const SmallInstructionSet &Exclude,
740  const SmallInstructionSet &Final,
742  SmallInstructionVector Queue(1, Root);
743  while (!Queue.empty()) {
744  Instruction *I = Queue.pop_back_val();
745  if (!Users.insert(I).second)
746  continue;
747 
748  if (!Final.count(I))
749  for (Use &U : I->uses()) {
750  Instruction *User = cast<Instruction>(U.getUser());
751  if (PHINode *PN = dyn_cast<PHINode>(User)) {
752  // Ignore "wrap-around" uses to PHIs of this loop's header.
753  if (PN->getIncomingBlock(U) == L->getHeader())
754  continue;
755  }
756 
757  if (L->contains(User) && !Exclude.count(User)) {
758  Queue.push_back(User);
759  }
760  }
761 
762  // We also want to collect single-user "feeder" values.
763  for (User::op_iterator OI = I->op_begin(),
764  OIE = I->op_end(); OI != OIE; ++OI) {
765  if (Instruction *Op = dyn_cast<Instruction>(*OI))
766  if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
767  !Final.count(Op))
768  Queue.push_back(Op);
769  }
770  }
771 }
772 
773 // Collect all of the users of all of the provided root instructions (combined
774 // into a single set).
775 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
776  const SmallInstructionVector &Roots,
777  const SmallInstructionSet &Exclude,
778  const SmallInstructionSet &Final,
779  DenseSet<Instruction *> &Users) {
780  for (Instruction *Root : Roots)
781  collectInLoopUserSet(Root, Exclude, Final, Users);
782 }
783 
785  if (LoadInst *LI = dyn_cast<LoadInst>(I))
786  return LI->isUnordered();
787  if (StoreInst *SI = dyn_cast<StoreInst>(I))
788  return SI->isUnordered();
789  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
790  return !MI->isVolatile();
791  return false;
792 }
793 
794 /// Return true if IVU is a "simple" arithmetic operation.
795 /// This is used for narrowing the search space for DAGRoots; only arithmetic
796 /// and GEPs can be part of a DAGRoot.
797 static bool isSimpleArithmeticOp(User *IVU) {
798  if (Instruction *I = dyn_cast<Instruction>(IVU)) {
799  switch (I->getOpcode()) {
800  default: return false;
801  case Instruction::Add:
802  case Instruction::Sub:
803  case Instruction::Mul:
804  case Instruction::Shl:
805  case Instruction::AShr:
806  case Instruction::LShr:
807  case Instruction::GetElementPtr:
808  case Instruction::Trunc:
809  case Instruction::ZExt:
810  case Instruction::SExt:
811  return true;
812  }
813  }
814  return false;
815 }
816 
817 static bool isLoopIncrement(User *U, Instruction *IV) {
819 
820  if ((BO && BO->getOpcode() != Instruction::Add) ||
821  (!BO && !isa<GetElementPtrInst>(U)))
822  return false;
823 
824  for (auto *UU : U->users()) {
825  PHINode *PN = dyn_cast<PHINode>(UU);
826  if (PN && PN == IV)
827  return true;
828  }
829  return false;
830 }
831 
832 bool LoopReroll::DAGRootTracker::
833 collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
834  SmallInstructionVector BaseUsers;
835 
836  for (auto *I : Base->users()) {
837  ConstantInt *CI = nullptr;
838 
839  if (isLoopIncrement(I, IV)) {
840  LoopIncs.push_back(cast<Instruction>(I));
841  continue;
842  }
843 
844  // The root nodes must be either GEPs, ORs or ADDs.
845  if (auto *BO = dyn_cast<BinaryOperator>(I)) {
846  if (BO->getOpcode() == Instruction::Add ||
847  BO->getOpcode() == Instruction::Or)
848  CI = dyn_cast<ConstantInt>(BO->getOperand(1));
849  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
850  Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
851  CI = dyn_cast<ConstantInt>(LastOperand);
852  }
853 
854  if (!CI) {
855  if (Instruction *II = dyn_cast<Instruction>(I)) {
856  BaseUsers.push_back(II);
857  continue;
858  } else {
859  DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I << "\n");
860  return false;
861  }
862  }
863 
864  int64_t V = std::abs(CI->getValue().getSExtValue());
865  if (Roots.find(V) != Roots.end())
866  // No duplicates, please.
867  return false;
868 
869  Roots[V] = cast<Instruction>(I);
870  }
871 
872  // Make sure we have at least two roots.
873  if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
874  return false;
875 
876  // If we found non-loop-inc, non-root users of Base, assume they are
877  // for the zeroth root index. This is because "add %a, 0" gets optimized
878  // away.
879  if (BaseUsers.size()) {
880  if (Roots.find(0) != Roots.end()) {
881  DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
882  return false;
883  }
884  Roots[0] = Base;
885  }
886 
887  // Calculate the number of users of the base, or lowest indexed, iteration.
888  unsigned NumBaseUses = BaseUsers.size();
889  if (NumBaseUses == 0)
890  NumBaseUses = Roots.begin()->second->getNumUses();
891 
892  // Check that every node has the same number of users.
893  for (auto &KV : Roots) {
894  if (KV.first == 0)
895  continue;
896  if (!KV.second->hasNUses(NumBaseUses)) {
897  DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
898  << "#Base=" << NumBaseUses << ", #Root=" <<
899  KV.second->getNumUses() << "\n");
900  return false;
901  }
902  }
903 
904  return true;
905 }
906 
907 void LoopReroll::DAGRootTracker::
908 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
909  // Does the user look like it could be part of a root set?
910  // All its users must be simple arithmetic ops.
911  if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
912  return;
913 
914  if (I != IV && findRootsBase(I, SubsumedInsts))
915  return;
916 
917  SubsumedInsts.insert(I);
918 
919  for (User *V : I->users()) {
920  Instruction *I = cast<Instruction>(V);
921  if (is_contained(LoopIncs, I))
922  continue;
923 
924  if (!isSimpleArithmeticOp(I))
925  continue;
926 
927  // The recursive call makes a copy of SubsumedInsts.
928  findRootsRecursive(I, SubsumedInsts);
929  }
930 }
931 
932 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
933  if (DRS.Roots.empty())
934  return false;
935 
936  // Consider a DAGRootSet with N-1 roots (so N different values including
937  // BaseInst).
938  // Define d = Roots[0] - BaseInst, which should be the same as
939  // Roots[I] - Roots[I-1] for all I in [1..N).
940  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
941  // loop iteration J.
942  //
943  // Now, For the loop iterations to be consecutive:
944  // D = d * N
945  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
946  if (!ADR)
947  return false;
948  unsigned N = DRS.Roots.size() + 1;
949  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
950  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
951  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
952  return false;
953 
954  return true;
955 }
956 
957 bool LoopReroll::DAGRootTracker::
958 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
959  // The base of a RootSet must be an AddRec, so it can be erased.
960  const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
961  if (!IVU_ADR || IVU_ADR->getLoop() != L)
962  return false;
963 
964  std::map<int64_t, Instruction*> V;
965  if (!collectPossibleRoots(IVU, V))
966  return false;
967 
968  // If we didn't get a root for index zero, then IVU must be
969  // subsumed.
970  if (V.find(0) == V.end())
971  SubsumedInsts.insert(IVU);
972 
973  // Partition the vector into monotonically increasing indexes.
974  DAGRootSet DRS;
975  DRS.BaseInst = nullptr;
976 
977  SmallVector<DAGRootSet, 16> PotentialRootSets;
978 
979  for (auto &KV : V) {
980  if (!DRS.BaseInst) {
981  DRS.BaseInst = KV.second;
982  DRS.SubsumedInsts = SubsumedInsts;
983  } else if (DRS.Roots.empty()) {
984  DRS.Roots.push_back(KV.second);
985  } else if (V.find(KV.first - 1) != V.end()) {
986  DRS.Roots.push_back(KV.second);
987  } else {
988  // Linear sequence terminated.
989  if (!validateRootSet(DRS))
990  return false;
991 
992  // Construct a new DAGRootSet with the next sequence.
993  PotentialRootSets.push_back(DRS);
994  DRS.BaseInst = KV.second;
995  DRS.Roots.clear();
996  }
997  }
998 
999  if (!validateRootSet(DRS))
1000  return false;
1001 
1002  PotentialRootSets.push_back(DRS);
1003 
1004  RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
1005 
1006  return true;
1007 }
1008 
1009 bool LoopReroll::DAGRootTracker::findRoots() {
1010  Inc = IVToIncMap[IV];
1011 
1012  assert(RootSets.empty() && "Unclean state!");
1013  if (std::abs(Inc) == 1) {
1014  for (auto *IVU : IV->users()) {
1015  if (isLoopIncrement(IVU, IV))
1016  LoopIncs.push_back(cast<Instruction>(IVU));
1017  }
1018  findRootsRecursive(IV, SmallInstructionSet());
1019  LoopIncs.push_back(IV);
1020  } else {
1021  if (!findRootsBase(IV, SmallInstructionSet()))
1022  return false;
1023  }
1024 
1025  // Ensure all sets have the same size.
1026  if (RootSets.empty()) {
1027  DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
1028  return false;
1029  }
1030  for (auto &V : RootSets) {
1031  if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
1032  DEBUG(dbgs()
1033  << "LRR: Aborting because not all root sets have the same size\n");
1034  return false;
1035  }
1036  }
1037 
1038  Scale = RootSets[0].Roots.size() + 1;
1039 
1040  if (Scale > IL_MaxRerollIterations) {
1041  DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
1042  << "#Found=" << Scale << ", #Max=" << IL_MaxRerollIterations
1043  << "\n");
1044  return false;
1045  }
1046 
1047  DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale << "\n");
1048 
1049  return true;
1050 }
1051 
1052 bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
1053  // Populate the MapVector with all instructions in the block, in order first,
1054  // so we can iterate over the contents later in perfect order.
1055  for (auto &I : *L->getHeader()) {
1056  Uses[&I].resize(IL_End);
1057  }
1058 
1059  SmallInstructionSet Exclude;
1060  for (auto &DRS : RootSets) {
1061  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1062  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1063  Exclude.insert(DRS.BaseInst);
1064  }
1065  Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1066 
1067  for (auto &DRS : RootSets) {
1068  DenseSet<Instruction*> VBase;
1069  collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1070  for (auto *I : VBase) {
1071  Uses[I].set(0);
1072  }
1073 
1074  unsigned Idx = 1;
1075  for (auto *Root : DRS.Roots) {
1077  collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1078 
1079  // While we're here, check the use sets are the same size.
1080  if (V.size() != VBase.size()) {
1081  DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
1082  return false;
1083  }
1084 
1085  for (auto *I : V) {
1086  Uses[I].set(Idx);
1087  }
1088  ++Idx;
1089  }
1090 
1091  // Make sure our subsumed instructions are remembered too.
1092  for (auto *I : DRS.SubsumedInsts) {
1093  Uses[I].set(IL_All);
1094  }
1095  }
1096 
1097  // Make sure the loop increments are also accounted for.
1098 
1099  Exclude.clear();
1100  for (auto &DRS : RootSets) {
1101  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1102  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1103  Exclude.insert(DRS.BaseInst);
1104  }
1105 
1107  collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1108  for (auto *I : V) {
1109  Uses[I].set(IL_All);
1110  }
1111 
1112  return true;
1113 }
1114 
1115 /// Get the next instruction in "In" that is a member of set Val.
1116 /// Start searching from StartI, and do not return anything in Exclude.
1117 /// If StartI is not given, start from In.begin().
1119 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
1120  const SmallInstructionSet &Exclude,
1121  UsesTy::iterator *StartI) {
1122  UsesTy::iterator I = StartI ? *StartI : In.begin();
1123  while (I != In.end() && (I->second.test(Val) == 0 ||
1124  Exclude.count(I->first) != 0))
1125  ++I;
1126  return I;
1127 }
1128 
1129 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
1130  for (auto &DRS : RootSets) {
1131  if (DRS.BaseInst == I)
1132  return true;
1133  }
1134  return false;
1135 }
1136 
1137 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
1138  for (auto &DRS : RootSets) {
1139  if (is_contained(DRS.Roots, I))
1140  return true;
1141  }
1142  return false;
1143 }
1144 
1145 /// Return true if instruction I depends on any instruction between
1146 /// Start and End.
1147 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
1148  UsesTy::iterator Start,
1149  UsesTy::iterator End) {
1150  for (auto *U : I->users()) {
1151  for (auto It = Start; It != End; ++It)
1152  if (U == It->first)
1153  return true;
1154  }
1155  return false;
1156 }
1157 
1158 static bool isIgnorableInst(const Instruction *I) {
1159  if (isa<DbgInfoIntrinsic>(I))
1160  return true;
1161  const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
1162  if (!II)
1163  return false;
1164  switch (II->getIntrinsicID()) {
1165  default:
1166  return false;
1167  case Intrinsic::annotation:
1168  case Intrinsic::ptr_annotation:
1169  case Intrinsic::var_annotation:
1170  // TODO: the following intrinsics may also be whitelisted:
1171  // lifetime_start, lifetime_end, invariant_start, invariant_end
1172  return true;
1173  }
1174  return false;
1175 }
1176 
1177 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1178  // We now need to check for equivalence of the use graph of each root with
1179  // that of the primary induction variable (excluding the roots). Our goal
1180  // here is not to solve the full graph isomorphism problem, but rather to
1181  // catch common cases without a lot of work. As a result, we will assume
1182  // that the relative order of the instructions in each unrolled iteration
1183  // is the same (although we will not make an assumption about how the
1184  // different iterations are intermixed). Note that while the order must be
1185  // the same, the instructions may not be in the same basic block.
1186 
1187  // An array of just the possible reductions for this scale factor. When we
1188  // collect the set of all users of some root instructions, these reduction
1189  // instructions are treated as 'final' (their uses are not considered).
1190  // This is important because we don't want the root use set to search down
1191  // the reduction chain.
1192  SmallInstructionSet PossibleRedSet;
1193  SmallInstructionSet PossibleRedLastSet;
1194  SmallInstructionSet PossibleRedPHISet;
1195  Reductions.restrictToScale(Scale, PossibleRedSet,
1196  PossibleRedPHISet, PossibleRedLastSet);
1197 
1198  // Populate "Uses" with where each instruction is used.
1199  if (!collectUsedInstructions(PossibleRedSet))
1200  return false;
1201 
1202  // Make sure we mark the reduction PHIs as used in all iterations.
1203  for (auto *I : PossibleRedPHISet) {
1204  Uses[I].set(IL_All);
1205  }
1206 
1207  // Make sure we mark loop-control-only PHIs as used in all iterations. See
1208  // comment above LoopReroll::isLoopControlIV for more information.
1209  BasicBlock *Header = L->getHeader();
1210  if (LoopControlIV && LoopControlIV != IV) {
1211  for (auto *U : LoopControlIV->users()) {
1212  Instruction *IVUser = dyn_cast<Instruction>(U);
1213  // IVUser could be loop increment or compare
1214  Uses[IVUser].set(IL_All);
1215  for (auto *UU : IVUser->users()) {
1216  Instruction *UUser = dyn_cast<Instruction>(UU);
1217  // UUser could be compare, PHI or branch
1218  Uses[UUser].set(IL_All);
1219  // Skip SExt
1220  if (isa<SExtInst>(UUser)) {
1221  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
1222  Uses[UUser].set(IL_All);
1223  }
1224  // Is UUser a compare instruction?
1225  if (UU->hasOneUse()) {
1226  Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1227  if (BI == cast<BranchInst>(Header->getTerminator()))
1228  Uses[BI].set(IL_All);
1229  }
1230  }
1231  }
1232  }
1233 
1234  // Make sure all instructions in the loop are in one and only one
1235  // set.
1236  for (auto &KV : Uses) {
1237  if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
1238  DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1239  << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1240  return false;
1241  }
1242  }
1243 
1244  DEBUG(
1245  for (auto &KV : Uses) {
1246  dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1247  }
1248  );
1249 
1250  for (unsigned Iter = 1; Iter < Scale; ++Iter) {
1251  // In addition to regular aliasing information, we need to look for
1252  // instructions from later (future) iterations that have side effects
1253  // preventing us from reordering them past other instructions with side
1254  // effects.
1255  bool FutureSideEffects = false;
1256  AliasSetTracker AST(*AA);
1257  // The map between instructions in f(%iv.(i+1)) and f(%iv).
1259 
1260  // Compare iteration Iter to the base.
1261  SmallInstructionSet Visited;
1262  auto BaseIt = nextInstr(0, Uses, Visited);
1263  auto RootIt = nextInstr(Iter, Uses, Visited);
1264  auto LastRootIt = Uses.begin();
1265 
1266  while (BaseIt != Uses.end() && RootIt != Uses.end()) {
1267  Instruction *BaseInst = BaseIt->first;
1268  Instruction *RootInst = RootIt->first;
1269 
1270  // Skip over the IV or root instructions; only match their users.
1271  bool Continue = false;
1272  if (isBaseInst(BaseInst)) {
1273  Visited.insert(BaseInst);
1274  BaseIt = nextInstr(0, Uses, Visited);
1275  Continue = true;
1276  }
1277  if (isRootInst(RootInst)) {
1278  LastRootIt = RootIt;
1279  Visited.insert(RootInst);
1280  RootIt = nextInstr(Iter, Uses, Visited);
1281  Continue = true;
1282  }
1283  if (Continue) continue;
1284 
1285  if (!BaseInst->isSameOperationAs(RootInst)) {
1286  // Last chance saloon. We don't try and solve the full isomorphism
1287  // problem, but try and at least catch the case where two instructions
1288  // *of different types* are round the wrong way. We won't be able to
1289  // efficiently tell, given two ADD instructions, which way around we
1290  // should match them, but given an ADD and a SUB, we can at least infer
1291  // which one is which.
1292  //
1293  // This should allow us to deal with a greater subset of the isomorphism
1294  // problem. It does however change a linear algorithm into a quadratic
1295  // one, so limit the number of probes we do.
1296  auto TryIt = RootIt;
1297  unsigned N = NumToleratedFailedMatches;
1298  while (TryIt != Uses.end() &&
1299  !BaseInst->isSameOperationAs(TryIt->first) &&
1300  N--) {
1301  ++TryIt;
1302  TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1303  }
1304 
1305  if (TryIt == Uses.end() || TryIt == RootIt ||
1306  instrDependsOn(TryIt->first, RootIt, TryIt)) {
1307  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1308  " vs. " << *RootInst << "\n");
1309  return false;
1310  }
1311 
1312  RootIt = TryIt;
1313  RootInst = TryIt->first;
1314  }
1315 
1316  // All instructions between the last root and this root
1317  // may belong to some other iteration. If they belong to a
1318  // future iteration, then they're dangerous to alias with.
1319  //
1320  // Note that because we allow a limited amount of flexibility in the order
1321  // that we visit nodes, LastRootIt might be *before* RootIt, in which
1322  // case we've already checked this set of instructions so we shouldn't
1323  // do anything.
1324  for (; LastRootIt < RootIt; ++LastRootIt) {
1325  Instruction *I = LastRootIt->first;
1326  if (LastRootIt->second.find_first() < (int)Iter)
1327  continue;
1328  if (I->mayWriteToMemory())
1329  AST.add(I);
1330  // Note: This is specifically guarded by a check on isa<PHINode>,
1331  // which while a valid (somewhat arbitrary) micro-optimization, is
1332  // needed because otherwise isSafeToSpeculativelyExecute returns
1333  // false on PHI nodes.
1334  if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
1336  // Intervening instructions cause side effects.
1337  FutureSideEffects = true;
1338  }
1339 
1340  // Make sure that this instruction, which is in the use set of this
1341  // root instruction, does not also belong to the base set or the set of
1342  // some other root instruction.
1343  if (RootIt->second.count() > 1) {
1344  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1345  " vs. " << *RootInst << " (prev. case overlap)\n");
1346  return false;
1347  }
1348 
1349  // Make sure that we don't alias with any instruction in the alias set
1350  // tracker. If we do, then we depend on a future iteration, and we
1351  // can't reroll.
1352  if (RootInst->mayReadFromMemory())
1353  for (auto &K : AST) {
1354  if (K.aliasesUnknownInst(RootInst, *AA)) {
1355  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1356  " vs. " << *RootInst << " (depends on future store)\n");
1357  return false;
1358  }
1359  }
1360 
1361  // If we've past an instruction from a future iteration that may have
1362  // side effects, and this instruction might also, then we can't reorder
1363  // them, and this matching fails. As an exception, we allow the alias
1364  // set tracker to handle regular (unordered) load/store dependencies.
1365  if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
1366  !isSafeToSpeculativelyExecute(BaseInst)) ||
1367  (!isUnorderedLoadStore(RootInst) &&
1368  !isSafeToSpeculativelyExecute(RootInst)))) {
1369  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1370  " vs. " << *RootInst <<
1371  " (side effects prevent reordering)\n");
1372  return false;
1373  }
1374 
1375  // For instructions that are part of a reduction, if the operation is
1376  // associative, then don't bother matching the operands (because we
1377  // already know that the instructions are isomorphic, and the order
1378  // within the iteration does not matter). For non-associative reductions,
1379  // we do need to match the operands, because we need to reject
1380  // out-of-order instructions within an iteration!
1381  // For example (assume floating-point addition), we need to reject this:
1382  // x += a[i]; x += b[i];
1383  // x += a[i+1]; x += b[i+1];
1384  // x += b[i+2]; x += a[i+2];
1385  bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1386 
1387  if (!(InReduction && BaseInst->isAssociative())) {
1388  bool Swapped = false, SomeOpMatched = false;
1389  for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
1390  Value *Op2 = RootInst->getOperand(j);
1391 
1392  // If this is part of a reduction (and the operation is not
1393  // associatve), then we match all operands, but not those that are
1394  // part of the reduction.
1395  if (InReduction)
1396  if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1397  if (Reductions.isPairInSame(RootInst, Op2I))
1398  continue;
1399 
1400  DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1401  if (BMI != BaseMap.end()) {
1402  Op2 = BMI->second;
1403  } else {
1404  for (auto &DRS : RootSets) {
1405  if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1406  Op2 = DRS.BaseInst;
1407  break;
1408  }
1409  }
1410  }
1411 
1412  if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
1413  // If we've not already decided to swap the matched operands, and
1414  // we've not already matched our first operand (note that we could
1415  // have skipped matching the first operand because it is part of a
1416  // reduction above), and the instruction is commutative, then try
1417  // the swapped match.
1418  if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1419  BaseInst->getOperand(!j) == Op2) {
1420  Swapped = true;
1421  } else {
1422  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1423  << " vs. " << *RootInst << " (operand " << j << ")\n");
1424  return false;
1425  }
1426  }
1427 
1428  SomeOpMatched = true;
1429  }
1430  }
1431 
1432  if ((!PossibleRedLastSet.count(BaseInst) &&
1433  hasUsesOutsideLoop(BaseInst, L)) ||
1434  (!PossibleRedLastSet.count(RootInst) &&
1435  hasUsesOutsideLoop(RootInst, L))) {
1436  DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1437  " vs. " << *RootInst << " (uses outside loop)\n");
1438  return false;
1439  }
1440 
1441  Reductions.recordPair(BaseInst, RootInst, Iter);
1442  BaseMap.insert(std::make_pair(RootInst, BaseInst));
1443 
1444  LastRootIt = RootIt;
1445  Visited.insert(BaseInst);
1446  Visited.insert(RootInst);
1447  BaseIt = nextInstr(0, Uses, Visited);
1448  RootIt = nextInstr(Iter, Uses, Visited);
1449  }
1450  assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
1451  "Mismatched set sizes!");
1452  }
1453 
1454  DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
1455  *IV << "\n");
1456 
1457  return true;
1458 }
1459 
1460 void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
1461  BasicBlock *Header = L->getHeader();
1462  // Remove instructions associated with non-base iterations.
1463  for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend();
1464  J != JE;) {
1465  unsigned I = Uses[&*J].find_first();
1466  if (I > 0 && I < IL_All) {
1467  DEBUG(dbgs() << "LRR: removing: " << *J << "\n");
1468  J++->eraseFromParent();
1469  continue;
1470  }
1471 
1472  ++J;
1473  }
1474 
1475  bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
1476 
1477  if (HasTwoIVs) {
1478  updateNonLoopCtrlIncr();
1479  replaceIV(LoopControlIV, LoopControlIV, IterCount);
1480  } else
1481  // We need to create a new induction variable for each different BaseInst.
1482  for (auto &DRS : RootSets)
1483  // Insert the new induction variable.
1484  replaceIV(DRS.BaseInst, IV, IterCount);
1485 
1486  SimplifyInstructionsInBlock(Header, TLI);
1487  DeleteDeadPHIs(Header, TLI);
1488 }
1489 
1490 // For non-loop-control IVs, we only need to update the last increment
1491 // with right amount, then we are done.
1492 void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
1493  const SCEV *NewInc = nullptr;
1494  for (auto *LoopInc : LoopIncs) {
1496  const SCEVConstant *COp = nullptr;
1497  if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
1498  COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1499  } else {
1500  COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
1501  if (!COp)
1502  COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1503  }
1504 
1505  assert(COp && "Didn't find constant operand of LoopInc!\n");
1506 
1507  const APInt &AInt = COp->getValue()->getValue();
1508  const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
1509  if (AInt.isNegative()) {
1510  NewInc = SE->getNegativeSCEV(COp);
1511  NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
1512  NewInc = SE->getNegativeSCEV(NewInc);
1513  } else
1514  NewInc = SE->getUDivExpr(COp, ScaleSCEV);
1515 
1516  LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
1517  }
1518 }
1519 
1520 void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
1521  Instruction *InstIV,
1522  const SCEV *IterCount) {
1523  BasicBlock *Header = L->getHeader();
1524  int64_t Inc = IVToIncMap[InstIV];
1525  bool NeedNewIV = InstIV == LoopControlIV;
1526  bool Negative = !NeedNewIV && Inc < 0;
1527 
1528  const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
1529  const SCEV *Start = RealIVSCEV->getStart();
1530 
1531  if (NeedNewIV)
1532  Start = SE->getConstant(Start->getType(), 0);
1533 
1534  const SCEV *SizeOfExpr = nullptr;
1535  const SCEV *IncrExpr =
1536  SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
1537  if (auto *PTy = dyn_cast<PointerType>(Inst->getType())) {
1538  Type *ElTy = PTy->getElementType();
1539  SizeOfExpr =
1540  SE->getSizeOfExpr(SE->getEffectiveSCEVType(Inst->getType()), ElTy);
1541  IncrExpr = SE->getMulExpr(IncrExpr, SizeOfExpr);
1542  }
1543  const SCEV *NewIVSCEV =
1544  SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1545 
1546  { // Limit the lifetime of SCEVExpander.
1547  const DataLayout &DL = Header->getModule()->getDataLayout();
1548  SCEVExpander Expander(*SE, DL, "reroll");
1549  Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1550  Header->getFirstNonPHIOrDbg());
1551 
1552  for (auto &KV : Uses)
1553  if (KV.second.find_first() == 0)
1554  KV.first->replaceUsesOfWith(Inst, NewIV);
1555 
1556  if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
1557  // FIXME: Why do we need this check?
1558  if (Uses[BI].find_first() == IL_All) {
1559  const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1560 
1561  if (NeedNewIV)
1562  ICSCEV = SE->getMulExpr(IterCount,
1563  SE->getConstant(IterCount->getType(), Scale));
1564 
1565  // Iteration count SCEV minus or plus 1
1566  const SCEV *MinusPlus1SCEV =
1567  SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
1568  if (Inst->getType()->isPointerTy()) {
1569  assert(SizeOfExpr && "SizeOfExpr is not initialized");
1570  MinusPlus1SCEV = SE->getMulExpr(MinusPlus1SCEV, SizeOfExpr);
1571  }
1572 
1573  const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV);
1574  // Iteration count minus 1
1575  Instruction *InsertPtr = nullptr;
1576  if (isa<SCEVConstant>(ICMinusPlus1SCEV)) {
1577  InsertPtr = BI;
1578  } else {
1579  BasicBlock *Preheader = L->getLoopPreheader();
1580  if (!Preheader)
1581  Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
1582  InsertPtr = Preheader->getTerminator();
1583  }
1584 
1585  if (!isa<PointerType>(NewIV->getType()) && NeedNewIV &&
1586  (SE->getTypeSizeInBits(NewIV->getType()) <
1587  SE->getTypeSizeInBits(ICMinusPlus1SCEV->getType()))) {
1588  IRBuilder<> Builder(BI);
1589  Builder.SetCurrentDebugLocation(BI->getDebugLoc());
1590  NewIV = Builder.CreateSExt(NewIV, ICMinusPlus1SCEV->getType());
1591  }
1592  Value *ICMinusPlus1 = Expander.expandCodeFor(
1593  ICMinusPlus1SCEV, NewIV->getType(), InsertPtr);
1594 
1595  Value *Cond =
1596  new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond");
1597  BI->setCondition(Cond);
1598 
1599  if (BI->getSuccessor(1) != Header)
1600  BI->swapSuccessors();
1601  }
1602  }
1603  }
1604 }
1605 
1606 // Validate the selected reductions. All iterations must have an isomorphic
1607 // part of the reduction chain and, for non-associative reductions, the chain
1608 // entries must appear in order.
1609 bool LoopReroll::ReductionTracker::validateSelected() {
1610  // For a non-associative reduction, the chain entries must appear in order.
1611  for (int i : Reds) {
1612  int PrevIter = 0, BaseCount = 0, Count = 0;
1613  for (Instruction *J : PossibleReds[i]) {
1614  // Note that all instructions in the chain must have been found because
1615  // all instructions in the function must have been assigned to some
1616  // iteration.
1617  int Iter = PossibleRedIter[J];
1618  if (Iter != PrevIter && Iter != PrevIter + 1 &&
1619  !PossibleReds[i].getReducedValue()->isAssociative()) {
1620  DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
1621  J << "\n");
1622  return false;
1623  }
1624 
1625  if (Iter != PrevIter) {
1626  if (Count != BaseCount) {
1627  DEBUG(dbgs() << "LRR: Iteration " << PrevIter <<
1628  " reduction use count " << Count <<
1629  " is not equal to the base use count " <<
1630  BaseCount << "\n");
1631  return false;
1632  }
1633 
1634  Count = 0;
1635  }
1636 
1637  ++Count;
1638  if (Iter == 0)
1639  ++BaseCount;
1640 
1641  PrevIter = Iter;
1642  }
1643  }
1644 
1645  return true;
1646 }
1647 
1648 // For all selected reductions, remove all parts except those in the first
1649 // iteration (and the PHI). Replace outside uses of the reduced value with uses
1650 // of the first-iteration reduced value (in other words, reroll the selected
1651 // reductions).
1652 void LoopReroll::ReductionTracker::replaceSelected() {
1653  // Fixup reductions to refer to the last instruction associated with the
1654  // first iteration (not the last).
1655  for (int i : Reds) {
1656  int j = 0;
1657  for (int e = PossibleReds[i].size(); j != e; ++j)
1658  if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1659  --j;
1660  break;
1661  }
1662 
1663  // Replace users with the new end-of-chain value.
1664  SmallInstructionVector Users;
1665  for (User *U : PossibleReds[i].getReducedValue()->users()) {
1666  Users.push_back(cast<Instruction>(U));
1667  }
1668 
1669  for (Instruction *User : Users)
1670  User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1671  PossibleReds[i][j]);
1672  }
1673 }
1674 
1675 // Reroll the provided loop with respect to the provided induction variable.
1676 // Generally, we're looking for a loop like this:
1677 //
1678 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
1679 // f(%iv)
1680 // %iv.1 = add %iv, 1 <-- a root increment
1681 // f(%iv.1)
1682 // %iv.2 = add %iv, 2 <-- a root increment
1683 // f(%iv.2)
1684 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
1685 // f(%iv.scale_m_1)
1686 // ...
1687 // %iv.next = add %iv, scale
1688 // %cmp = icmp(%iv, ...)
1689 // br %cmp, header, exit
1690 //
1691 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1692 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1693 // be intermixed with eachother. The restriction imposed by this algorithm is
1694 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1695 // etc. be the same.
1696 //
1697 // First, we collect the use set of %iv, excluding the other increment roots.
1698 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1699 // times, having collected the use set of f(%iv.(i+1)), during which we:
1700 // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1701 // the next unmatched instruction in f(%iv.(i+1)).
1702 // - Ensure that both matched instructions don't have any external users
1703 // (with the exception of last-in-chain reduction instructions).
1704 // - Track the (aliasing) write set, and other side effects, of all
1705 // instructions that belong to future iterations that come before the matched
1706 // instructions. If the matched instructions read from that write set, then
1707 // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1708 // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1709 // if any of these future instructions had side effects (could not be
1710 // speculatively executed), and so do the matched instructions, when we
1711 // cannot reorder those side-effect-producing instructions, and rerolling
1712 // fails.
1713 //
1714 // Finally, we make sure that all loop instructions are either loop increment
1715 // roots, belong to simple latch code, parts of validated reductions, part of
1716 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1717 // have been validated), then we reroll the loop.
1718 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1719  const SCEV *IterCount,
1720  ReductionTracker &Reductions) {
1721  DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1722  IVToIncMap, LoopControlIV);
1723 
1724  if (!DAGRoots.findRoots())
1725  return false;
1726  DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
1727  *IV << "\n");
1728 
1729  if (!DAGRoots.validate(Reductions))
1730  return false;
1731  if (!Reductions.validateSelected())
1732  return false;
1733  // At this point, we've validated the rerolling, and we're committed to
1734  // making changes!
1735 
1736  Reductions.replaceSelected();
1737  DAGRoots.replace(IterCount);
1738 
1739  ++NumRerolledLoops;
1740  return true;
1741 }
1742 
1743 bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
1744  if (skipLoop(L))
1745  return false;
1746 
1747  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1748  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1749  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1750  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1751  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1752  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1753 
1754  BasicBlock *Header = L->getHeader();
1755  DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
1756  "] Loop %" << Header->getName() << " (" <<
1757  L->getNumBlocks() << " block(s))\n");
1758 
1759  // For now, we'll handle only single BB loops.
1760  if (L->getNumBlocks() > 1)
1761  return false;
1762 
1763  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
1764  return false;
1765 
1766  const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
1767  const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
1768  DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
1769  DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
1770 
1771  // First, we need to find the induction variable with respect to which we can
1772  // reroll (there may be several possible options).
1773  SmallInstructionVector PossibleIVs;
1774  IVToIncMap.clear();
1775  LoopControlIV = nullptr;
1776  collectPossibleIVs(L, PossibleIVs);
1777 
1778  if (PossibleIVs.empty()) {
1779  DEBUG(dbgs() << "LRR: No possible IVs found\n");
1780  return false;
1781  }
1782 
1783  ReductionTracker Reductions;
1784  collectPossibleReductions(L, Reductions);
1785  bool Changed = false;
1786 
1787  // For each possible IV, collect the associated possible set of 'root' nodes
1788  // (i+1, i+2, etc.).
1789  for (Instruction *PossibleIV : PossibleIVs)
1790  if (reroll(PossibleIV, L, Header, IterCount, Reductions)) {
1791  Changed = true;
1792  break;
1793  }
1794  DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
1795 
1796  // Trip count of L has changed so SE must be re-evaluated.
1797  if (Changed)
1798  SE->forgetLoop(L);
1799 
1800  return Changed;
1801 }
APInt abs() const
Get the absolute value;.
Definition: APInt.h:1779
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
void push_back(const T &Elt)
Definition: SmallVector.h:212
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:244
Pass * createLoopRerollPass()
iterator_range< use_iterator > uses()
Definition: Value.h:356
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:235
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BinaryOps getOpcode() const
Definition: InstrTypes.h:523
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
bool user_empty() const
Definition: Value.h:365
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
static cl::opt< unsigned > NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate" " during fuzzy matching. (default: 400)"))
The main scalar evolution driver.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn&#39;t have a preheader, this method is called...
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1390
STATISTIC(NumFunctions, "Total number of functions")
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:535
void initializeLoopRerollPass(PassRegistry &)
reverse_iterator rend()
Definition: BasicBlock.h:259
An instruction for reading from memory.
Definition: Instructions.h:164
reverse_iterator rbegin()
Definition: BasicBlock.h:257
Hexagon Common GEP
iv Induction Variable Users
Definition: IVUsers.cpp:51
This defines the Use class.
op_iterator op_begin()
Definition: User.h:214
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
static bool isIgnorableInst(const Instruction *I)
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
static cl::opt< unsigned > MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, cl::desc("The maximum increment for loop rerolling"))
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
static bool isUnorderedLoadStore(Instruction *I)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
This node represents multiplication of some number of SCEVs.
static const SCEVConstant * getIncrmentFactorSCEV(ScalarEvolution *SE, const SCEV *SCEVExpr, Instruction &IV)
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:100
ConstantInt * getValue() const
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1554
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
This node represents a polynomial recurrence on the trip count of the specified loop.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
An instruction for storing to memory.
Definition: Instructions.h:306
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:152
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
Value * getOperand(unsigned i) const
Definition: User.h:154
Class to represent pointers.
Definition: DerivedTypes.h:467
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:22
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:146
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:837
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:357
#define P(N)
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:244
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N users or more.
Definition: Value.cpp:136
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:200
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Conditional or Unconditional Branch instruction.
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
char & LCSSAID
Definition: LCSSA.cpp:412
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:216
This instruction compares its operands according to the predicate given to the constructor.
bool isBinaryOp() const
Definition: Instruction.h:129
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
static const unsigned End
static bool isLoopIncrement(User *U, Instruction *IV)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
IterationLimits
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
Type * getType() const
Return the LLVM type of this SCEV expression.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:48
typename VectorType::iterator iterator
Definition: MapVector.h:46
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1272
void add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo)
These methods are used to add different types of instructions to the alias sets.
bool isCommutative() const
Return true if the instruction is commutative:
Definition: Instruction.h:451
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Class for arbitrary precision integers.
Definition: APInt.h:69
iterator_range< user_iterator > users()
Definition: Value.h:401
This class uses information about analyze scalars to rewrite expressions in canonical form...
static bool isSimpleArithmeticOp(User *IVU)
Return true if IVU is a "simple" arithmetic operation.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:166
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
iv users
Definition: IVUsers.cpp:51
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
bool mayReadFromMemory() const
Return true if this instruction may read memory.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1023
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:377
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM Value Representation.
Definition: Value.h:73
#define DEBUG(X)
Definition: Debug.h:118
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
IRTranslator LLVM IR MI
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:414
static Expected< std::string > replace(StringRef S, StringRef From, StringRef To)
const Instruction * getFirstNonPHIOrDbg() const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic...
Definition: BasicBlock.cpp:178
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:157
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
static bool isAssociative(const COFFSection &Section)
bool use_empty() const
Definition: Value.h:328
Type * getElementType() const
Definition: DerivedTypes.h:486
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:867