LLVM  7.0.0svn
LoopRerollPass.cpp
Go to the documentation of this file.
1 //===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop reroller.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/BitVector.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/MapVector.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/LoopPass.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/IRBuilder.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/Type.h"
46 #include "llvm/IR/Use.h"
47 #include "llvm/IR/User.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
54 #include "llvm/Transforms/Scalar.h"
55 #include "llvm/Transforms/Utils.h"
58 #include <cassert>
59 #include <cstddef>
60 #include <cstdint>
61 #include <cstdlib>
62 #include <iterator>
63 #include <map>
64 #include <utility>
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "loop-reroll"
69 
70 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
71 
72 static cl::opt<unsigned>
73 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
74  cl::desc("The maximum increment for loop rerolling"));
75 
76 static cl::opt<unsigned>
77 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
78  cl::Hidden,
79  cl::desc("The maximum number of failures to tolerate"
80  " during fuzzy matching. (default: 400)"));
81 
82 // This loop re-rolling transformation aims to transform loops like this:
83 //
84 // int foo(int a);
85 // void bar(int *x) {
86 // for (int i = 0; i < 500; i += 3) {
87 // foo(i);
88 // foo(i+1);
89 // foo(i+2);
90 // }
91 // }
92 //
93 // into a loop like this:
94 //
95 // void bar(int *x) {
96 // for (int i = 0; i < 500; ++i)
97 // foo(i);
98 // }
99 //
100 // It does this by looking for loops that, besides the latch code, are composed
101 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
102 // to the induction variable, and where each DAG is isomorphic to the DAG
103 // rooted at the induction variable (excepting the sub-DAGs which root the
104 // other induction-variable increments). In other words, we're looking for loop
105 // bodies of the form:
106 //
107 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
108 // f(%iv)
109 // %iv.1 = add %iv, 1 <-- a root increment
110 // f(%iv.1)
111 // %iv.2 = add %iv, 2 <-- a root increment
112 // f(%iv.2)
113 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
114 // f(%iv.scale_m_1)
115 // ...
116 // %iv.next = add %iv, scale
117 // %cmp = icmp(%iv, ...)
118 // br %cmp, header, exit
119 //
120 // where each f(i) is a set of instructions that, collectively, are a function
121 // only of i (and other loop-invariant values).
122 //
123 // As a special case, we can also reroll loops like this:
124 //
125 // int foo(int);
126 // void bar(int *x) {
127 // for (int i = 0; i < 500; ++i) {
128 // x[3*i] = foo(0);
129 // x[3*i+1] = foo(0);
130 // x[3*i+2] = foo(0);
131 // }
132 // }
133 //
134 // into this:
135 //
136 // void bar(int *x) {
137 // for (int i = 0; i < 1500; ++i)
138 // x[i] = foo(0);
139 // }
140 //
141 // in which case, we're looking for inputs like this:
142 //
143 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
144 // %scaled.iv = mul %iv, scale
145 // f(%scaled.iv)
146 // %scaled.iv.1 = add %scaled.iv, 1
147 // f(%scaled.iv.1)
148 // %scaled.iv.2 = add %scaled.iv, 2
149 // f(%scaled.iv.2)
150 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
151 // f(%scaled.iv.scale_m_1)
152 // ...
153 // %iv.next = add %iv, 1
154 // %cmp = icmp(%iv, ...)
155 // br %cmp, header, exit
156 
157 namespace {
158 
160  /// The maximum number of iterations that we'll try and reroll.
161  IL_MaxRerollIterations = 32,
162  /// The bitvector index used by loop induction variables and other
163  /// instructions that belong to all iterations.
164  IL_All,
165  IL_End
166  };
167 
168  class LoopReroll : public LoopPass {
169  public:
170  static char ID; // Pass ID, replacement for typeid
171 
172  LoopReroll() : LoopPass(ID) {
174  }
175 
176  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
177 
178  void getAnalysisUsage(AnalysisUsage &AU) const override {
181  }
182 
183  protected:
184  AliasAnalysis *AA;
185  LoopInfo *LI;
186  ScalarEvolution *SE;
187  TargetLibraryInfo *TLI;
188  DominatorTree *DT;
189  bool PreserveLCSSA;
190 
191  using SmallInstructionVector = SmallVector<Instruction *, 16>;
192  using SmallInstructionSet = SmallPtrSet<Instruction *, 16>;
193 
194  // Map between induction variable and its increment
196 
197  // For loop with multiple induction variable, remember the one used only to
198  // control the loop.
199  Instruction *LoopControlIV;
200 
201  // A chain of isomorphic instructions, identified by a single-use PHI
202  // representing a reduction. Only the last value may be used outside the
203  // loop.
204  struct SimpleLoopReduction {
205  SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
206  assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
207  add(L);
208  }
209 
210  bool valid() const {
211  return Valid;
212  }
213 
214  Instruction *getPHI() const {
215  assert(Valid && "Using invalid reduction");
216  return Instructions.front();
217  }
218 
219  Instruction *getReducedValue() const {
220  assert(Valid && "Using invalid reduction");
221  return Instructions.back();
222  }
223 
224  Instruction *get(size_t i) const {
225  assert(Valid && "Using invalid reduction");
226  return Instructions[i+1];
227  }
228 
229  Instruction *operator [] (size_t i) const { return get(i); }
230 
231  // The size, ignoring the initial PHI.
232  size_t size() const {
233  assert(Valid && "Using invalid reduction");
234  return Instructions.size()-1;
235  }
236 
237  using iterator = SmallInstructionVector::iterator;
238  using const_iterator = SmallInstructionVector::const_iterator;
239 
240  iterator begin() {
241  assert(Valid && "Using invalid reduction");
242  return std::next(Instructions.begin());
243  }
244 
245  const_iterator begin() const {
246  assert(Valid && "Using invalid reduction");
247  return std::next(Instructions.begin());
248  }
249 
250  iterator end() { return Instructions.end(); }
251  const_iterator end() const { return Instructions.end(); }
252 
253  protected:
254  bool Valid = false;
255  SmallInstructionVector Instructions;
256 
257  void add(Loop *L);
258  };
259 
260  // The set of all reductions, and state tracking of possible reductions
261  // during loop instruction processing.
262  struct ReductionTracker {
263  using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
264 
265  // Add a new possible reduction.
266  void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
267 
268  // Setup to track possible reductions corresponding to the provided
269  // rerolling scale. Only reductions with a number of non-PHI instructions
270  // that is divisible by the scale are considered. Three instructions sets
271  // are filled in:
272  // - A set of all possible instructions in eligible reductions.
273  // - A set of all PHIs in eligible reductions
274  // - A set of all reduced values (last instructions) in eligible
275  // reductions.
276  void restrictToScale(uint64_t Scale,
277  SmallInstructionSet &PossibleRedSet,
278  SmallInstructionSet &PossibleRedPHISet,
279  SmallInstructionSet &PossibleRedLastSet) {
280  PossibleRedIdx.clear();
281  PossibleRedIter.clear();
282  Reds.clear();
283 
284  for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
285  if (PossibleReds[i].size() % Scale == 0) {
286  PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
287  PossibleRedPHISet.insert(PossibleReds[i].getPHI());
288 
289  PossibleRedSet.insert(PossibleReds[i].getPHI());
290  PossibleRedIdx[PossibleReds[i].getPHI()] = i;
291  for (Instruction *J : PossibleReds[i]) {
292  PossibleRedSet.insert(J);
293  PossibleRedIdx[J] = i;
294  }
295  }
296  }
297 
298  // The functions below are used while processing the loop instructions.
299 
300  // Are the two instructions both from reductions, and furthermore, from
301  // the same reduction?
302  bool isPairInSame(Instruction *J1, Instruction *J2) {
303  DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
304  if (J1I != PossibleRedIdx.end()) {
305  DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
306  if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
307  return true;
308  }
309 
310  return false;
311  }
312 
313  // The two provided instructions, the first from the base iteration, and
314  // the second from iteration i, form a matched pair. If these are part of
315  // a reduction, record that fact.
316  void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
317  if (PossibleRedIdx.count(J1)) {
318  assert(PossibleRedIdx.count(J2) &&
319  "Recording reduction vs. non-reduction instruction?");
320 
321  PossibleRedIter[J1] = 0;
322  PossibleRedIter[J2] = i;
323 
324  int Idx = PossibleRedIdx[J1];
325  assert(Idx == PossibleRedIdx[J2] &&
326  "Recording pair from different reductions?");
327  Reds.insert(Idx);
328  }
329  }
330 
331  // The functions below can be called after we've finished processing all
332  // instructions in the loop, and we know which reductions were selected.
333 
334  bool validateSelected();
335  void replaceSelected();
336 
337  protected:
338  // The vector of all possible reductions (for any scale).
339  SmallReductionVector PossibleReds;
340 
341  DenseMap<Instruction *, int> PossibleRedIdx;
342  DenseMap<Instruction *, int> PossibleRedIter;
343  DenseSet<int> Reds;
344  };
345 
346  // A DAGRootSet models an induction variable being used in a rerollable
347  // loop. For example,
348  //
349  // x[i*3+0] = y1
350  // x[i*3+1] = y2
351  // x[i*3+2] = y3
352  //
353  // Base instruction -> i*3
354  // +---+----+
355  // / | \
356  // ST[y1] +1 +2 <-- Roots
357  // | |
358  // ST[y2] ST[y3]
359  //
360  // There may be multiple DAGRoots, for example:
361  //
362  // x[i*2+0] = ... (1)
363  // x[i*2+1] = ... (1)
364  // x[i*2+4] = ... (2)
365  // x[i*2+5] = ... (2)
366  // x[(i+1234)*2+5678] = ... (3)
367  // x[(i+1234)*2+5679] = ... (3)
368  //
369  // The loop will be rerolled by adding a new loop induction variable,
370  // one for the Base instruction in each DAGRootSet.
371  //
372  struct DAGRootSet {
373  Instruction *BaseInst;
374  SmallInstructionVector Roots;
375 
376  // The instructions between IV and BaseInst (but not including BaseInst).
377  SmallInstructionSet SubsumedInsts;
378  };
379 
380  // The set of all DAG roots, and state tracking of all roots
381  // for a particular induction variable.
382  struct DAGRootTracker {
383  DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
386  bool PreserveLCSSA,
388  Instruction *LoopCtrlIV)
389  : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
390  PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
391  LoopControlIV(LoopCtrlIV) {}
392 
393  /// Stage 1: Find all the DAG roots for the induction variable.
394  bool findRoots();
395 
396  /// Stage 2: Validate if the found roots are valid.
397  bool validate(ReductionTracker &Reductions);
398 
399  /// Stage 3: Assuming validate() returned true, perform the
400  /// replacement.
401  /// @param IterCount The maximum iteration count of L.
402  void replace(const SCEV *IterCount);
403 
404  protected:
406 
407  void findRootsRecursive(Instruction *IVU,
408  SmallInstructionSet SubsumedInsts);
409  bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
410  bool collectPossibleRoots(Instruction *Base,
411  std::map<int64_t,Instruction*> &Roots);
412  bool validateRootSet(DAGRootSet &DRS);
413 
414  bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
415  void collectInLoopUserSet(const SmallInstructionVector &Roots,
416  const SmallInstructionSet &Exclude,
417  const SmallInstructionSet &Final,
419  void collectInLoopUserSet(Instruction *Root,
420  const SmallInstructionSet &Exclude,
421  const SmallInstructionSet &Final,
422  DenseSet<Instruction *> &Users);
423 
424  UsesTy::iterator nextInstr(int Val, UsesTy &In,
425  const SmallInstructionSet &Exclude,
426  UsesTy::iterator *StartI=nullptr);
427  bool isBaseInst(Instruction *I);
428  bool isRootInst(Instruction *I);
429  bool instrDependsOn(Instruction *I,
430  UsesTy::iterator Start,
431  UsesTy::iterator End);
432  void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
433  void updateNonLoopCtrlIncr();
434 
435  LoopReroll *Parent;
436 
437  // Members of Parent, replicated here for brevity.
438  Loop *L;
439  ScalarEvolution *SE;
440  AliasAnalysis *AA;
441  TargetLibraryInfo *TLI;
442  DominatorTree *DT;
443  LoopInfo *LI;
444  bool PreserveLCSSA;
445 
446  // The loop induction variable.
447  Instruction *IV;
448 
449  // Loop step amount.
450  int64_t Inc;
451 
452  // Loop reroll count; if Inc == 1, this records the scaling applied
453  // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
454  // If Inc is not 1, Scale = Inc.
455  uint64_t Scale;
456 
457  // The roots themselves.
459 
460  // All increment instructions for IV.
461  SmallInstructionVector LoopIncs;
462 
463  // Map of all instructions in the loop (in order) to the iterations
464  // they are used in (or specially, IL_All for instructions
465  // used in the loop increment mechanism).
466  UsesTy Uses;
467 
468  // Map between induction variable and its increment
470 
471  Instruction *LoopControlIV;
472  };
473 
474  // Check if it is a compare-like instruction whose user is a branch
475  bool isCompareUsedByBranch(Instruction *I) {
476  auto *TI = I->getParent()->getTerminator();
477  if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
478  return false;
479  return I->hasOneUse() && TI->getOperand(0) == I;
480  };
481 
482  bool isLoopControlIV(Loop *L, Instruction *IV);
483  void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
484  void collectPossibleReductions(Loop *L,
485  ReductionTracker &Reductions);
486  bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
487  ReductionTracker &Reductions);
488  };
489 
490 } // end anonymous namespace
491 
492 char LoopReroll::ID = 0;
493 
494 INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
497 INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
498 
500  return new LoopReroll;
501 }
502 
503 // Returns true if the provided instruction is used outside the given loop.
504 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
505 // non-loop blocks to be outside the loop.
507  for (User *U : I->users()) {
508  if (!L->contains(cast<Instruction>(U)))
509  return true;
510  }
511  return false;
512 }
513 
515  const SCEV *SCEVExpr,
516  Instruction &IV) {
517  const SCEVMulExpr *MulSCEV = dyn_cast<SCEVMulExpr>(SCEVExpr);
518 
519  // If StepRecurrence of a SCEVExpr is a constant (c1 * c2, c2 = sizeof(ptr)),
520  // Return c1.
521  if (!MulSCEV && IV.getType()->isPointerTy())
522  if (const SCEVConstant *IncSCEV = dyn_cast<SCEVConstant>(SCEVExpr)) {
523  const PointerType *PTy = cast<PointerType>(IV.getType());
524  Type *ElTy = PTy->getElementType();
525  const SCEV *SizeOfExpr =
526  SE->getSizeOfExpr(SE->getEffectiveSCEVType(IV.getType()), ElTy);
527  if (IncSCEV->getValue()->getValue().isNegative()) {
528  const SCEV *NewSCEV =
529  SE->getUDivExpr(SE->getNegativeSCEV(SCEVExpr), SizeOfExpr);
530  return dyn_cast<SCEVConstant>(SE->getNegativeSCEV(NewSCEV));
531  } else {
532  return dyn_cast<SCEVConstant>(SE->getUDivExpr(SCEVExpr, SizeOfExpr));
533  }
534  }
535 
536  if (!MulSCEV)
537  return nullptr;
538 
539  // If StepRecurrence of a SCEVExpr is a c * sizeof(x), where c is constant,
540  // Return c.
541  const SCEVConstant *CIncSCEV = nullptr;
542  for (const SCEV *Operand : MulSCEV->operands()) {
543  if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Operand)) {
544  CIncSCEV = Constant;
545  } else if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Operand)) {
546  Type *AllocTy;
547  if (!Unknown->isSizeOf(AllocTy))
548  break;
549  } else {
550  return nullptr;
551  }
552  }
553  return CIncSCEV;
554 }
555 
556 // Check if an IV is only used to control the loop. There are two cases:
557 // 1. It only has one use which is loop increment, and the increment is only
558 // used by comparison and the PHI (could has sext with nsw in between), and the
559 // comparison is only used by branch.
560 // 2. It is used by loop increment and the comparison, the loop increment is
561 // only used by the PHI, and the comparison is used only by the branch.
562 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
563  unsigned IVUses = IV->getNumUses();
564  if (IVUses != 2 && IVUses != 1)
565  return false;
566 
567  for (auto *User : IV->users()) {
568  int32_t IncOrCmpUses = User->getNumUses();
569  bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
570 
571  // User can only have one or two uses.
572  if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
573  return false;
574 
575  // Case 1
576  if (IVUses == 1) {
577  // The only user must be the loop increment.
578  // The loop increment must have two uses.
579  if (IsCompInst || IncOrCmpUses != 2)
580  return false;
581  }
582 
583  // Case 2
584  if (IVUses == 2 && IncOrCmpUses != 1)
585  return false;
586 
587  // The users of the IV must be a binary operation or a comparison
588  if (auto *BO = dyn_cast<BinaryOperator>(User)) {
589  if (BO->getOpcode() == Instruction::Add) {
590  // Loop Increment
591  // User of Loop Increment should be either PHI or CMP
592  for (auto *UU : User->users()) {
593  if (PHINode *PN = dyn_cast<PHINode>(UU)) {
594  if (PN != IV)
595  return false;
596  }
597  // Must be a CMP or an ext (of a value with nsw) then CMP
598  else {
599  Instruction *UUser = dyn_cast<Instruction>(UU);
600  // Skip SExt if we are extending an nsw value
601  // TODO: Allow ZExt too
602  if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
603  isa<SExtInst>(UUser))
604  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
605  if (!isCompareUsedByBranch(UUser))
606  return false;
607  }
608  }
609  } else
610  return false;
611  // Compare : can only have one use, and must be branch
612  } else if (!IsCompInst)
613  return false;
614  }
615  return true;
616 }
617 
618 // Collect the list of loop induction variables with respect to which it might
619 // be possible to reroll the loop.
620 void LoopReroll::collectPossibleIVs(Loop *L,
621  SmallInstructionVector &PossibleIVs) {
622  BasicBlock *Header = L->getHeader();
623  for (BasicBlock::iterator I = Header->begin(),
624  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
625  if (!isa<PHINode>(I))
626  continue;
627  if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
628  continue;
629 
630  if (const SCEVAddRecExpr *PHISCEV =
631  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
632  if (PHISCEV->getLoop() != L)
633  continue;
634  if (!PHISCEV->isAffine())
635  continue;
636  const SCEVConstant *IncSCEV = nullptr;
637  if (I->getType()->isPointerTy())
638  IncSCEV =
639  getIncrmentFactorSCEV(SE, PHISCEV->getStepRecurrence(*SE), *I);
640  else
641  IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
642  if (IncSCEV) {
643  const APInt &AInt = IncSCEV->getValue()->getValue().abs();
644  if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
645  continue;
646  IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
647  LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
648  << "\n");
649 
650  if (isLoopControlIV(L, &*I)) {
651  assert(!LoopControlIV && "Found two loop control only IV");
652  LoopControlIV = &(*I);
653  LLVM_DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I
654  << " = " << *PHISCEV << "\n");
655  } else
656  PossibleIVs.push_back(&*I);
657  }
658  }
659  }
660 }
661 
662 // Add the remainder of the reduction-variable chain to the instruction vector
663 // (the initial PHINode has already been added). If successful, the object is
664 // marked as valid.
666  assert(!Valid && "Cannot add to an already-valid chain");
667 
668  // The reduction variable must be a chain of single-use instructions
669  // (including the PHI), except for the last value (which is used by the PHI
670  // and also outside the loop).
671  Instruction *C = Instructions.front();
672  if (C->user_empty())
673  return;
674 
675  do {
676  C = cast<Instruction>(*C->user_begin());
677  if (C->hasOneUse()) {
678  if (!C->isBinaryOp())
679  return;
680 
681  if (!(isa<PHINode>(Instructions.back()) ||
682  C->isSameOperationAs(Instructions.back())))
683  return;
684 
685  Instructions.push_back(C);
686  }
687  } while (C->hasOneUse());
688 
689  if (Instructions.size() < 2 ||
690  !C->isSameOperationAs(Instructions.back()) ||
691  C->use_empty())
692  return;
693 
694  // C is now the (potential) last instruction in the reduction chain.
695  for (User *U : C->users()) {
696  // The only in-loop user can be the initial PHI.
697  if (L->contains(cast<Instruction>(U)))
698  if (cast<Instruction>(U) != Instructions.front())
699  return;
700  }
701 
702  Instructions.push_back(C);
703  Valid = true;
704 }
705 
706 // Collect the vector of possible reduction variables.
707 void LoopReroll::collectPossibleReductions(Loop *L,
708  ReductionTracker &Reductions) {
709  BasicBlock *Header = L->getHeader();
710  for (BasicBlock::iterator I = Header->begin(),
711  IE = Header->getFirstInsertionPt(); I != IE; ++I) {
712  if (!isa<PHINode>(I))
713  continue;
714  if (!I->getType()->isSingleValueType())
715  continue;
716 
717  SimpleLoopReduction SLR(&*I, L);
718  if (!SLR.valid())
719  continue;
720 
721  LLVM_DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with "
722  << SLR.size() << " chained instructions)\n");
723  Reductions.addSLR(SLR);
724  }
725 }
726 
727 // Collect the set of all users of the provided root instruction. This set of
728 // users contains not only the direct users of the root instruction, but also
729 // all users of those users, and so on. There are two exceptions:
730 //
731 // 1. Instructions in the set of excluded instructions are never added to the
732 // use set (even if they are users). This is used, for example, to exclude
733 // including root increments in the use set of the primary IV.
734 //
735 // 2. Instructions in the set of final instructions are added to the use set
736 // if they are users, but their users are not added. This is used, for
737 // example, to prevent a reduction update from forcing all later reduction
738 // updates into the use set.
739 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
740  Instruction *Root, const SmallInstructionSet &Exclude,
741  const SmallInstructionSet &Final,
743  SmallInstructionVector Queue(1, Root);
744  while (!Queue.empty()) {
745  Instruction *I = Queue.pop_back_val();
746  if (!Users.insert(I).second)
747  continue;
748 
749  if (!Final.count(I))
750  for (Use &U : I->uses()) {
751  Instruction *User = cast<Instruction>(U.getUser());
752  if (PHINode *PN = dyn_cast<PHINode>(User)) {
753  // Ignore "wrap-around" uses to PHIs of this loop's header.
754  if (PN->getIncomingBlock(U) == L->getHeader())
755  continue;
756  }
757 
758  if (L->contains(User) && !Exclude.count(User)) {
759  Queue.push_back(User);
760  }
761  }
762 
763  // We also want to collect single-user "feeder" values.
764  for (User::op_iterator OI = I->op_begin(),
765  OIE = I->op_end(); OI != OIE; ++OI) {
766  if (Instruction *Op = dyn_cast<Instruction>(*OI))
767  if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
768  !Final.count(Op))
769  Queue.push_back(Op);
770  }
771  }
772 }
773 
774 // Collect all of the users of all of the provided root instructions (combined
775 // into a single set).
776 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
777  const SmallInstructionVector &Roots,
778  const SmallInstructionSet &Exclude,
779  const SmallInstructionSet &Final,
780  DenseSet<Instruction *> &Users) {
781  for (Instruction *Root : Roots)
782  collectInLoopUserSet(Root, Exclude, Final, Users);
783 }
784 
786  if (LoadInst *LI = dyn_cast<LoadInst>(I))
787  return LI->isUnordered();
788  if (StoreInst *SI = dyn_cast<StoreInst>(I))
789  return SI->isUnordered();
790  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
791  return !MI->isVolatile();
792  return false;
793 }
794 
795 /// Return true if IVU is a "simple" arithmetic operation.
796 /// This is used for narrowing the search space for DAGRoots; only arithmetic
797 /// and GEPs can be part of a DAGRoot.
798 static bool isSimpleArithmeticOp(User *IVU) {
799  if (Instruction *I = dyn_cast<Instruction>(IVU)) {
800  switch (I->getOpcode()) {
801  default: return false;
802  case Instruction::Add:
803  case Instruction::Sub:
804  case Instruction::Mul:
805  case Instruction::Shl:
806  case Instruction::AShr:
807  case Instruction::LShr:
808  case Instruction::GetElementPtr:
809  case Instruction::Trunc:
810  case Instruction::ZExt:
811  case Instruction::SExt:
812  return true;
813  }
814  }
815  return false;
816 }
817 
818 static bool isLoopIncrement(User *U, Instruction *IV) {
820 
821  if ((BO && BO->getOpcode() != Instruction::Add) ||
822  (!BO && !isa<GetElementPtrInst>(U)))
823  return false;
824 
825  for (auto *UU : U->users()) {
826  PHINode *PN = dyn_cast<PHINode>(UU);
827  if (PN && PN == IV)
828  return true;
829  }
830  return false;
831 }
832 
833 bool LoopReroll::DAGRootTracker::
834 collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
835  SmallInstructionVector BaseUsers;
836 
837  for (auto *I : Base->users()) {
838  ConstantInt *CI = nullptr;
839 
840  if (isLoopIncrement(I, IV)) {
841  LoopIncs.push_back(cast<Instruction>(I));
842  continue;
843  }
844 
845  // The root nodes must be either GEPs, ORs or ADDs.
846  if (auto *BO = dyn_cast<BinaryOperator>(I)) {
847  if (BO->getOpcode() == Instruction::Add ||
848  BO->getOpcode() == Instruction::Or)
849  CI = dyn_cast<ConstantInt>(BO->getOperand(1));
850  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
851  Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
852  CI = dyn_cast<ConstantInt>(LastOperand);
853  }
854 
855  if (!CI) {
856  if (Instruction *II = dyn_cast<Instruction>(I)) {
857  BaseUsers.push_back(II);
858  continue;
859  } else {
860  LLVM_DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I
861  << "\n");
862  return false;
863  }
864  }
865 
866  int64_t V = std::abs(CI->getValue().getSExtValue());
867  if (Roots.find(V) != Roots.end())
868  // No duplicates, please.
869  return false;
870 
871  Roots[V] = cast<Instruction>(I);
872  }
873 
874  // Make sure we have at least two roots.
875  if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
876  return false;
877 
878  // If we found non-loop-inc, non-root users of Base, assume they are
879  // for the zeroth root index. This is because "add %a, 0" gets optimized
880  // away.
881  if (BaseUsers.size()) {
882  if (Roots.find(0) != Roots.end()) {
883  LLVM_DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
884  return false;
885  }
886  Roots[0] = Base;
887  }
888 
889  // Calculate the number of users of the base, or lowest indexed, iteration.
890  unsigned NumBaseUses = BaseUsers.size();
891  if (NumBaseUses == 0)
892  NumBaseUses = Roots.begin()->second->getNumUses();
893 
894  // Check that every node has the same number of users.
895  for (auto &KV : Roots) {
896  if (KV.first == 0)
897  continue;
898  if (!KV.second->hasNUses(NumBaseUses)) {
899  LLVM_DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
900  << "#Base=" << NumBaseUses
901  << ", #Root=" << KV.second->getNumUses() << "\n");
902  return false;
903  }
904  }
905 
906  return true;
907 }
908 
909 void LoopReroll::DAGRootTracker::
910 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
911  // Does the user look like it could be part of a root set?
912  // All its users must be simple arithmetic ops.
913  if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
914  return;
915 
916  if (I != IV && findRootsBase(I, SubsumedInsts))
917  return;
918 
919  SubsumedInsts.insert(I);
920 
921  for (User *V : I->users()) {
922  Instruction *I = cast<Instruction>(V);
923  if (is_contained(LoopIncs, I))
924  continue;
925 
926  if (!isSimpleArithmeticOp(I))
927  continue;
928 
929  // The recursive call makes a copy of SubsumedInsts.
930  findRootsRecursive(I, SubsumedInsts);
931  }
932 }
933 
934 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
935  if (DRS.Roots.empty())
936  return false;
937 
938  // Consider a DAGRootSet with N-1 roots (so N different values including
939  // BaseInst).
940  // Define d = Roots[0] - BaseInst, which should be the same as
941  // Roots[I] - Roots[I-1] for all I in [1..N).
942  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
943  // loop iteration J.
944  //
945  // Now, For the loop iterations to be consecutive:
946  // D = d * N
947  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
948  if (!ADR)
949  return false;
950  unsigned N = DRS.Roots.size() + 1;
951  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
952  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
953  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
954  return false;
955 
956  return true;
957 }
958 
959 bool LoopReroll::DAGRootTracker::
960 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
961  // The base of a RootSet must be an AddRec, so it can be erased.
962  const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
963  if (!IVU_ADR || IVU_ADR->getLoop() != L)
964  return false;
965 
966  std::map<int64_t, Instruction*> V;
967  if (!collectPossibleRoots(IVU, V))
968  return false;
969 
970  // If we didn't get a root for index zero, then IVU must be
971  // subsumed.
972  if (V.find(0) == V.end())
973  SubsumedInsts.insert(IVU);
974 
975  // Partition the vector into monotonically increasing indexes.
976  DAGRootSet DRS;
977  DRS.BaseInst = nullptr;
978 
979  SmallVector<DAGRootSet, 16> PotentialRootSets;
980 
981  for (auto &KV : V) {
982  if (!DRS.BaseInst) {
983  DRS.BaseInst = KV.second;
984  DRS.SubsumedInsts = SubsumedInsts;
985  } else if (DRS.Roots.empty()) {
986  DRS.Roots.push_back(KV.second);
987  } else if (V.find(KV.first - 1) != V.end()) {
988  DRS.Roots.push_back(KV.second);
989  } else {
990  // Linear sequence terminated.
991  if (!validateRootSet(DRS))
992  return false;
993 
994  // Construct a new DAGRootSet with the next sequence.
995  PotentialRootSets.push_back(DRS);
996  DRS.BaseInst = KV.second;
997  DRS.Roots.clear();
998  }
999  }
1000 
1001  if (!validateRootSet(DRS))
1002  return false;
1003 
1004  PotentialRootSets.push_back(DRS);
1005 
1006  RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
1007 
1008  return true;
1009 }
1010 
1011 bool LoopReroll::DAGRootTracker::findRoots() {
1012  Inc = IVToIncMap[IV];
1013 
1014  assert(RootSets.empty() && "Unclean state!");
1015  if (std::abs(Inc) == 1) {
1016  for (auto *IVU : IV->users()) {
1017  if (isLoopIncrement(IVU, IV))
1018  LoopIncs.push_back(cast<Instruction>(IVU));
1019  }
1020  findRootsRecursive(IV, SmallInstructionSet());
1021  LoopIncs.push_back(IV);
1022  } else {
1023  if (!findRootsBase(IV, SmallInstructionSet()))
1024  return false;
1025  }
1026 
1027  // Ensure all sets have the same size.
1028  if (RootSets.empty()) {
1029  LLVM_DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
1030  return false;
1031  }
1032  for (auto &V : RootSets) {
1033  if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
1034  LLVM_DEBUG(
1035  dbgs()
1036  << "LRR: Aborting because not all root sets have the same size\n");
1037  return false;
1038  }
1039  }
1040 
1041  Scale = RootSets[0].Roots.size() + 1;
1042 
1043  if (Scale > IL_MaxRerollIterations) {
1044  LLVM_DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
1045  << "#Found=" << Scale
1046  << ", #Max=" << IL_MaxRerollIterations << "\n");
1047  return false;
1048  }
1049 
1050  LLVM_DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale
1051  << "\n");
1052 
1053  return true;
1054 }
1055 
1056 bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
1057  // Populate the MapVector with all instructions in the block, in order first,
1058  // so we can iterate over the contents later in perfect order.
1059  for (auto &I : *L->getHeader()) {
1060  Uses[&I].resize(IL_End);
1061  }
1062 
1063  SmallInstructionSet Exclude;
1064  for (auto &DRS : RootSets) {
1065  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1066  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1067  Exclude.insert(DRS.BaseInst);
1068  }
1069  Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1070 
1071  for (auto &DRS : RootSets) {
1072  DenseSet<Instruction*> VBase;
1073  collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1074  for (auto *I : VBase) {
1075  Uses[I].set(0);
1076  }
1077 
1078  unsigned Idx = 1;
1079  for (auto *Root : DRS.Roots) {
1081  collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1082 
1083  // While we're here, check the use sets are the same size.
1084  if (V.size() != VBase.size()) {
1085  LLVM_DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
1086  return false;
1087  }
1088 
1089  for (auto *I : V) {
1090  Uses[I].set(Idx);
1091  }
1092  ++Idx;
1093  }
1094 
1095  // Make sure our subsumed instructions are remembered too.
1096  for (auto *I : DRS.SubsumedInsts) {
1097  Uses[I].set(IL_All);
1098  }
1099  }
1100 
1101  // Make sure the loop increments are also accounted for.
1102 
1103  Exclude.clear();
1104  for (auto &DRS : RootSets) {
1105  Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1106  Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1107  Exclude.insert(DRS.BaseInst);
1108  }
1109 
1111  collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1112  for (auto *I : V) {
1113  Uses[I].set(IL_All);
1114  }
1115 
1116  return true;
1117 }
1118 
1119 /// Get the next instruction in "In" that is a member of set Val.
1120 /// Start searching from StartI, and do not return anything in Exclude.
1121 /// If StartI is not given, start from In.begin().
1123 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
1124  const SmallInstructionSet &Exclude,
1125  UsesTy::iterator *StartI) {
1126  UsesTy::iterator I = StartI ? *StartI : In.begin();
1127  while (I != In.end() && (I->second.test(Val) == 0 ||
1128  Exclude.count(I->first) != 0))
1129  ++I;
1130  return I;
1131 }
1132 
1133 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
1134  for (auto &DRS : RootSets) {
1135  if (DRS.BaseInst == I)
1136  return true;
1137  }
1138  return false;
1139 }
1140 
1141 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
1142  for (auto &DRS : RootSets) {
1143  if (is_contained(DRS.Roots, I))
1144  return true;
1145  }
1146  return false;
1147 }
1148 
1149 /// Return true if instruction I depends on any instruction between
1150 /// Start and End.
1151 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
1152  UsesTy::iterator Start,
1153  UsesTy::iterator End) {
1154  for (auto *U : I->users()) {
1155  for (auto It = Start; It != End; ++It)
1156  if (U == It->first)
1157  return true;
1158  }
1159  return false;
1160 }
1161 
1162 static bool isIgnorableInst(const Instruction *I) {
1163  if (isa<DbgInfoIntrinsic>(I))
1164  return true;
1165  const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
1166  if (!II)
1167  return false;
1168  switch (II->getIntrinsicID()) {
1169  default:
1170  return false;
1171  case Intrinsic::annotation:
1172  case Intrinsic::ptr_annotation:
1173  case Intrinsic::var_annotation:
1174  // TODO: the following intrinsics may also be whitelisted:
1175  // lifetime_start, lifetime_end, invariant_start, invariant_end
1176  return true;
1177  }
1178  return false;
1179 }
1180 
1181 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1182  // We now need to check for equivalence of the use graph of each root with
1183  // that of the primary induction variable (excluding the roots). Our goal
1184  // here is not to solve the full graph isomorphism problem, but rather to
1185  // catch common cases without a lot of work. As a result, we will assume
1186  // that the relative order of the instructions in each unrolled iteration
1187  // is the same (although we will not make an assumption about how the
1188  // different iterations are intermixed). Note that while the order must be
1189  // the same, the instructions may not be in the same basic block.
1190 
1191  // An array of just the possible reductions for this scale factor. When we
1192  // collect the set of all users of some root instructions, these reduction
1193  // instructions are treated as 'final' (their uses are not considered).
1194  // This is important because we don't want the root use set to search down
1195  // the reduction chain.
1196  SmallInstructionSet PossibleRedSet;
1197  SmallInstructionSet PossibleRedLastSet;
1198  SmallInstructionSet PossibleRedPHISet;
1199  Reductions.restrictToScale(Scale, PossibleRedSet,
1200  PossibleRedPHISet, PossibleRedLastSet);
1201 
1202  // Populate "Uses" with where each instruction is used.
1203  if (!collectUsedInstructions(PossibleRedSet))
1204  return false;
1205 
1206  // Make sure we mark the reduction PHIs as used in all iterations.
1207  for (auto *I : PossibleRedPHISet) {
1208  Uses[I].set(IL_All);
1209  }
1210 
1211  // Make sure we mark loop-control-only PHIs as used in all iterations. See
1212  // comment above LoopReroll::isLoopControlIV for more information.
1213  BasicBlock *Header = L->getHeader();
1214  if (LoopControlIV && LoopControlIV != IV) {
1215  for (auto *U : LoopControlIV->users()) {
1216  Instruction *IVUser = dyn_cast<Instruction>(U);
1217  // IVUser could be loop increment or compare
1218  Uses[IVUser].set(IL_All);
1219  for (auto *UU : IVUser->users()) {
1220  Instruction *UUser = dyn_cast<Instruction>(UU);
1221  // UUser could be compare, PHI or branch
1222  Uses[UUser].set(IL_All);
1223  // Skip SExt
1224  if (isa<SExtInst>(UUser)) {
1225  UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
1226  Uses[UUser].set(IL_All);
1227  }
1228  // Is UUser a compare instruction?
1229  if (UU->hasOneUse()) {
1230  Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1231  if (BI == cast<BranchInst>(Header->getTerminator()))
1232  Uses[BI].set(IL_All);
1233  }
1234  }
1235  }
1236  }
1237 
1238  // Make sure all instructions in the loop are in one and only one
1239  // set.
1240  for (auto &KV : Uses) {
1241  if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
1242  LLVM_DEBUG(
1243  dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1244  << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1245  return false;
1246  }
1247  }
1248 
1249  LLVM_DEBUG(for (auto &KV
1250  : Uses) {
1251  dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1252  });
1253 
1254  for (unsigned Iter = 1; Iter < Scale; ++Iter) {
1255  // In addition to regular aliasing information, we need to look for
1256  // instructions from later (future) iterations that have side effects
1257  // preventing us from reordering them past other instructions with side
1258  // effects.
1259  bool FutureSideEffects = false;
1260  AliasSetTracker AST(*AA);
1261  // The map between instructions in f(%iv.(i+1)) and f(%iv).
1263 
1264  // Compare iteration Iter to the base.
1265  SmallInstructionSet Visited;
1266  auto BaseIt = nextInstr(0, Uses, Visited);
1267  auto RootIt = nextInstr(Iter, Uses, Visited);
1268  auto LastRootIt = Uses.begin();
1269 
1270  while (BaseIt != Uses.end() && RootIt != Uses.end()) {
1271  Instruction *BaseInst = BaseIt->first;
1272  Instruction *RootInst = RootIt->first;
1273 
1274  // Skip over the IV or root instructions; only match their users.
1275  bool Continue = false;
1276  if (isBaseInst(BaseInst)) {
1277  Visited.insert(BaseInst);
1278  BaseIt = nextInstr(0, Uses, Visited);
1279  Continue = true;
1280  }
1281  if (isRootInst(RootInst)) {
1282  LastRootIt = RootIt;
1283  Visited.insert(RootInst);
1284  RootIt = nextInstr(Iter, Uses, Visited);
1285  Continue = true;
1286  }
1287  if (Continue) continue;
1288 
1289  if (!BaseInst->isSameOperationAs(RootInst)) {
1290  // Last chance saloon. We don't try and solve the full isomorphism
1291  // problem, but try and at least catch the case where two instructions
1292  // *of different types* are round the wrong way. We won't be able to
1293  // efficiently tell, given two ADD instructions, which way around we
1294  // should match them, but given an ADD and a SUB, we can at least infer
1295  // which one is which.
1296  //
1297  // This should allow us to deal with a greater subset of the isomorphism
1298  // problem. It does however change a linear algorithm into a quadratic
1299  // one, so limit the number of probes we do.
1300  auto TryIt = RootIt;
1301  unsigned N = NumToleratedFailedMatches;
1302  while (TryIt != Uses.end() &&
1303  !BaseInst->isSameOperationAs(TryIt->first) &&
1304  N--) {
1305  ++TryIt;
1306  TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1307  }
1308 
1309  if (TryIt == Uses.end() || TryIt == RootIt ||
1310  instrDependsOn(TryIt->first, RootIt, TryIt)) {
1311  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1312  << *BaseInst << " vs. " << *RootInst << "\n");
1313  return false;
1314  }
1315 
1316  RootIt = TryIt;
1317  RootInst = TryIt->first;
1318  }
1319 
1320  // All instructions between the last root and this root
1321  // may belong to some other iteration. If they belong to a
1322  // future iteration, then they're dangerous to alias with.
1323  //
1324  // Note that because we allow a limited amount of flexibility in the order
1325  // that we visit nodes, LastRootIt might be *before* RootIt, in which
1326  // case we've already checked this set of instructions so we shouldn't
1327  // do anything.
1328  for (; LastRootIt < RootIt; ++LastRootIt) {
1329  Instruction *I = LastRootIt->first;
1330  if (LastRootIt->second.find_first() < (int)Iter)
1331  continue;
1332  if (I->mayWriteToMemory())
1333  AST.add(I);
1334  // Note: This is specifically guarded by a check on isa<PHINode>,
1335  // which while a valid (somewhat arbitrary) micro-optimization, is
1336  // needed because otherwise isSafeToSpeculativelyExecute returns
1337  // false on PHI nodes.
1338  if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
1340  // Intervening instructions cause side effects.
1341  FutureSideEffects = true;
1342  }
1343 
1344  // Make sure that this instruction, which is in the use set of this
1345  // root instruction, does not also belong to the base set or the set of
1346  // some other root instruction.
1347  if (RootIt->second.count() > 1) {
1348  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1349  << " vs. " << *RootInst << " (prev. case overlap)\n");
1350  return false;
1351  }
1352 
1353  // Make sure that we don't alias with any instruction in the alias set
1354  // tracker. If we do, then we depend on a future iteration, and we
1355  // can't reroll.
1356  if (RootInst->mayReadFromMemory())
1357  for (auto &K : AST) {
1358  if (K.aliasesUnknownInst(RootInst, *AA)) {
1359  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1360  << *BaseInst << " vs. " << *RootInst
1361  << " (depends on future store)\n");
1362  return false;
1363  }
1364  }
1365 
1366  // If we've past an instruction from a future iteration that may have
1367  // side effects, and this instruction might also, then we can't reorder
1368  // them, and this matching fails. As an exception, we allow the alias
1369  // set tracker to handle regular (unordered) load/store dependencies.
1370  if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
1371  !isSafeToSpeculativelyExecute(BaseInst)) ||
1372  (!isUnorderedLoadStore(RootInst) &&
1373  !isSafeToSpeculativelyExecute(RootInst)))) {
1374  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1375  << " vs. " << *RootInst
1376  << " (side effects prevent reordering)\n");
1377  return false;
1378  }
1379 
1380  // For instructions that are part of a reduction, if the operation is
1381  // associative, then don't bother matching the operands (because we
1382  // already know that the instructions are isomorphic, and the order
1383  // within the iteration does not matter). For non-associative reductions,
1384  // we do need to match the operands, because we need to reject
1385  // out-of-order instructions within an iteration!
1386  // For example (assume floating-point addition), we need to reject this:
1387  // x += a[i]; x += b[i];
1388  // x += a[i+1]; x += b[i+1];
1389  // x += b[i+2]; x += a[i+2];
1390  bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1391 
1392  if (!(InReduction && BaseInst->isAssociative())) {
1393  bool Swapped = false, SomeOpMatched = false;
1394  for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
1395  Value *Op2 = RootInst->getOperand(j);
1396 
1397  // If this is part of a reduction (and the operation is not
1398  // associatve), then we match all operands, but not those that are
1399  // part of the reduction.
1400  if (InReduction)
1401  if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1402  if (Reductions.isPairInSame(RootInst, Op2I))
1403  continue;
1404 
1405  DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1406  if (BMI != BaseMap.end()) {
1407  Op2 = BMI->second;
1408  } else {
1409  for (auto &DRS : RootSets) {
1410  if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1411  Op2 = DRS.BaseInst;
1412  break;
1413  }
1414  }
1415  }
1416 
1417  if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
1418  // If we've not already decided to swap the matched operands, and
1419  // we've not already matched our first operand (note that we could
1420  // have skipped matching the first operand because it is part of a
1421  // reduction above), and the instruction is commutative, then try
1422  // the swapped match.
1423  if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1424  BaseInst->getOperand(!j) == Op2) {
1425  Swapped = true;
1426  } else {
1427  LLVM_DEBUG(dbgs()
1428  << "LRR: iteration root match failed at " << *BaseInst
1429  << " vs. " << *RootInst << " (operand " << j << ")\n");
1430  return false;
1431  }
1432  }
1433 
1434  SomeOpMatched = true;
1435  }
1436  }
1437 
1438  if ((!PossibleRedLastSet.count(BaseInst) &&
1439  hasUsesOutsideLoop(BaseInst, L)) ||
1440  (!PossibleRedLastSet.count(RootInst) &&
1441  hasUsesOutsideLoop(RootInst, L))) {
1442  LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1443  << " vs. " << *RootInst << " (uses outside loop)\n");
1444  return false;
1445  }
1446 
1447  Reductions.recordPair(BaseInst, RootInst, Iter);
1448  BaseMap.insert(std::make_pair(RootInst, BaseInst));
1449 
1450  LastRootIt = RootIt;
1451  Visited.insert(BaseInst);
1452  Visited.insert(RootInst);
1453  BaseIt = nextInstr(0, Uses, Visited);
1454  RootIt = nextInstr(Iter, Uses, Visited);
1455  }
1456  assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
1457  "Mismatched set sizes!");
1458  }
1459 
1460  LLVM_DEBUG(dbgs() << "LRR: Matched all iteration increments for " << *IV
1461  << "\n");
1462 
1463  return true;
1464 }
1465 
1466 void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
1467  BasicBlock *Header = L->getHeader();
1468  // Remove instructions associated with non-base iterations.
1469  for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend();
1470  J != JE;) {
1471  unsigned I = Uses[&*J].find_first();
1472  if (I > 0 && I < IL_All) {
1473  LLVM_DEBUG(dbgs() << "LRR: removing: " << *J << "\n");
1474  J++->eraseFromParent();
1475  continue;
1476  }
1477 
1478  ++J;
1479  }
1480 
1481  bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
1482 
1483  if (HasTwoIVs) {
1484  updateNonLoopCtrlIncr();
1485  replaceIV(LoopControlIV, LoopControlIV, IterCount);
1486  } else
1487  // We need to create a new induction variable for each different BaseInst.
1488  for (auto &DRS : RootSets)
1489  // Insert the new induction variable.
1490  replaceIV(DRS.BaseInst, IV, IterCount);
1491 
1492  SimplifyInstructionsInBlock(Header, TLI);
1493  DeleteDeadPHIs(Header, TLI);
1494 }
1495 
1496 // For non-loop-control IVs, we only need to update the last increment
1497 // with right amount, then we are done.
1498 void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
1499  const SCEV *NewInc = nullptr;
1500  for (auto *LoopInc : LoopIncs) {
1502  const SCEVConstant *COp = nullptr;
1503  if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
1504  COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1505  } else {
1506  COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
1507  if (!COp)
1508  COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1509  }
1510 
1511  assert(COp && "Didn't find constant operand of LoopInc!\n");
1512 
1513  const APInt &AInt = COp->getValue()->getValue();
1514  const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
1515  if (AInt.isNegative()) {
1516  NewInc = SE->getNegativeSCEV(COp);
1517  NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
1518  NewInc = SE->getNegativeSCEV(NewInc);
1519  } else
1520  NewInc = SE->getUDivExpr(COp, ScaleSCEV);
1521 
1522  LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
1523  }
1524 }
1525 
1526 void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
1527  Instruction *InstIV,
1528  const SCEV *IterCount) {
1529  BasicBlock *Header = L->getHeader();
1530  int64_t Inc = IVToIncMap[InstIV];
1531  bool NeedNewIV = InstIV == LoopControlIV;
1532  bool Negative = !NeedNewIV && Inc < 0;
1533 
1534  const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
1535  const SCEV *Start = RealIVSCEV->getStart();
1536 
1537  if (NeedNewIV)
1538  Start = SE->getConstant(Start->getType(), 0);
1539 
1540  const SCEV *SizeOfExpr = nullptr;
1541  const SCEV *IncrExpr =
1542  SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
1543  if (auto *PTy = dyn_cast<PointerType>(Inst->getType())) {
1544  Type *ElTy = PTy->getElementType();
1545  SizeOfExpr =
1546  SE->getSizeOfExpr(SE->getEffectiveSCEVType(Inst->getType()), ElTy);
1547  IncrExpr = SE->getMulExpr(IncrExpr, SizeOfExpr);
1548  }
1549  const SCEV *NewIVSCEV =
1550  SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1551 
1552  { // Limit the lifetime of SCEVExpander.
1553  const DataLayout &DL = Header->getModule()->getDataLayout();
1554  SCEVExpander Expander(*SE, DL, "reroll");
1555  Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1556  Header->getFirstNonPHIOrDbg());
1557 
1558  for (auto &KV : Uses)
1559  if (KV.second.find_first() == 0)
1560  KV.first->replaceUsesOfWith(Inst, NewIV);
1561 
1562  if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
1563  // FIXME: Why do we need this check?
1564  if (Uses[BI].find_first() == IL_All) {
1565  const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1566 
1567  if (NeedNewIV)
1568  ICSCEV = SE->getMulExpr(IterCount,
1569  SE->getConstant(IterCount->getType(), Scale));
1570 
1571  // Iteration count SCEV minus or plus 1
1572  const SCEV *MinusPlus1SCEV =
1573  SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
1574  if (Inst->getType()->isPointerTy()) {
1575  assert(SizeOfExpr && "SizeOfExpr is not initialized");
1576  MinusPlus1SCEV = SE->getMulExpr(MinusPlus1SCEV, SizeOfExpr);
1577  }
1578 
1579  const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV);
1580  // Iteration count minus 1
1581  Instruction *InsertPtr = nullptr;
1582  if (isa<SCEVConstant>(ICMinusPlus1SCEV)) {
1583  InsertPtr = BI;
1584  } else {
1585  BasicBlock *Preheader = L->getLoopPreheader();
1586  if (!Preheader)
1587  Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
1588  InsertPtr = Preheader->getTerminator();
1589  }
1590 
1591  if (!isa<PointerType>(NewIV->getType()) && NeedNewIV &&
1592  (SE->getTypeSizeInBits(NewIV->getType()) <
1593  SE->getTypeSizeInBits(ICMinusPlus1SCEV->getType()))) {
1594  IRBuilder<> Builder(BI);
1595  Builder.SetCurrentDebugLocation(BI->getDebugLoc());
1596  NewIV = Builder.CreateSExt(NewIV, ICMinusPlus1SCEV->getType());
1597  }
1598  Value *ICMinusPlus1 = Expander.expandCodeFor(
1599  ICMinusPlus1SCEV, NewIV->getType(), InsertPtr);
1600 
1601  Value *Cond =
1602  new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond");
1603  BI->setCondition(Cond);
1604 
1605  if (BI->getSuccessor(1) != Header)
1606  BI->swapSuccessors();
1607  }
1608  }
1609  }
1610 }
1611 
1612 // Validate the selected reductions. All iterations must have an isomorphic
1613 // part of the reduction chain and, for non-associative reductions, the chain
1614 // entries must appear in order.
1615 bool LoopReroll::ReductionTracker::validateSelected() {
1616  // For a non-associative reduction, the chain entries must appear in order.
1617  for (int i : Reds) {
1618  int PrevIter = 0, BaseCount = 0, Count = 0;
1619  for (Instruction *J : PossibleReds[i]) {
1620  // Note that all instructions in the chain must have been found because
1621  // all instructions in the function must have been assigned to some
1622  // iteration.
1623  int Iter = PossibleRedIter[J];
1624  if (Iter != PrevIter && Iter != PrevIter + 1 &&
1625  !PossibleReds[i].getReducedValue()->isAssociative()) {
1626  LLVM_DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: "
1627  << J << "\n");
1628  return false;
1629  }
1630 
1631  if (Iter != PrevIter) {
1632  if (Count != BaseCount) {
1633  LLVM_DEBUG(dbgs()
1634  << "LRR: Iteration " << PrevIter << " reduction use count "
1635  << Count << " is not equal to the base use count "
1636  << BaseCount << "\n");
1637  return false;
1638  }
1639 
1640  Count = 0;
1641  }
1642 
1643  ++Count;
1644  if (Iter == 0)
1645  ++BaseCount;
1646 
1647  PrevIter = Iter;
1648  }
1649  }
1650 
1651  return true;
1652 }
1653 
1654 // For all selected reductions, remove all parts except those in the first
1655 // iteration (and the PHI). Replace outside uses of the reduced value with uses
1656 // of the first-iteration reduced value (in other words, reroll the selected
1657 // reductions).
1658 void LoopReroll::ReductionTracker::replaceSelected() {
1659  // Fixup reductions to refer to the last instruction associated with the
1660  // first iteration (not the last).
1661  for (int i : Reds) {
1662  int j = 0;
1663  for (int e = PossibleReds[i].size(); j != e; ++j)
1664  if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1665  --j;
1666  break;
1667  }
1668 
1669  // Replace users with the new end-of-chain value.
1670  SmallInstructionVector Users;
1671  for (User *U : PossibleReds[i].getReducedValue()->users()) {
1672  Users.push_back(cast<Instruction>(U));
1673  }
1674 
1675  for (Instruction *User : Users)
1676  User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1677  PossibleReds[i][j]);
1678  }
1679 }
1680 
1681 // Reroll the provided loop with respect to the provided induction variable.
1682 // Generally, we're looking for a loop like this:
1683 //
1684 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
1685 // f(%iv)
1686 // %iv.1 = add %iv, 1 <-- a root increment
1687 // f(%iv.1)
1688 // %iv.2 = add %iv, 2 <-- a root increment
1689 // f(%iv.2)
1690 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
1691 // f(%iv.scale_m_1)
1692 // ...
1693 // %iv.next = add %iv, scale
1694 // %cmp = icmp(%iv, ...)
1695 // br %cmp, header, exit
1696 //
1697 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1698 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1699 // be intermixed with eachother. The restriction imposed by this algorithm is
1700 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1701 // etc. be the same.
1702 //
1703 // First, we collect the use set of %iv, excluding the other increment roots.
1704 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1705 // times, having collected the use set of f(%iv.(i+1)), during which we:
1706 // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1707 // the next unmatched instruction in f(%iv.(i+1)).
1708 // - Ensure that both matched instructions don't have any external users
1709 // (with the exception of last-in-chain reduction instructions).
1710 // - Track the (aliasing) write set, and other side effects, of all
1711 // instructions that belong to future iterations that come before the matched
1712 // instructions. If the matched instructions read from that write set, then
1713 // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1714 // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1715 // if any of these future instructions had side effects (could not be
1716 // speculatively executed), and so do the matched instructions, when we
1717 // cannot reorder those side-effect-producing instructions, and rerolling
1718 // fails.
1719 //
1720 // Finally, we make sure that all loop instructions are either loop increment
1721 // roots, belong to simple latch code, parts of validated reductions, part of
1722 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1723 // have been validated), then we reroll the loop.
1724 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1725  const SCEV *IterCount,
1726  ReductionTracker &Reductions) {
1727  DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1728  IVToIncMap, LoopControlIV);
1729 
1730  if (!DAGRoots.findRoots())
1731  return false;
1732  LLVM_DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV
1733  << "\n");
1734 
1735  if (!DAGRoots.validate(Reductions))
1736  return false;
1737  if (!Reductions.validateSelected())
1738  return false;
1739  // At this point, we've validated the rerolling, and we're committed to
1740  // making changes!
1741 
1742  Reductions.replaceSelected();
1743  DAGRoots.replace(IterCount);
1744 
1745  ++NumRerolledLoops;
1746  return true;
1747 }
1748 
1749 bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
1750  if (skipLoop(L))
1751  return false;
1752 
1753  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1754  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1755  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1756  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1757  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1758  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1759 
1760  BasicBlock *Header = L->getHeader();
1761  LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %"
1762  << Header->getName() << " (" << L->getNumBlocks()
1763  << " block(s))\n");
1764 
1765  // For now, we'll handle only single BB loops.
1766  if (L->getNumBlocks() > 1)
1767  return false;
1768 
1769  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
1770  return false;
1771 
1772  const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
1773  const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
1774  LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
1775  LLVM_DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
1776 
1777  // First, we need to find the induction variable with respect to which we can
1778  // reroll (there may be several possible options).
1779  SmallInstructionVector PossibleIVs;
1780  IVToIncMap.clear();
1781  LoopControlIV = nullptr;
1782  collectPossibleIVs(L, PossibleIVs);
1783 
1784  if (PossibleIVs.empty()) {
1785  LLVM_DEBUG(dbgs() << "LRR: No possible IVs found\n");
1786  return false;
1787  }
1788 
1789  ReductionTracker Reductions;
1790  collectPossibleReductions(L, Reductions);
1791  bool Changed = false;
1792 
1793  // For each possible IV, collect the associated possible set of 'root' nodes
1794  // (i+1, i+2, etc.).
1795  for (Instruction *PossibleIV : PossibleIVs)
1796  if (reroll(PossibleIV, L, Header, IterCount, Reductions)) {
1797  Changed = true;
1798  break;
1799  }
1800  LLVM_DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
1801 
1802  // Trip count of L has changed so SE must be re-evaluated.
1803  if (Changed)
1804  SE->forgetLoop(L);
1805 
1806  return Changed;
1807 }
APInt abs() const
Get the absolute value;.
Definition: APInt.h:1784
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
void push_back(const T &Elt)
Definition: SmallVector.h:213
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:250
Pass * createLoopRerollPass()
iterator_range< use_iterator > uses()
Definition: Value.h:354
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:241
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
This function determines if the specified instruction executes the same operation as the current one...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BinaryOps getOpcode() const
Definition: InstrTypes.h:555
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
bool user_empty() const
Definition: Value.h:363
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
static cl::opt< unsigned > NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate" " during fuzzy matching. (default: 400)"))
void add(Value *Ptr, LocationSize Size, const AAMDNodes &AAInfo)
These methods are used to add different types of instructions to the alias sets.
The main scalar evolution driver.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn&#39;t have a preheader, this method is called...
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1560
STATISTIC(NumFunctions, "Total number of functions")
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:577
void initializeLoopRerollPass(PassRegistry &)
reverse_iterator rend()
Definition: BasicBlock.h:271
An instruction for reading from memory.
Definition: Instructions.h:164
reverse_iterator rbegin()
Definition: BasicBlock.h:269
Hexagon Common GEP
iv Induction Variable Users
Definition: IVUsers.cpp:52
This defines the Use class.
op_iterator op_begin()
Definition: User.h:230
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
static bool isIgnorableInst(const Instruction *I)
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:134
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
static cl::opt< unsigned > MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, cl::desc("The maximum increment for loop rerolling"))
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
static bool isUnorderedLoadStore(Instruction *I)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:731
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
This node represents multiplication of some number of SCEVs.
static const SCEVConstant * getIncrmentFactorSCEV(ScalarEvolution *SE, const SCEV *SCEVExpr, Instruction &IV)
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:100
ConstantInt * getValue() const
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1559
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
This node represents a polynomial recurrence on the trip count of the specified loop.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
An instruction for storing to memory.
Definition: Instructions.h:306
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:151
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:142
Value * getOperand(unsigned i) const
Definition: User.h:170
Class to represent pointers.
Definition: DerivedTypes.h:467
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:146
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:837
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:357
#define P(N)
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:250
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N users or more.
Definition: Value.cpp:140
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:218
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Conditional or Unconditional Branch instruction.
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:117
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
char & LCSSAID
Definition: LCSSA.cpp:423
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:232
This instruction compares its operands according to the predicate given to the constructor.
bool isBinaryOp() const
Definition: Instruction.h:130
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
static const unsigned End
static bool isLoopIncrement(User *U, Instruction *IV)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
IterationLimits
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1032
Type * getType() const
Return the LLVM type of this SCEV expression.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:48
typename VectorType::iterator iterator
Definition: MapVector.h:50
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1277
bool isCommutative() const
Return true if the instruction is commutative:
Definition: Instruction.h:452
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
Class for arbitrary precision integers.
Definition: APInt.h:69
iterator_range< user_iterator > users()
Definition: Value.h:399
This class uses information about analyze scalars to rewrite expressions in canonical form...
static bool isSimpleArithmeticOp(User *IVU)
Return true if IVU is a "simple" arithmetic operation.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:170
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:121
iv users
Definition: IVUsers.cpp:52
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:445
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
bool mayReadFromMemory() const
Return true if this instruction may read memory.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1226
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:375
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
IRTranslator LLVM IR MI
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
static Expected< std::string > replace(StringRef S, StringRef From, StringRef To)
const Instruction * getFirstNonPHIOrDbg() const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic...
Definition: BasicBlock.cpp:196
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:157
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
static bool isAssociative(const COFFSection &Section)
#define LLVM_DEBUG(X)
Definition: Debug.h:119
bool use_empty() const
Definition: Value.h:322
Type * getElementType() const
Definition: DerivedTypes.h:486
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:67
This class represents a constant integer value.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:967