LCOV - code coverage report
Current view: top level - lib/Analysis - DependenceAnalysis.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1047 1337 78.3 %
Date: 2018-10-20 13:21:21 Functions: 82 94 87.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // DependenceAnalysis is an LLVM pass that analyses dependences between memory
      11             : // accesses. Currently, it is an (incomplete) implementation of the approach
      12             : // described in
      13             : //
      14             : //            Practical Dependence Testing
      15             : //            Goff, Kennedy, Tseng
      16             : //            PLDI 1991
      17             : //
      18             : // There's a single entry point that analyzes the dependence between a pair
      19             : // of memory references in a function, returning either NULL, for no dependence,
      20             : // or a more-or-less detailed description of the dependence between them.
      21             : //
      22             : // Currently, the implementation cannot propagate constraints between
      23             : // coupled RDIV subscripts and lacks a multi-subscript MIV test.
      24             : // Both of these are conservative weaknesses;
      25             : // that is, not a source of correctness problems.
      26             : //
      27             : // Since Clang linearizes some array subscripts, the dependence
      28             : // analysis is using SCEV->delinearize to recover the representation of multiple
      29             : // subscripts, and thus avoid the more expensive and less precise MIV tests. The
      30             : // delinearization is controlled by the flag -da-delinearize.
      31             : //
      32             : // We should pay some careful attention to the possibility of integer overflow
      33             : // in the implementation of the various tests. This could happen with Add,
      34             : // Subtract, or Multiply, with both APInt's and SCEV's.
      35             : //
      36             : // Some non-linear subscript pairs can be handled by the GCD test
      37             : // (and perhaps other tests).
      38             : // Should explore how often these things occur.
      39             : //
      40             : // Finally, it seems like certain test cases expose weaknesses in the SCEV
      41             : // simplification, especially in the handling of sign and zero extensions.
      42             : // It could be useful to spend time exploring these.
      43             : //
      44             : // Please note that this is work in progress and the interface is subject to
      45             : // change.
      46             : //
      47             : //===----------------------------------------------------------------------===//
      48             : //                                                                            //
      49             : //                   In memory of Ken Kennedy, 1945 - 2007                    //
      50             : //                                                                            //
      51             : //===----------------------------------------------------------------------===//
      52             : 
      53             : #include "llvm/Analysis/DependenceAnalysis.h"
      54             : #include "llvm/ADT/STLExtras.h"
      55             : #include "llvm/ADT/Statistic.h"
      56             : #include "llvm/Analysis/AliasAnalysis.h"
      57             : #include "llvm/Analysis/LoopInfo.h"
      58             : #include "llvm/Analysis/ScalarEvolution.h"
      59             : #include "llvm/Analysis/ScalarEvolutionExpressions.h"
      60             : #include "llvm/Analysis/ValueTracking.h"
      61             : #include "llvm/Config/llvm-config.h"
      62             : #include "llvm/IR/InstIterator.h"
      63             : #include "llvm/IR/Module.h"
      64             : #include "llvm/IR/Operator.h"
      65             : #include "llvm/Support/CommandLine.h"
      66             : #include "llvm/Support/Debug.h"
      67             : #include "llvm/Support/ErrorHandling.h"
      68             : #include "llvm/Support/raw_ostream.h"
      69             : 
      70             : using namespace llvm;
      71             : 
      72             : #define DEBUG_TYPE "da"
      73             : 
      74             : //===----------------------------------------------------------------------===//
      75             : // statistics
      76             : 
      77             : STATISTIC(TotalArrayPairs, "Array pairs tested");
      78             : STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
      79             : STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
      80             : STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
      81             : STATISTIC(ZIVapplications, "ZIV applications");
      82             : STATISTIC(ZIVindependence, "ZIV independence");
      83             : STATISTIC(StrongSIVapplications, "Strong SIV applications");
      84             : STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
      85             : STATISTIC(StrongSIVindependence, "Strong SIV independence");
      86             : STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
      87             : STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
      88             : STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
      89             : STATISTIC(ExactSIVapplications, "Exact SIV applications");
      90             : STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
      91             : STATISTIC(ExactSIVindependence, "Exact SIV independence");
      92             : STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
      93             : STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
      94             : STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
      95             : STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
      96             : STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
      97             : STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
      98             : STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
      99             : STATISTIC(DeltaApplications, "Delta applications");
     100             : STATISTIC(DeltaSuccesses, "Delta successes");
     101             : STATISTIC(DeltaIndependence, "Delta independence");
     102             : STATISTIC(DeltaPropagations, "Delta propagations");
     103             : STATISTIC(GCDapplications, "GCD applications");
     104             : STATISTIC(GCDsuccesses, "GCD successes");
     105             : STATISTIC(GCDindependence, "GCD independence");
     106             : STATISTIC(BanerjeeApplications, "Banerjee applications");
     107             : STATISTIC(BanerjeeIndependence, "Banerjee independence");
     108             : STATISTIC(BanerjeeSuccesses, "Banerjee successes");
     109             : 
     110             : static cl::opt<bool>
     111             :     Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
     112             :                 cl::desc("Try to delinearize array references."));
     113             : 
     114             : //===----------------------------------------------------------------------===//
     115             : // basics
     116             : 
     117             : DependenceAnalysis::Result
     118           0 : DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
     119             :   auto &AA = FAM.getResult<AAManager>(F);
     120             :   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
     121             :   auto &LI = FAM.getResult<LoopAnalysis>(F);
     122           0 :   return DependenceInfo(&F, &AA, &SE, &LI);
     123             : }
     124             : 
     125             : AnalysisKey DependenceAnalysis::Key;
     126             : 
     127       32058 : INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
     128             :                       "Dependence Analysis", true, true)
     129       32058 : INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
     130       32058 : INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
     131       32058 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     132      106973 : INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
     133             :                     true, true)
     134             : 
     135             : char DependenceAnalysisWrapperPass::ID = 0;
     136             : 
     137           0 : FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
     138           0 :   return new DependenceAnalysisWrapperPass();
     139             : }
     140             : 
     141         273 : bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
     142         273 :   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
     143         273 :   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     144         273 :   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     145         273 :   info.reset(new DependenceInfo(&F, &AA, &SE, &LI));
     146         273 :   return false;
     147             : }
     148             : 
     149         284 : DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
     150             : 
     151         273 : void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
     152             : 
     153          43 : void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
     154             :   AU.setPreservesAll();
     155             :   AU.addRequiredTransitive<AAResultsWrapperPass>();
     156             :   AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
     157             :   AU.addRequiredTransitive<LoopInfoWrapperPass>();
     158          43 : }
     159             : 
     160             : 
     161             : // Used to test the dependence analyzer.
     162             : // Looks through the function, noting loads and stores.
     163             : // Calls depends() on every possible pair and prints out the result.
     164             : // Ignores all other instructions.
     165         192 : static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) {
     166         192 :   auto *F = DA->getFunction();
     167        4158 :   for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
     168        4158 :        ++SrcI) {
     169        4158 :     if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) {
     170        7114 :       for (inst_iterator DstI = SrcI, DstE = inst_end(F);
     171        7114 :            DstI != DstE; ++DstI) {
     172        7114 :         if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
     173        1662 :           OS << "da analyze - ";
     174        1662 :           if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
     175         913 :             D->dump(OS);
     176        1608 :             for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
     177         695 :               if (D->isSplitable(Level)) {
     178           3 :                 OS << "da analyze - split level = " << Level;
     179           3 :                 OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
     180           3 :                 OS << "!\n";
     181             :               }
     182             :             }
     183             :           }
     184             :           else
     185         749 :             OS << "none!\n";
     186             :         }
     187             :       }
     188             :     }
     189             :   }
     190         192 : }
     191             : 
     192         192 : void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
     193             :                                           const Module *) const {
     194         192 :   dumpExampleDependence(OS, info.get());
     195         192 : }
     196             : 
     197             : //===----------------------------------------------------------------------===//
     198             : // Dependence methods
     199             : 
     200             : // Returns true if this is an input dependence.
     201         104 : bool Dependence::isInput() const {
     202         104 :   return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
     203             : }
     204             : 
     205             : 
     206             : // Returns true if this is an output dependence.
     207         286 : bool Dependence::isOutput() const {
     208         286 :   return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
     209             : }
     210             : 
     211             : 
     212             : // Returns true if this is an flow (aka true)  dependence.
     213         384 : bool Dependence::isFlow() const {
     214         384 :   return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
     215             : }
     216             : 
     217             : 
     218             : // Returns true if this is an anti dependence.
     219         133 : bool Dependence::isAnti() const {
     220         133 :   return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
     221             : }
     222             : 
     223             : 
     224             : // Returns true if a particular level is scalar; that is,
     225             : // if no subscript in the source or destination mention the induction
     226             : // variable associated with the loop at this level.
     227             : // Leave this out of line, so it will serve as a virtual method anchor
     228           0 : bool Dependence::isScalar(unsigned level) const {
     229           0 :   return false;
     230             : }
     231             : 
     232             : 
     233             : //===----------------------------------------------------------------------===//
     234             : // FullDependence methods
     235             : 
     236         933 : FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
     237             :                                bool PossiblyLoopIndependent,
     238         933 :                                unsigned CommonLevels)
     239             :     : Dependence(Source, Destination), Levels(CommonLevels),
     240         933 :       LoopIndependent(PossiblyLoopIndependent) {
     241         933 :   Consistent = true;
     242         933 :   if (CommonLevels)
     243        1704 :     DV = make_unique<DVEntry[]>(CommonLevels);
     244         933 : }
     245             : 
     246             : // The rest are simple getters that hide the implementation.
     247             : 
     248             : // getDirection - Returns the direction associated with a particular level.
     249        1641 : unsigned FullDependence::getDirection(unsigned Level) const {
     250             :   assert(0 < Level && Level <= Levels && "Level out of range");
     251        3282 :   return DV[Level - 1].Direction;
     252             : }
     253             : 
     254             : 
     255             : // Returns the distance (or NULL) associated with a particular level.
     256         735 : const SCEV *FullDependence::getDistance(unsigned Level) const {
     257             :   assert(0 < Level && Level <= Levels && "Level out of range");
     258        1470 :   return DV[Level - 1].Distance;
     259             : }
     260             : 
     261             : 
     262             : // Returns true if a particular level is scalar; that is,
     263             : // if no subscript in the source or destination mention the induction
     264             : // variable associated with the loop at this level.
     265         701 : bool FullDependence::isScalar(unsigned Level) const {
     266             :   assert(0 < Level && Level <= Levels && "Level out of range");
     267        1402 :   return DV[Level - 1].Scalar;
     268             : }
     269             : 
     270             : 
     271             : // Returns true if peeling the first iteration from this loop
     272             : // will break this dependence.
     273         695 : bool FullDependence::isPeelFirst(unsigned Level) const {
     274             :   assert(0 < Level && Level <= Levels && "Level out of range");
     275        1390 :   return DV[Level - 1].PeelFirst;
     276             : }
     277             : 
     278             : 
     279             : // Returns true if peeling the last iteration from this loop
     280             : // will break this dependence.
     281         695 : bool FullDependence::isPeelLast(unsigned Level) const {
     282             :   assert(0 < Level && Level <= Levels && "Level out of range");
     283        1390 :   return DV[Level - 1].PeelLast;
     284             : }
     285             : 
     286             : 
     287             : // Returns true if splitting this loop will break the dependence.
     288        1390 : bool FullDependence::isSplitable(unsigned Level) const {
     289             :   assert(0 < Level && Level <= Levels && "Level out of range");
     290        2780 :   return DV[Level - 1].Splitable;
     291             : }
     292             : 
     293             : 
     294             : //===----------------------------------------------------------------------===//
     295             : // DependenceInfo::Constraint methods
     296             : 
     297             : // If constraint is a point <X, Y>, returns X.
     298             : // Otherwise assert.
     299           3 : const SCEV *DependenceInfo::Constraint::getX() const {
     300             :   assert(Kind == Point && "Kind should be Point");
     301           3 :   return A;
     302             : }
     303             : 
     304             : 
     305             : // If constraint is a point <X, Y>, returns Y.
     306             : // Otherwise assert.
     307           3 : const SCEV *DependenceInfo::Constraint::getY() const {
     308             :   assert(Kind == Point && "Kind should be Point");
     309           3 :   return B;
     310             : }
     311             : 
     312             : 
     313             : // If constraint is a line AX + BY = C, returns A.
     314             : // Otherwise assert.
     315           6 : const SCEV *DependenceInfo::Constraint::getA() const {
     316             :   assert((Kind == Line || Kind == Distance) &&
     317             :          "Kind should be Line (or Distance)");
     318           6 :   return A;
     319             : }
     320             : 
     321             : 
     322             : // If constraint is a line AX + BY = C, returns B.
     323             : // Otherwise assert.
     324           6 : const SCEV *DependenceInfo::Constraint::getB() const {
     325             :   assert((Kind == Line || Kind == Distance) &&
     326             :          "Kind should be Line (or Distance)");
     327           6 :   return B;
     328             : }
     329             : 
     330             : 
     331             : // If constraint is a line AX + BY = C, returns C.
     332             : // Otherwise assert.
     333           4 : const SCEV *DependenceInfo::Constraint::getC() const {
     334             :   assert((Kind == Line || Kind == Distance) &&
     335             :          "Kind should be Line (or Distance)");
     336           4 :   return C;
     337             : }
     338             : 
     339             : 
     340             : // If constraint is a distance, returns D.
     341             : // Otherwise assert.
     342           0 : const SCEV *DependenceInfo::Constraint::getD() const {
     343             :   assert(Kind == Distance && "Kind should be Distance");
     344           0 :   return SE->getNegativeSCEV(C);
     345             : }
     346             : 
     347             : 
     348             : // Returns the loop associated with this constraint.
     349           2 : const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
     350             :   assert((Kind == Distance || Kind == Line || Kind == Point) &&
     351             :          "Kind should be Distance, Line, or Point");
     352           2 :   return AssociatedLoop;
     353             : }
     354             : 
     355           1 : void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
     356             :                                           const Loop *CurLoop) {
     357           1 :   Kind = Point;
     358           1 :   A = X;
     359           1 :   B = Y;
     360           1 :   AssociatedLoop = CurLoop;
     361           1 : }
     362             : 
     363          71 : void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
     364             :                                          const SCEV *CC, const Loop *CurLoop) {
     365          71 :   Kind = Line;
     366          71 :   A = AA;
     367          71 :   B = BB;
     368          71 :   C = CC;
     369          71 :   AssociatedLoop = CurLoop;
     370          71 : }
     371             : 
     372         366 : void DependenceInfo::Constraint::setDistance(const SCEV *D,
     373             :                                              const Loop *CurLoop) {
     374         366 :   Kind = Distance;
     375         366 :   A = SE->getOne(D->getType());
     376         366 :   B = SE->getNegativeSCEV(A);
     377         366 :   C = SE->getNegativeSCEV(D);
     378         366 :   AssociatedLoop = CurLoop;
     379         366 : }
     380             : 
     381           0 : void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
     382             : 
     383         936 : void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
     384         936 :   SE = NewSE;
     385         936 :   Kind = Any;
     386         936 : }
     387             : 
     388             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     389             : // For debugging purposes. Dumps the constraint out to OS.
     390             : LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
     391             :   if (isEmpty())
     392             :     OS << " Empty\n";
     393             :   else if (isAny())
     394             :     OS << " Any\n";
     395             :   else if (isPoint())
     396             :     OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
     397             :   else if (isDistance())
     398             :     OS << " Distance is " << *getD() <<
     399             :       " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
     400             :   else if (isLine())
     401             :     OS << " Line is " << *getA() << "*X + " <<
     402             :       *getB() << "*Y = " << *getC() << "\n";
     403             :   else
     404             :     llvm_unreachable("unknown constraint type in Constraint::dump");
     405             : }
     406             : #endif
     407             : 
     408             : 
     409             : // Updates X with the intersection
     410             : // of the Constraints X and Y. Returns true if X has changed.
     411             : // Corresponds to Figure 4 from the paper
     412             : //
     413             : //            Practical Dependence Testing
     414             : //            Goff, Kennedy, Tseng
     415             : //            PLDI 1991
     416           2 : bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
     417             :   ++DeltaApplications;
     418             :   LLVM_DEBUG(dbgs() << "\tintersect constraints\n");
     419             :   LLVM_DEBUG(dbgs() << "\t    X ="; X->dump(dbgs()));
     420             :   LLVM_DEBUG(dbgs() << "\t    Y ="; Y->dump(dbgs()));
     421             :   assert(!Y->isPoint() && "Y must not be a Point");
     422           2 :   if (X->isAny()) {
     423           1 :     if (Y->isAny())
     424             :       return false;
     425           1 :     *X = *Y;
     426           1 :     return true;
     427             :   }
     428           1 :   if (X->isEmpty())
     429             :     return false;
     430           1 :   if (Y->isEmpty()) {
     431           0 :     X->setEmpty();
     432           0 :     return true;
     433             :   }
     434             : 
     435           1 :   if (X->isDistance() && Y->isDistance()) {
     436             :     LLVM_DEBUG(dbgs() << "\t    intersect 2 distances\n");
     437           0 :     if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
     438             :       return false;
     439           0 :     if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
     440           0 :       X->setEmpty();
     441             :       ++DeltaSuccesses;
     442           0 :       return true;
     443             :     }
     444             :     // Hmmm, interesting situation.
     445             :     // I guess if either is constant, keep it and ignore the other.
     446           0 :     if (isa<SCEVConstant>(Y->getD())) {
     447           0 :       *X = *Y;
     448           0 :       return true;
     449             :     }
     450             :     return false;
     451             :   }
     452             : 
     453             :   // At this point, the pseudo-code in Figure 4 of the paper
     454             :   // checks if (X->isPoint() && Y->isPoint()).
     455             :   // This case can't occur in our implementation,
     456             :   // since a Point can only arise as the result of intersecting
     457             :   // two Line constraints, and the right-hand value, Y, is never
     458             :   // the result of an intersection.
     459             :   assert(!(X->isPoint() && Y->isPoint()) &&
     460             :          "We shouldn't ever see X->isPoint() && Y->isPoint()");
     461             : 
     462           1 :   if (X->isLine() && Y->isLine()) {
     463             :     LLVM_DEBUG(dbgs() << "\t    intersect 2 lines\n");
     464           1 :     const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
     465           1 :     const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
     466           1 :     if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
     467             :       // slopes are equal, so lines are parallel
     468             :       LLVM_DEBUG(dbgs() << "\t\tsame slope\n");
     469           0 :       Prod1 = SE->getMulExpr(X->getC(), Y->getB());
     470           0 :       Prod2 = SE->getMulExpr(X->getB(), Y->getC());
     471           0 :       if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
     472             :         return false;
     473           0 :       if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
     474           0 :         X->setEmpty();
     475             :         ++DeltaSuccesses;
     476           0 :         return true;
     477             :       }
     478             :       return false;
     479             :     }
     480           1 :     if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
     481             :       // slopes differ, so lines intersect
     482             :       LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");
     483           1 :       const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
     484           1 :       const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
     485           1 :       const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
     486           1 :       const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
     487           1 :       const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
     488           1 :       const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
     489             :       const SCEVConstant *C1A2_C2A1 =
     490           1 :         dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
     491             :       const SCEVConstant *C1B2_C2B1 =
     492           1 :         dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
     493             :       const SCEVConstant *A1B2_A2B1 =
     494           1 :         dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
     495             :       const SCEVConstant *A2B1_A1B2 =
     496           1 :         dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
     497           1 :       if (!C1B2_C2B1 || !C1A2_C2A1 ||
     498           1 :           !A1B2_A2B1 || !A2B1_A1B2)
     499             :         return false;
     500             :       APInt Xtop = C1B2_C2B1->getAPInt();
     501             :       APInt Xbot = A1B2_A2B1->getAPInt();
     502             :       APInt Ytop = C1A2_C2A1->getAPInt();
     503             :       APInt Ybot = A2B1_A1B2->getAPInt();
     504             :       LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
     505             :       LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
     506             :       LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
     507             :       LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
     508             :       APInt Xq = Xtop; // these need to be initialized, even
     509             :       APInt Xr = Xtop; // though they're just going to be overwritten
     510           1 :       APInt::sdivrem(Xtop, Xbot, Xq, Xr);
     511             :       APInt Yq = Ytop;
     512             :       APInt Yr = Ytop;
     513           1 :       APInt::sdivrem(Ytop, Ybot, Yq, Yr);
     514           1 :       if (Xr != 0 || Yr != 0) {
     515           0 :         X->setEmpty();
     516             :         ++DeltaSuccesses;
     517           0 :         return true;
     518             :       }
     519             :       LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
     520           1 :       if (Xq.slt(0) || Yq.slt(0)) {
     521           0 :         X->setEmpty();
     522             :         ++DeltaSuccesses;
     523           0 :         return true;
     524             :       }
     525           1 :       if (const SCEVConstant *CUB =
     526           1 :           collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
     527             :         const APInt &UpperBound = CUB->getAPInt();
     528             :         LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
     529           0 :         if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
     530           0 :           X->setEmpty();
     531             :           ++DeltaSuccesses;
     532           0 :           return true;
     533             :         }
     534             :       }
     535           2 :       X->setPoint(SE->getConstant(Xq),
     536           1 :                   SE->getConstant(Yq),
     537             :                   X->getAssociatedLoop());
     538             :       ++DeltaSuccesses;
     539           1 :       return true;
     540             :     }
     541             :     return false;
     542             :   }
     543             : 
     544             :   // if (X->isLine() && Y->isPoint()) This case can't occur.
     545             :   assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
     546             : 
     547           0 :   if (X->isPoint() && Y->isLine()) {
     548             :     LLVM_DEBUG(dbgs() << "\t    intersect Point and Line\n");
     549           0 :     const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
     550           0 :     const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
     551           0 :     const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
     552           0 :     if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
     553             :       return false;
     554           0 :     if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
     555           0 :       X->setEmpty();
     556             :       ++DeltaSuccesses;
     557           0 :       return true;
     558             :     }
     559             :     return false;
     560             :   }
     561             : 
     562           0 :   llvm_unreachable("shouldn't reach the end of Constraint intersection");
     563             :   return false;
     564             : }
     565             : 
     566             : 
     567             : //===----------------------------------------------------------------------===//
     568             : // DependenceInfo methods
     569             : 
     570             : // For debugging purposes. Dumps a dependence to OS.
     571         913 : void Dependence::dump(raw_ostream &OS) const {
     572             :   bool Splitable = false;
     573         913 :   if (isConfused())
     574         529 :     OS << "confused";
     575             :   else {
     576         384 :     if (isConsistent())
     577         153 :       OS << "consistent ";
     578         384 :     if (isFlow())
     579          98 :       OS << "flow";
     580         286 :     else if (isOutput())
     581         153 :       OS << "output";
     582         133 :     else if (isAnti())
     583          29 :       OS << "anti";
     584         104 :     else if (isInput())
     585         104 :       OS << "input";
     586         384 :     unsigned Levels = getLevels();
     587         384 :     OS << " [";
     588        1079 :     for (unsigned II = 1; II <= Levels; ++II) {
     589         695 :       if (isSplitable(II))
     590             :         Splitable = true;
     591         695 :       if (isPeelFirst(II))
     592             :         OS << 'p';
     593         695 :       const SCEV *Distance = getDistance(II);
     594         695 :       if (Distance)
     595             :         OS << *Distance;
     596         661 :       else if (isScalar(II))
     597         241 :         OS << "S";
     598             :       else {
     599         420 :         unsigned Direction = getDirection(II);
     600         420 :         if (Direction == DVEntry::ALL)
     601         319 :           OS << "*";
     602             :         else {
     603         101 :           if (Direction & DVEntry::LT)
     604          60 :             OS << "<";
     605         101 :           if (Direction & DVEntry::EQ)
     606          39 :             OS << "=";
     607         101 :           if (Direction & DVEntry::GT)
     608          50 :             OS << ">";
     609             :         }
     610             :       }
     611         695 :       if (isPeelLast(II))
     612             :         OS << 'p';
     613         695 :       if (II < Levels)
     614         327 :         OS << " ";
     615             :     }
     616         384 :     if (isLoopIndependent())
     617         182 :       OS << "|<";
     618         384 :     OS << "]";
     619         384 :     if (Splitable)
     620           3 :       OS << " splitable";
     621             :   }
     622         913 :   OS << "!\n";
     623         913 : }
     624             : 
     625             : // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
     626             : // underlaying objects. If LocA and LocB are known to not alias (for any reason:
     627             : // tbaa, non-overlapping regions etc), then it is known there is no dependecy.
     628             : // Otherwise the underlying objects are checked to see if they point to
     629             : // different identifiable objects.
     630        1798 : static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
     631             :                                           const DataLayout &DL,
     632             :                                           const MemoryLocation &LocA,
     633             :                                           const MemoryLocation &LocB) {
     634             :   // Check the original locations (minus size) for noalias, which can happen for
     635             :   // tbaa, incompatible underlying object locations, etc.
     636        1798 :   MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags);
     637        1798 :   MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags);
     638        1798 :   if (AA->alias(LocAS, LocBS) == NoAlias)
     639             :     return NoAlias;
     640             : 
     641             :   // Check the underlying objects are the same
     642        1468 :   const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL);
     643        1468 :   const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL);
     644             : 
     645             :   // If the underlying objects are the same, they must alias
     646        1468 :   if (AObj == BObj)
     647             :     return MustAlias;
     648             : 
     649             :   // We may have hit the recursion limit for underlying objects, or have
     650             :   // underlying objects where we don't know they will alias.
     651         538 :   if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj))
     652         538 :     return MayAlias;
     653             : 
     654             :   // Otherwise we know the objects are different and both identified objects so
     655             :   // must not alias.
     656             :   return NoAlias;
     657             : }
     658             : 
     659             : 
     660             : // Returns true if the load or store can be analyzed. Atomic and volatile
     661             : // operations have properties which this analysis does not understand.
     662             : static
     663        3596 : bool isLoadOrStore(const Instruction *I) {
     664             :   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
     665             :     return LI->isUnordered();
     666             :   else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
     667             :     return SI->isUnordered();
     668             :   return false;
     669             : }
     670             : 
     671             : 
     672             : // Examines the loop nesting of the Src and Dst
     673             : // instructions and establishes their shared loops. Sets the variables
     674             : // CommonLevels, SrcLevels, and MaxLevels.
     675             : // The source and destination instructions needn't be contained in the same
     676             : // loop. The routine establishNestingLevels finds the level of most deeply
     677             : // nested loop that contains them both, CommonLevels. An instruction that's
     678             : // not contained in a loop is at level = 0. MaxLevels is equal to the level
     679             : // of the source plus the level of the destination, minus CommonLevels.
     680             : // This lets us allocate vectors MaxLevels in length, with room for every
     681             : // distinct loop referenced in both the source and destination subscripts.
     682             : // The variable SrcLevels is the nesting depth of the source instruction.
     683             : // It's used to help calculate distinct loops referenced by the destination.
     684             : // Here's the map from loops to levels:
     685             : //            0 - unused
     686             : //            1 - outermost common loop
     687             : //          ... - other common loops
     688             : // CommonLevels - innermost common loop
     689             : //          ... - loops containing Src but not Dst
     690             : //    SrcLevels - innermost loop containing Src but not Dst
     691             : //          ... - loops containing Dst but not Src
     692             : //    MaxLevels - innermost loops containing Dst but not Src
     693             : // Consider the follow code fragment:
     694             : //   for (a = ...) {
     695             : //     for (b = ...) {
     696             : //       for (c = ...) {
     697             : //         for (d = ...) {
     698             : //           A[] = ...;
     699             : //         }
     700             : //       }
     701             : //       for (e = ...) {
     702             : //         for (f = ...) {
     703             : //           for (g = ...) {
     704             : //             ... = A[];
     705             : //           }
     706             : //         }
     707             : //       }
     708             : //     }
     709             : //   }
     710             : // If we're looking at the possibility of a dependence between the store
     711             : // to A (the Src) and the load from A (the Dst), we'll note that they
     712             : // have 2 loops in common, so CommonLevels will equal 2 and the direction
     713             : // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
     714             : // A map from loop names to loop numbers would look like
     715             : //     a - 1
     716             : //     b - 2 = CommonLevels
     717             : //     c - 3
     718             : //     d - 4 = SrcLevels
     719             : //     e - 5
     720             : //     f - 6
     721             : //     g - 7 = MaxLevels
     722         933 : void DependenceInfo::establishNestingLevels(const Instruction *Src,
     723             :                                             const Instruction *Dst) {
     724         933 :   const BasicBlock *SrcBlock = Src->getParent();
     725         933 :   const BasicBlock *DstBlock = Dst->getParent();
     726         933 :   unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
     727         933 :   unsigned DstLevel = LI->getLoopDepth(DstBlock);
     728         933 :   const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
     729         933 :   const Loop *DstLoop = LI->getLoopFor(DstBlock);
     730         933 :   SrcLevels = SrcLevel;
     731         933 :   MaxLevels = SrcLevel + DstLevel;
     732         949 :   while (SrcLevel > DstLevel) {
     733          16 :     SrcLoop = SrcLoop->getParentLoop();
     734          16 :     SrcLevel--;
     735             :   }
     736         957 :   while (DstLevel > SrcLevel) {
     737          24 :     DstLoop = DstLoop->getParentLoop();
     738          24 :     DstLevel--;
     739             :   }
     740         973 :   while (SrcLoop != DstLoop) {
     741          40 :     SrcLoop = SrcLoop->getParentLoop();
     742          40 :     DstLoop = DstLoop->getParentLoop();
     743          40 :     SrcLevel--;
     744             :   }
     745         933 :   CommonLevels = SrcLevel;
     746         933 :   MaxLevels -= CommonLevels;
     747         933 : }
     748             : 
     749             : 
     750             : // Given one of the loops containing the source, return
     751             : // its level index in our numbering scheme.
     752        1991 : unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
     753        1991 :   return SrcLoop->getLoopDepth();
     754             : }
     755             : 
     756             : 
     757             : // Given one of the loops containing the destination,
     758             : // return its level index in our numbering scheme.
     759        1511 : unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
     760        1511 :   unsigned D = DstLoop->getLoopDepth();
     761        1511 :   if (D > CommonLevels)
     762          23 :     return D - CommonLevels + SrcLevels;
     763             :   else
     764             :     return D;
     765             : }
     766             : 
     767             : 
     768             : // Returns true if Expression is loop invariant in LoopNest.
     769       10433 : bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
     770             :                                      const Loop *LoopNest) const {
     771       10433 :   if (!LoopNest)
     772             :     return true;
     773       13252 :   return SE->isLoopInvariant(Expression, LoopNest) &&
     774        6586 :     isLoopInvariant(Expression, LoopNest->getParentLoop());
     775             : }
     776             : 
     777             : 
     778             : 
     779             : // Finds the set of loops from the LoopNest that
     780             : // have a level <= CommonLevels and are referred to by the SCEV Expression.
     781         160 : void DependenceInfo::collectCommonLoops(const SCEV *Expression,
     782             :                                         const Loop *LoopNest,
     783             :                                         SmallBitVector &Loops) const {
     784         459 :   while (LoopNest) {
     785         299 :     unsigned Level = LoopNest->getLoopDepth();
     786         299 :     if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
     787         280 :       Loops.set(Level);
     788         299 :     LoopNest = LoopNest->getParentLoop();
     789             :   }
     790         160 : }
     791             : 
     792          66 : void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
     793             : 
     794             :   unsigned widestWidthSeen = 0;
     795             :   Type *widestType;
     796             : 
     797             :   // Go through each pair and find the widest bit to which we need
     798             :   // to extend all of them.
     799         133 :   for (Subscript *Pair : Pairs) {
     800          67 :     const SCEV *Src = Pair->Src;
     801          67 :     const SCEV *Dst = Pair->Dst;
     802          67 :     IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
     803          67 :     IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
     804          67 :     if (SrcTy == nullptr || DstTy == nullptr) {
     805             :       assert(SrcTy == DstTy && "This function only unify integer types and "
     806             :              "expect Src and Dst share the same type "
     807             :              "otherwise.");
     808             :       continue;
     809             :     }
     810          67 :     if (SrcTy->getBitWidth() > widestWidthSeen) {
     811             :       widestWidthSeen = SrcTy->getBitWidth();
     812             :       widestType = SrcTy;
     813             :     }
     814          67 :     if (DstTy->getBitWidth() > widestWidthSeen) {
     815             :       widestWidthSeen = DstTy->getBitWidth();
     816             :       widestType = DstTy;
     817             :     }
     818             :   }
     819             : 
     820             : 
     821             :   assert(widestWidthSeen > 0);
     822             : 
     823             :   // Now extend each pair to the widest seen.
     824         133 :   for (Subscript *Pair : Pairs) {
     825          67 :     const SCEV *Src = Pair->Src;
     826          67 :     const SCEV *Dst = Pair->Dst;
     827          67 :     IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
     828          67 :     IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
     829          67 :     if (SrcTy == nullptr || DstTy == nullptr) {
     830             :       assert(SrcTy == DstTy && "This function only unify integer types and "
     831             :              "expect Src and Dst share the same type "
     832             :              "otherwise.");
     833             :       continue;
     834             :     }
     835          67 :     if (SrcTy->getBitWidth() < widestWidthSeen)
     836             :       // Sign-extend Src to widestType
     837           0 :       Pair->Src = SE->getSignExtendExpr(Src, widestType);
     838          67 :     if (DstTy->getBitWidth() < widestWidthSeen) {
     839             :       // Sign-extend Dst to widestType
     840           0 :       Pair->Dst = SE->getSignExtendExpr(Dst, widestType);
     841             :     }
     842             :   }
     843          66 : }
     844             : 
     845             : // removeMatchingExtensions - Examines a subscript pair.
     846             : // If the source and destination are identically sign (or zero)
     847             : // extended, it strips off the extension in an effect to simplify
     848             : // the actual analysis.
     849         974 : void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
     850         974 :   const SCEV *Src = Pair->Src;
     851         974 :   const SCEV *Dst = Pair->Dst;
     852         974 :   if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
     853           0 :       (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
     854             :     const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
     855             :     const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
     856           0 :     const SCEV *SrcCastOp = SrcCast->getOperand();
     857           0 :     const SCEV *DstCastOp = DstCast->getOperand();
     858           0 :     if (SrcCastOp->getType() == DstCastOp->getType()) {
     859           0 :       Pair->Src = SrcCastOp;
     860           0 :       Pair->Dst = DstCastOp;
     861             :     }
     862             :   }
     863         974 : }
     864             : 
     865             : 
     866             : // Examine the scev and return true iff it's linear.
     867             : // Collect any loops mentioned in the set of "Loops".
     868         974 : bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
     869             :                                        SmallBitVector &Loops) {
     870             :   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
     871             :   if (!AddRec)
     872         974 :     return isLoopInvariant(Src, LoopNest);
     873        1000 :   const SCEV *Start = AddRec->getStart();
     874        1000 :   const SCEV *Step = AddRec->getStepRecurrence(*SE);
     875        1000 :   const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
     876        1000 :   if (!isa<SCEVCouldNotCompute>(UB)) {
     877        1990 :     if (SE->getTypeSizeInBits(Start->getType()) <
     878         995 :         SE->getTypeSizeInBits(UB->getType())) {
     879           0 :       if (!AddRec->getNoWrapFlags())
     880             :         return false;
     881             :     }
     882             :   }
     883        1000 :   if (!isLoopInvariant(Step, LoopNest))
     884             :     return false;
     885        1000 :   Loops.set(mapSrcLoop(AddRec->getLoop()));
     886        1000 :   return checkSrcSubscript(Start, LoopNest, Loops);
     887             : }
     888             : 
     889             : 
     890             : 
     891             : // Examine the scev and return true iff it's linear.
     892             : // Collect any loops mentioned in the set of "Loops".
     893         895 : bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
     894             :                                        SmallBitVector &Loops) {
     895             :   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
     896             :   if (!AddRec)
     897         895 :     return isLoopInvariant(Dst, LoopNest);
     898         978 :   const SCEV *Start = AddRec->getStart();
     899         978 :   const SCEV *Step = AddRec->getStepRecurrence(*SE);
     900         978 :   const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
     901         978 :   if (!isa<SCEVCouldNotCompute>(UB)) {
     902        1946 :     if (SE->getTypeSizeInBits(Start->getType()) <
     903         973 :         SE->getTypeSizeInBits(UB->getType())) {
     904           0 :       if (!AddRec->getNoWrapFlags())
     905             :         return false;
     906             :     }
     907             :   }
     908         978 :   if (!isLoopInvariant(Step, LoopNest))
     909             :     return false;
     910         978 :   Loops.set(mapDstLoop(AddRec->getLoop()));
     911         978 :   return checkDstSubscript(Start, LoopNest, Loops);
     912             : }
     913             : 
     914             : 
     915             : // Examines the subscript pair (the Src and Dst SCEVs)
     916             : // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
     917             : // Collects the associated loops in a set.
     918             : DependenceInfo::Subscript::ClassificationKind
     919         974 : DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
     920             :                              const SCEV *Dst, const Loop *DstLoopNest,
     921             :                              SmallBitVector &Loops) {
     922        1948 :   SmallBitVector SrcLoops(MaxLevels + 1);
     923        1948 :   SmallBitVector DstLoops(MaxLevels + 1);
     924         974 :   if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
     925             :     return Subscript::NonLinear;
     926         895 :   if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
     927             :     return Subscript::NonLinear;
     928         894 :   Loops = SrcLoops;
     929         894 :   Loops |= DstLoops;
     930         894 :   unsigned N = Loops.count();
     931         894 :   if (N == 0)
     932             :     return Subscript::ZIV;
     933         707 :   if (N == 1)
     934             :     return Subscript::SIV;
     935         489 :   if (N == 2 && (SrcLoops.count() == 0 ||
     936         439 :                  DstLoops.count() == 0 ||
     937          18 :                  (SrcLoops.count() == 1 && DstLoops.count() == 1)))
     938          22 :     return Subscript::RDIV;
     939             :   return Subscript::MIV;
     940             : }
     941             : 
     942             : 
     943             : // A wrapper around SCEV::isKnownPredicate.
     944             : // Looks for cases where we're interested in comparing for equality.
     945             : // If both X and Y have been identically sign or zero extended,
     946             : // it strips off the (confusing) extensions before invoking
     947             : // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
     948             : // will be similarly updated.
     949             : //
     950             : // If SCEV::isKnownPredicate can't prove the predicate,
     951             : // we try simple subtraction, which seems to help in some cases
     952             : // involving symbolics.
     953        7333 : bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
     954             :                                       const SCEV *Y) const {
     955        7333 :   if (Pred == CmpInst::ICMP_EQ ||
     956             :       Pred == CmpInst::ICMP_NE) {
     957           0 :     if ((isa<SCEVSignExtendExpr>(X) &&
     958         283 :          isa<SCEVSignExtendExpr>(Y)) ||
     959           0 :         (isa<SCEVZeroExtendExpr>(X) &&
     960             :          isa<SCEVZeroExtendExpr>(Y))) {
     961             :       const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
     962             :       const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
     963           0 :       const SCEV *Xop = CX->getOperand();
     964           0 :       const SCEV *Yop = CY->getOperand();
     965           0 :       if (Xop->getType() == Yop->getType()) {
     966             :         X = Xop;
     967             :         Y = Yop;
     968             :       }
     969             :     }
     970             :   }
     971        7333 :   if (SE->isKnownPredicate(Pred, X, Y))
     972             :     return true;
     973             :   // If SE->isKnownPredicate can't prove the condition,
     974             :   // we try the brute-force approach of subtracting
     975             :   // and testing the difference.
     976             :   // By testing with SE->isKnownPredicate first, we avoid
     977             :   // the possibility of overflow when the arguments are constants.
     978        5964 :   const SCEV *Delta = SE->getMinusSCEV(X, Y);
     979        5964 :   switch (Pred) {
     980          33 :   case CmpInst::ICMP_EQ:
     981          33 :     return Delta->isZero();
     982           4 :   case CmpInst::ICMP_NE:
     983           4 :     return SE->isKnownNonZero(Delta);
     984           1 :   case CmpInst::ICMP_SGE:
     985           1 :     return SE->isKnownNonNegative(Delta);
     986           0 :   case CmpInst::ICMP_SLE:
     987           0 :     return SE->isKnownNonPositive(Delta);
     988        5546 :   case CmpInst::ICMP_SGT:
     989        5546 :     return SE->isKnownPositive(Delta);
     990         380 :   case CmpInst::ICMP_SLT:
     991         380 :     return SE->isKnownNegative(Delta);
     992           0 :   default:
     993           0 :     llvm_unreachable("unexpected predicate in isKnownPredicate");
     994             :   }
     995             : }
     996             : 
     997             : /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1))
     998             : /// with some extra checking if S is an AddRec and we can prove less-than using
     999             : /// the loop bounds.
    1000         116 : bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
    1001             :   // First unify to the same type
    1002         116 :   auto *SType = dyn_cast<IntegerType>(S->getType());
    1003         116 :   auto *SizeType = dyn_cast<IntegerType>(Size->getType());
    1004         116 :   if (!SType || !SizeType)
    1005             :     return false;
    1006             :   Type *MaxType =
    1007         116 :       (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
    1008         116 :   S = SE->getTruncateOrZeroExtend(S, MaxType);
    1009         116 :   Size = SE->getTruncateOrZeroExtend(Size, MaxType);
    1010             : 
    1011             :   // Special check for addrecs using BE taken count
    1012         116 :   const SCEV *Bound = SE->getMinusSCEV(S, Size);
    1013             :   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {
    1014         104 :     if (AddRec->isAffine()) {
    1015         104 :       const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
    1016         104 :       if (!isa<SCEVCouldNotCompute>(BECount)) {
    1017         104 :         const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);
    1018         104 :         if (SE->isKnownNegative(Limit))
    1019             :           return true;
    1020             :       }
    1021             :     }
    1022             :   }
    1023             : 
    1024             :   // Check using normal isKnownNegative
    1025             :   const SCEV *LimitedBound =
    1026          52 :       SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));
    1027          26 :   return SE->isKnownNegative(LimitedBound);
    1028             : }
    1029             : 
    1030         127 : bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const {
    1031             :   bool Inbounds = false;
    1032             :   if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr))
    1033         127 :     Inbounds = SrcGEP->isInBounds();
    1034         127 :   if (Inbounds) {
    1035             :     if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    1036         109 :       if (AddRec->isAffine()) {
    1037             :         // We know S is for Ptr, the operand on a load/store, so doesn't wrap.
    1038             :         // If both parts are NonNegative, the end result will be NonNegative
    1039         322 :         if (SE->isKnownNonNegative(AddRec->getStart()) &&
    1040         208 :             SE->isKnownNonNegative(AddRec->getOperand(1)))
    1041             :           return true;
    1042             :       }
    1043             :     }
    1044             :   }
    1045             : 
    1046          23 :   return SE->isKnownNonNegative(S);
    1047             : }
    1048             : 
    1049             : // All subscripts are all the same type.
    1050             : // Loop bound may be smaller (e.g., a char).
    1051             : // Should zero extend loop bound, since it's always >= 0.
    1052             : // This routine collects upper bound and extends or truncates if needed.
    1053             : // Truncating is safe when subscripts are known not to wrap. Cases without
    1054             : // nowrap flags should have been rejected earlier.
    1055             : // Return null if no bound available.
    1056        2317 : const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
    1057        2317 :   if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
    1058        2303 :     const SCEV *UB = SE->getBackedgeTakenCount(L);
    1059        2303 :     return SE->getTruncateOrZeroExtend(UB, T);
    1060             :   }
    1061             :   return nullptr;
    1062             : }
    1063             : 
    1064             : 
    1065             : // Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
    1066             : // If the cast fails, returns NULL.
    1067          49 : const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
    1068             :                                                               Type *T) const {
    1069          49 :   if (const SCEV *UB = collectUpperBound(L, T))
    1070             :     return dyn_cast<SCEVConstant>(UB);
    1071             :   return nullptr;
    1072             : }
    1073             : 
    1074             : 
    1075             : // testZIV -
    1076             : // When we have a pair of subscripts of the form [c1] and [c2],
    1077             : // where c1 and c2 are both loop invariant, we attack it using
    1078             : // the ZIV test. Basically, we test by comparing the two values,
    1079             : // but there are actually three possible results:
    1080             : // 1) the values are equal, so there's a dependence
    1081             : // 2) the values are different, so there's no dependence
    1082             : // 3) the values might be equal, so we have to assume a dependence.
    1083             : //
    1084             : // Return true if dependence disproved.
    1085         187 : bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
    1086             :                              FullDependence &Result) const {
    1087             :   LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
    1088             :   LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
    1089             :   ++ZIVapplications;
    1090         187 :   if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
    1091             :     LLVM_DEBUG(dbgs() << "    provably dependent\n");
    1092             :     return false; // provably dependent
    1093             :   }
    1094           7 :   if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
    1095             :     LLVM_DEBUG(dbgs() << "    provably independent\n");
    1096             :     ++ZIVindependence;
    1097             :     return true; // provably independent
    1098             :   }
    1099             :   LLVM_DEBUG(dbgs() << "    possibly dependent\n");
    1100           4 :   Result.Consistent = false;
    1101           4 :   return false; // possibly dependent
    1102             : }
    1103             : 
    1104             : 
    1105             : // strongSIVtest -
    1106             : // From the paper, Practical Dependence Testing, Section 4.2.1
    1107             : //
    1108             : // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
    1109             : // where i is an induction variable, c1 and c2 are loop invariant,
    1110             : //  and a is a constant, we can solve it exactly using the Strong SIV test.
    1111             : //
    1112             : // Can prove independence. Failing that, can compute distance (and direction).
    1113             : // In the presence of symbolic terms, we can sometimes make progress.
    1114             : //
    1115             : // If there's a dependence,
    1116             : //
    1117             : //    c1 + a*i = c2 + a*i'
    1118             : //
    1119             : // The dependence distance is
    1120             : //
    1121             : //    d = i' - i = (c1 - c2)/a
    1122             : //
    1123             : // A dependence only exists if d is an integer and abs(d) <= U, where U is the
    1124             : // loop's upper bound. If a dependence exists, the dependence direction is
    1125             : // defined as
    1126             : //
    1127             : //                { < if d > 0
    1128             : //    direction = { = if d = 0
    1129             : //                { > if d < 0
    1130             : //
    1131             : // Return true if dependence disproved.
    1132         374 : bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
    1133             :                                    const SCEV *DstConst, const Loop *CurLoop,
    1134             :                                    unsigned Level, FullDependence &Result,
    1135             :                                    Constraint &NewConstraint) const {
    1136             :   LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
    1137             :   LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff);
    1138             :   LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
    1139             :   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst);
    1140             :   LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
    1141             :   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst);
    1142             :   LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
    1143             :   ++StrongSIVapplications;
    1144             :   assert(0 < Level && Level <= CommonLevels && "level out of range");
    1145         374 :   Level--;
    1146             : 
    1147         374 :   const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
    1148             :   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta);
    1149             :   LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
    1150             : 
    1151             :   // check that |Delta| < iteration count
    1152         374 :   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
    1153             :     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound);
    1154             :     LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
    1155             :     const SCEV *AbsDelta =
    1156         370 :       SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
    1157             :     const SCEV *AbsCoeff =
    1158         370 :       SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
    1159         370 :     const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
    1160         370 :     if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
    1161             :       // Distance greater than trip count - no dependence
    1162             :       ++StrongSIVindependence;
    1163             :       ++StrongSIVsuccesses;
    1164             :       return true;
    1165             :     }
    1166             :   }
    1167             : 
    1168             :   // Can we compute distance?
    1169         372 :   if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
    1170             :     APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
    1171             :     APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
    1172             :     APInt Distance  = ConstDelta; // these need to be initialized
    1173             :     APInt Remainder = ConstDelta;
    1174         355 :     APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
    1175             :     LLVM_DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
    1176             :     LLVM_DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
    1177             :     // Make sure Coeff divides Delta exactly
    1178         355 :     if (Remainder != 0) {
    1179             :       // Coeff doesn't divide Distance, no dependence
    1180             :       ++StrongSIVindependence;
    1181             :       ++StrongSIVsuccesses;
    1182             :       return true;
    1183             :     }
    1184         706 :     Result.DV[Level].Distance = SE->getConstant(Distance);
    1185         353 :     NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
    1186         353 :     if (Distance.sgt(0))
    1187          18 :       Result.DV[Level].Direction &= Dependence::DVEntry::LT;
    1188         335 :     else if (Distance.slt(0))
    1189          10 :       Result.DV[Level].Direction &= Dependence::DVEntry::GT;
    1190             :     else
    1191         325 :       Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
    1192             :     ++StrongSIVsuccesses;
    1193             :   }
    1194          17 :   else if (Delta->isZero()) {
    1195             :     // since 0/X == 0
    1196          13 :     Result.DV[Level].Distance = Delta;
    1197          13 :     NewConstraint.setDistance(Delta, CurLoop);
    1198          13 :     Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
    1199             :     ++StrongSIVsuccesses;
    1200             :   }
    1201             :   else {
    1202           4 :     if (Coeff->isOne()) {
    1203             :       LLVM_DEBUG(dbgs() << "\t    Distance = " << *Delta << "\n");
    1204           0 :       Result.DV[Level].Distance = Delta; // since X/1 == X
    1205           0 :       NewConstraint.setDistance(Delta, CurLoop);
    1206             :     }
    1207             :     else {
    1208           4 :       Result.Consistent = false;
    1209           4 :       NewConstraint.setLine(Coeff,
    1210           4 :                             SE->getNegativeSCEV(Coeff),
    1211           4 :                             SE->getNegativeSCEV(Delta), CurLoop);
    1212             :     }
    1213             : 
    1214             :     // maybe we can get a useful direction
    1215           4 :     bool DeltaMaybeZero     = !SE->isKnownNonZero(Delta);
    1216           4 :     bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
    1217           4 :     bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
    1218           4 :     bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
    1219           4 :     bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
    1220             :     // The double negatives above are confusing.
    1221             :     // It helps to read !SE->isKnownNonZero(Delta)
    1222             :     // as "Delta might be Zero"
    1223             :     unsigned NewDirection = Dependence::DVEntry::NONE;
    1224           4 :     if ((DeltaMaybePositive && CoeffMaybePositive) ||
    1225             :         (DeltaMaybeNegative && CoeffMaybeNegative))
    1226             :       NewDirection = Dependence::DVEntry::LT;
    1227           4 :     if (DeltaMaybeZero)
    1228           4 :       NewDirection |= Dependence::DVEntry::EQ;
    1229           4 :     if ((DeltaMaybeNegative && CoeffMaybePositive) ||
    1230             :         (DeltaMaybePositive && CoeffMaybeNegative))
    1231           4 :       NewDirection |= Dependence::DVEntry::GT;
    1232           4 :     if (NewDirection < Result.DV[Level].Direction)
    1233             :       ++StrongSIVsuccesses;
    1234           4 :     Result.DV[Level].Direction &= NewDirection;
    1235             :   }
    1236             :   return false;
    1237             : }
    1238             : 
    1239             : 
    1240             : // weakCrossingSIVtest -
    1241             : // From the paper, Practical Dependence Testing, Section 4.2.2
    1242             : //
    1243             : // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
    1244             : // where i is an induction variable, c1 and c2 are loop invariant,
    1245             : // and a is a constant, we can solve it exactly using the
    1246             : // Weak-Crossing SIV test.
    1247             : //
    1248             : // Given c1 + a*i = c2 - a*i', we can look for the intersection of
    1249             : // the two lines, where i = i', yielding
    1250             : //
    1251             : //    c1 + a*i = c2 - a*i
    1252             : //    2a*i = c2 - c1
    1253             : //    i = (c2 - c1)/2a
    1254             : //
    1255             : // If i < 0, there is no dependence.
    1256             : // If i > upperbound, there is no dependence.
    1257             : // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
    1258             : // If i = upperbound, there's a dependence with distance = 0.
    1259             : // If i is integral, there's a dependence (all directions).
    1260             : // If the non-integer part = 1/2, there's a dependence (<> directions).
    1261             : // Otherwise, there's no dependence.
    1262             : //
    1263             : // Can prove independence. Failing that,
    1264             : // can sometimes refine the directions.
    1265             : // Can determine iteration for splitting.
    1266             : //
    1267             : // Return true if dependence disproved.
    1268          11 : bool DependenceInfo::weakCrossingSIVtest(
    1269             :     const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
    1270             :     const Loop *CurLoop, unsigned Level, FullDependence &Result,
    1271             :     Constraint &NewConstraint, const SCEV *&SplitIter) const {
    1272             :   LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
    1273             :   LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff << "\n");
    1274             :   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
    1275             :   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
    1276             :   ++WeakCrossingSIVapplications;
    1277             :   assert(0 < Level && Level <= CommonLevels && "Level out of range");
    1278          11 :   Level--;
    1279          11 :   Result.Consistent = false;
    1280          11 :   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
    1281             :   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
    1282          11 :   NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
    1283          11 :   if (Delta->isZero()) {
    1284           1 :     Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
    1285           1 :     Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
    1286             :     ++WeakCrossingSIVsuccesses;
    1287           1 :     if (!Result.DV[Level].Direction) {
    1288             :       ++WeakCrossingSIVindependence;
    1289             :       return true;
    1290             :     }
    1291           1 :     Result.DV[Level].Distance = Delta; // = 0
    1292           1 :     return false;
    1293             :   }
    1294             :   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
    1295             :   if (!ConstCoeff)
    1296             :     return false;
    1297             : 
    1298          10 :   Result.DV[Level].Splitable = true;
    1299          10 :   if (SE->isKnownNegative(ConstCoeff)) {
    1300           0 :     ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
    1301             :     assert(ConstCoeff &&
    1302             :            "dynamic cast of negative of ConstCoeff should yield constant");
    1303           0 :     Delta = SE->getNegativeSCEV(Delta);
    1304             :   }
    1305             :   assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
    1306             : 
    1307             :   // compute SplitIter for use by DependenceInfo::getSplitIteration()
    1308          20 :   SplitIter = SE->getUDivExpr(
    1309          10 :       SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
    1310             :       SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
    1311             :   LLVM_DEBUG(dbgs() << "\t    Split iter = " << *SplitIter << "\n");
    1312             : 
    1313             :   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
    1314             :   if (!ConstDelta)
    1315             :     return false;
    1316             : 
    1317             :   // We're certain that ConstCoeff > 0; therefore,
    1318             :   // if Delta < 0, then no dependence.
    1319             :   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
    1320             :   LLVM_DEBUG(dbgs() << "\t    ConstCoeff = " << *ConstCoeff << "\n");
    1321           8 :   if (SE->isKnownNegative(Delta)) {
    1322             :     // No dependence, Delta < 0
    1323             :     ++WeakCrossingSIVindependence;
    1324             :     ++WeakCrossingSIVsuccesses;
    1325             :     return true;
    1326             :   }
    1327             : 
    1328             :   // We're certain that Delta > 0 and ConstCoeff > 0.
    1329             :   // Check Delta/(2*ConstCoeff) against upper loop bound
    1330           7 :   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
    1331             :     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
    1332           7 :     const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
    1333           7 :     const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
    1334             :                                     ConstantTwo);
    1335             :     LLVM_DEBUG(dbgs() << "\t    ML = " << *ML << "\n");
    1336           7 :     if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
    1337             :       // Delta too big, no dependence
    1338             :       ++WeakCrossingSIVindependence;
    1339             :       ++WeakCrossingSIVsuccesses;
    1340             :       return true;
    1341             :     }
    1342           6 :     if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
    1343             :       // i = i' = UB
    1344           1 :       Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
    1345           1 :       Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
    1346             :       ++WeakCrossingSIVsuccesses;
    1347           1 :       if (!Result.DV[Level].Direction) {
    1348             :         ++WeakCrossingSIVindependence;
    1349             :         return true;
    1350             :       }
    1351           1 :       Result.DV[Level].Splitable = false;
    1352           1 :       Result.DV[Level].Distance = SE->getZero(Delta->getType());
    1353           1 :       return false;
    1354             :     }
    1355             :   }
    1356             : 
    1357             :   // check that Coeff divides Delta
    1358             :   APInt APDelta = ConstDelta->getAPInt();
    1359             :   APInt APCoeff = ConstCoeff->getAPInt();
    1360             :   APInt Distance = APDelta; // these need to be initialzed
    1361             :   APInt Remainder = APDelta;
    1362           5 :   APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
    1363             :   LLVM_DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
    1364           5 :   if (Remainder != 0) {
    1365             :     // Coeff doesn't divide Delta, no dependence
    1366             :     ++WeakCrossingSIVindependence;
    1367             :     ++WeakCrossingSIVsuccesses;
    1368             :     return true;
    1369             :   }
    1370             :   LLVM_DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
    1371             : 
    1372             :   // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
    1373           4 :   APInt Two = APInt(Distance.getBitWidth(), 2, true);
    1374           8 :   Remainder = Distance.srem(Two);
    1375             :   LLVM_DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
    1376           4 :   if (Remainder != 0) {
    1377             :     // Equal direction isn't possible
    1378           4 :     Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
    1379             :     ++WeakCrossingSIVsuccesses;
    1380             :   }
    1381             :   return false;
    1382             : }
    1383             : 
    1384             : 
    1385             : // Kirch's algorithm, from
    1386             : //
    1387             : //        Optimizing Supercompilers for Supercomputers
    1388             : //        Michael Wolfe
    1389             : //        MIT Press, 1989
    1390             : //
    1391             : // Program 2.1, page 29.
    1392             : // Computes the GCD of AM and BM.
    1393             : // Also finds a solution to the equation ax - by = gcd(a, b).
    1394             : // Returns true if dependence disproved; i.e., gcd does not divide Delta.
    1395          40 : static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
    1396             :                     const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
    1397             :   APInt A0(Bits, 1, true), A1(Bits, 0, true);
    1398             :   APInt B0(Bits, 0, true), B1(Bits, 1, true);
    1399          40 :   APInt G0 = AM.abs();
    1400          40 :   APInt G1 = BM.abs();
    1401             :   APInt Q = G0; // these need to be initialized
    1402             :   APInt R = G0;
    1403          40 :   APInt::sdivrem(G0, G1, Q, R);
    1404          76 :   while (R != 0) {
    1405          72 :     APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
    1406          72 :     APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
    1407          36 :     G0 = G1; G1 = R;
    1408          36 :     APInt::sdivrem(G0, G1, Q, R);
    1409             :   }
    1410          40 :   G = G1;
    1411             :   LLVM_DEBUG(dbgs() << "\t    GCD = " << G << "\n");
    1412          80 :   X = AM.slt(0) ? -A1 : A1;
    1413          80 :   Y = BM.slt(0) ? B1 : -B1;
    1414             : 
    1415             :   // make sure gcd divides Delta
    1416          80 :   R = Delta.srem(G);
    1417          40 :   if (R != 0)
    1418             :     return true; // gcd doesn't divide Delta, no dependence
    1419          36 :   Q = Delta.sdiv(G);
    1420          36 :   X *= Q;
    1421          36 :   Y *= Q;
    1422          36 :   return false;
    1423             : }
    1424             : 
    1425         108 : static APInt floorOfQuotient(const APInt &A, const APInt &B) {
    1426             :   APInt Q = A; // these need to be initialized
    1427             :   APInt R = A;
    1428         108 :   APInt::sdivrem(A, B, Q, R);
    1429         108 :   if (R == 0)
    1430             :     return Q;
    1431          56 :   if ((A.sgt(0) && B.sgt(0)) ||
    1432          39 :       (A.slt(0) && B.slt(0)))
    1433             :     return Q;
    1434             :   else
    1435          27 :     return Q - 1;
    1436             : }
    1437             : 
    1438         108 : static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
    1439             :   APInt Q = A; // these need to be initialized
    1440             :   APInt R = A;
    1441         108 :   APInt::sdivrem(A, B, Q, R);
    1442         108 :   if (R == 0)
    1443             :     return Q;
    1444          59 :   if ((A.sgt(0) && B.sgt(0)) ||
    1445          30 :       (A.slt(0) && B.slt(0)))
    1446          29 :     return Q + 1;
    1447             :   else
    1448             :     return Q;
    1449             : }
    1450             : 
    1451             : 
    1452             : static
    1453         108 : APInt maxAPInt(APInt A, APInt B) {
    1454         108 :   return A.sgt(B) ? A : B;
    1455             : }
    1456             : 
    1457             : 
    1458             : static
    1459         108 : APInt minAPInt(APInt A, APInt B) {
    1460         108 :   return A.slt(B) ? A : B;
    1461             : }
    1462             : 
    1463             : 
    1464             : // exactSIVtest -
    1465             : // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
    1466             : // where i is an induction variable, c1 and c2 are loop invariant, and a1
    1467             : // and a2 are constant, we can solve it exactly using an algorithm developed
    1468             : // by Banerjee and Wolfe. See Section 2.5.3 in
    1469             : //
    1470             : //        Optimizing Supercompilers for Supercomputers
    1471             : //        Michael Wolfe
    1472             : //        MIT Press, 1989
    1473             : //
    1474             : // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
    1475             : // so use them if possible. They're also a bit better with symbolics and,
    1476             : // in the case of the strong SIV test, can compute Distances.
    1477             : //
    1478             : // Return true if dependence disproved.
    1479          36 : bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
    1480             :                                   const SCEV *SrcConst, const SCEV *DstConst,
    1481             :                                   const Loop *CurLoop, unsigned Level,
    1482             :                                   FullDependence &Result,
    1483             :                                   Constraint &NewConstraint) const {
    1484             :   LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
    1485             :   LLVM_DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
    1486             :   LLVM_DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
    1487             :   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
    1488             :   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
    1489             :   ++ExactSIVapplications;
    1490             :   assert(0 < Level && Level <= CommonLevels && "Level out of range");
    1491          36 :   Level--;
    1492          36 :   Result.Consistent = false;
    1493          36 :   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
    1494             :   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
    1495          36 :   NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
    1496             :                         Delta, CurLoop);
    1497             :   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
    1498             :   const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
    1499             :   const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
    1500          36 :   if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
    1501             :     return false;
    1502             : 
    1503             :   // find gcd
    1504             :   APInt G, X, Y;
    1505             :   APInt AM = ConstSrcCoeff->getAPInt();
    1506             :   APInt BM = ConstDstCoeff->getAPInt();
    1507          27 :   unsigned Bits = AM.getBitWidth();
    1508          27 :   if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
    1509             :     // gcd doesn't divide Delta, no dependence
    1510             :     ++ExactSIVindependence;
    1511             :     ++ExactSIVsuccesses;
    1512             :     return true;
    1513             :   }
    1514             : 
    1515             :   LLVM_DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
    1516             : 
    1517             :   // since SCEV construction normalizes, LM = 0
    1518             :   APInt UM(Bits, 1, true);
    1519             :   bool UMvalid = false;
    1520             :   // UM is perhaps unavailable, let's check
    1521          24 :   if (const SCEVConstant *CUB =
    1522          24 :       collectConstantUpperBound(CurLoop, Delta->getType())) {
    1523          24 :     UM = CUB->getAPInt();
    1524             :     LLVM_DEBUG(dbgs() << "\t    UM = " << UM << "\n");
    1525             :     UMvalid = true;
    1526             :   }
    1527             : 
    1528          24 :   APInt TU(APInt::getSignedMaxValue(Bits));
    1529          24 :   APInt TL(APInt::getSignedMinValue(Bits));
    1530             : 
    1531             :   // test(BM/G, LM-X) and test(-BM/G, X-UM)
    1532          24 :   APInt TMUL = BM.sdiv(G);
    1533          24 :   if (TMUL.sgt(0)) {
    1534          54 :     TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
    1535             :     LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
    1536          18 :     if (UMvalid) {
    1537          54 :       TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
    1538             :       LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
    1539             :     }
    1540             :   }
    1541             :   else {
    1542          18 :     TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
    1543             :     LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
    1544           6 :     if (UMvalid) {
    1545          18 :       TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
    1546             :       LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
    1547             :     }
    1548             :   }
    1549             : 
    1550             :   // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
    1551          24 :   TMUL = AM.sdiv(G);
    1552          24 :   if (TMUL.sgt(0)) {
    1553          54 :     TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
    1554             :     LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
    1555          18 :     if (UMvalid) {
    1556          54 :       TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
    1557             :       LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
    1558             :     }
    1559             :   }
    1560             :   else {
    1561          18 :     TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
    1562             :     LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
    1563           6 :     if (UMvalid) {
    1564          18 :       TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
    1565             :       LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
    1566             :     }
    1567             :   }
    1568          24 :   if (TL.sgt(TU)) {
    1569             :     ++ExactSIVindependence;
    1570             :     ++ExactSIVsuccesses;
    1571             :     return true;
    1572             :   }
    1573             : 
    1574             :   // explore directions
    1575             :   unsigned NewDirection = Dependence::DVEntry::NONE;
    1576             : 
    1577             :   // less than
    1578             :   APInt SaveTU(TU); // save these
    1579             :   APInt SaveTL(TL);
    1580             :   LLVM_DEBUG(dbgs() << "\t    exploring LT direction\n");
    1581          18 :   TMUL = AM - BM;
    1582          18 :   if (TMUL.sgt(0)) {
    1583          36 :     TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
    1584             :     LLVM_DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
    1585             :   }
    1586             :   else {
    1587          18 :     TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
    1588             :     LLVM_DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
    1589             :   }
    1590          18 :   if (TL.sle(TU)) {
    1591             :     NewDirection |= Dependence::DVEntry::LT;
    1592             :     ++ExactSIVsuccesses;
    1593             :   }
    1594             : 
    1595             :   // equal
    1596          18 :   TU = SaveTU; // restore
    1597          18 :   TL = SaveTL;
    1598             :   LLVM_DEBUG(dbgs() << "\t    exploring EQ direction\n");
    1599          18 :   if (TMUL.sgt(0)) {
    1600          36 :     TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
    1601             :     LLVM_DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
    1602             :   }
    1603             :   else {
    1604          18 :     TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
    1605             :     LLVM_DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
    1606             :   }
    1607          18 :   TMUL = BM - AM;
    1608          18 :   if (TMUL.sgt(0)) {
    1609          18 :     TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
    1610             :     LLVM_DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
    1611             :   }
    1612             :   else {
    1613          36 :     TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
    1614             :     LLVM_DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
    1615             :   }
    1616          18 :   if (TL.sle(TU)) {
    1617           1 :     NewDirection |= Dependence::DVEntry::EQ;
    1618             :     ++ExactSIVsuccesses;
    1619             :   }
    1620             : 
    1621             :   // greater than
    1622          18 :   TU = SaveTU; // restore
    1623          18 :   TL = SaveTL;
    1624             :   LLVM_DEBUG(dbgs() << "\t    exploring GT direction\n");
    1625          18 :   if (TMUL.sgt(0)) {
    1626          18 :     TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
    1627             :     LLVM_DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
    1628             :   }
    1629             :   else {
    1630          36 :     TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
    1631             :     LLVM_DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
    1632             :   }
    1633          18 :   if (TL.sle(TU)) {
    1634           4 :     NewDirection |= Dependence::DVEntry::GT;
    1635             :     ++ExactSIVsuccesses;
    1636             :   }
    1637             : 
    1638             :   // finished
    1639          18 :   Result.DV[Level].Direction &= NewDirection;
    1640          18 :   if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
    1641             :     ++ExactSIVindependence;
    1642             :   return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
    1643             : }
    1644             : 
    1645             : 
    1646             : 
    1647             : // Return true if the divisor evenly divides the dividend.
    1648             : static
    1649           6 : bool isRemainderZero(const SCEVConstant *Dividend,
    1650             :                      const SCEVConstant *Divisor) {
    1651             :   const APInt &ConstDividend = Dividend->getAPInt();
    1652             :   const APInt &ConstDivisor = Divisor->getAPInt();
    1653           6 :   return ConstDividend.srem(ConstDivisor) == 0;
    1654             : }
    1655             : 
    1656             : 
    1657             : // weakZeroSrcSIVtest -
    1658             : // From the paper, Practical Dependence Testing, Section 4.2.2
    1659             : //
    1660             : // When we have a pair of subscripts of the form [c1] and [c2 + a*i],
    1661             : // where i is an induction variable, c1 and c2 are loop invariant,
    1662             : // and a is a constant, we can solve it exactly using the
    1663             : // Weak-Zero SIV test.
    1664             : //
    1665             : // Given
    1666             : //
    1667             : //    c1 = c2 + a*i
    1668             : //
    1669             : // we get
    1670             : //
    1671             : //    (c1 - c2)/a = i
    1672             : //
    1673             : // If i is not an integer, there's no dependence.
    1674             : // If i < 0 or > UB, there's no dependence.
    1675             : // If i = 0, the direction is >= and peeling the
    1676             : // 1st iteration will break the dependence.
    1677             : // If i = UB, the direction is <= and peeling the
    1678             : // last iteration will break the dependence.
    1679             : // Otherwise, the direction is *.
    1680             : //
    1681             : // Can prove independence. Failing that, we can sometimes refine
    1682             : // the directions. Can sometimes show that first or last
    1683             : // iteration carries all the dependences (so worth peeling).
    1684             : //
    1685             : // (see also weakZeroDstSIVtest)
    1686             : //
    1687             : // Return true if dependence disproved.
    1688          10 : bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
    1689             :                                         const SCEV *SrcConst,
    1690             :                                         const SCEV *DstConst,
    1691             :                                         const Loop *CurLoop, unsigned Level,
    1692             :                                         FullDependence &Result,
    1693             :                                         Constraint &NewConstraint) const {
    1694             :   // For the WeakSIV test, it's possible the loop isn't common to
    1695             :   // the Src and Dst loops. If it isn't, then there's no need to
    1696             :   // record a direction.
    1697             :   LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
    1698             :   LLVM_DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << "\n");
    1699             :   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
    1700             :   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
    1701             :   ++WeakZeroSIVapplications;
    1702             :   assert(0 < Level && Level <= MaxLevels && "Level out of range");
    1703          10 :   Level--;
    1704          10 :   Result.Consistent = false;
    1705          10 :   const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
    1706          20 :   NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
    1707             :                         CurLoop);
    1708             :   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
    1709          10 :   if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
    1710           5 :     if (Level < CommonLevels) {
    1711           4 :       Result.DV[Level].Direction &= Dependence::DVEntry::GE;
    1712           4 :       Result.DV[Level].PeelFirst = true;
    1713             :       ++WeakZeroSIVsuccesses;
    1714             :     }
    1715           5 :     return false; // dependences caused by first iteration
    1716             :   }
    1717             :   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
    1718             :   if (!ConstCoeff)
    1719             :     return false;
    1720             :   const SCEV *AbsCoeff =
    1721           5 :     SE->isKnownNegative(ConstCoeff) ?
    1722           0 :     SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
    1723             :   const SCEV *NewDelta =
    1724           5 :     SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
    1725             : 
    1726             :   // check that Delta/SrcCoeff < iteration count
    1727             :   // really check NewDelta < count*AbsCoeff
    1728           5 :   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
    1729             :     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
    1730           5 :     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
    1731           5 :     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
    1732             :       ++WeakZeroSIVindependence;
    1733             :       ++WeakZeroSIVsuccesses;
    1734             :       return true;
    1735             :     }
    1736           4 :     if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
    1737             :       // dependences caused by last iteration
    1738           1 :       if (Level < CommonLevels) {
    1739           1 :         Result.DV[Level].Direction &= Dependence::DVEntry::LE;
    1740           1 :         Result.DV[Level].PeelLast = true;
    1741             :         ++WeakZeroSIVsuccesses;
    1742             :       }
    1743           1 :       return false;
    1744             :     }
    1745             :   }
    1746             : 
    1747             :   // check that Delta/SrcCoeff >= 0
    1748             :   // really check that NewDelta >= 0
    1749           3 :   if (SE->isKnownNegative(NewDelta)) {
    1750             :     // No dependence, newDelta < 0
    1751             :     ++WeakZeroSIVindependence;
    1752             :     ++WeakZeroSIVsuccesses;
    1753             :     return true;
    1754             :   }
    1755             : 
    1756             :   // if SrcCoeff doesn't divide Delta, then no dependence
    1757           4 :   if (isa<SCEVConstant>(Delta) &&
    1758           2 :       !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
    1759             :     ++WeakZeroSIVindependence;
    1760             :     ++WeakZeroSIVsuccesses;
    1761           1 :     return true;
    1762             :   }
    1763             :   return false;
    1764             : }
    1765             : 
    1766             : 
    1767             : // weakZeroDstSIVtest -
    1768             : // From the paper, Practical Dependence Testing, Section 4.2.2
    1769             : //
    1770             : // When we have a pair of subscripts of the form [c1 + a*i] and [c2],
    1771             : // where i is an induction variable, c1 and c2 are loop invariant,
    1772             : // and a is a constant, we can solve it exactly using the
    1773             : // Weak-Zero SIV test.
    1774             : //
    1775             : // Given
    1776             : //
    1777             : //    c1 + a*i = c2
    1778             : //
    1779             : // we get
    1780             : //
    1781             : //    i = (c2 - c1)/a
    1782             : //
    1783             : // If i is not an integer, there's no dependence.
    1784             : // If i < 0 or > UB, there's no dependence.
    1785             : // If i = 0, the direction is <= and peeling the
    1786             : // 1st iteration will break the dependence.
    1787             : // If i = UB, the direction is >= and peeling the
    1788             : // last iteration will break the dependence.
    1789             : // Otherwise, the direction is *.
    1790             : //
    1791             : // Can prove independence. Failing that, we can sometimes refine
    1792             : // the directions. Can sometimes show that first or last
    1793             : // iteration carries all the dependences (so worth peeling).
    1794             : //
    1795             : // (see also weakZeroSrcSIVtest)
    1796             : //
    1797             : // Return true if dependence disproved.
    1798          10 : bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
    1799             :                                         const SCEV *SrcConst,
    1800             :                                         const SCEV *DstConst,
    1801             :                                         const Loop *CurLoop, unsigned Level,
    1802             :                                         FullDependence &Result,
    1803             :                                         Constraint &NewConstraint) const {
    1804             :   // For the WeakSIV test, it's possible the loop isn't common to the
    1805             :   // Src and Dst loops. If it isn't, then there's no need to record a direction.
    1806             :   LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
    1807             :   LLVM_DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << "\n");
    1808             :   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
    1809             :   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
    1810             :   ++WeakZeroSIVapplications;
    1811             :   assert(0 < Level && Level <= SrcLevels && "Level out of range");
    1812          10 :   Level--;
    1813          10 :   Result.Consistent = false;
    1814          10 :   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
    1815          20 :   NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
    1816             :                         CurLoop);
    1817             :   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
    1818          10 :   if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
    1819           3 :     if (Level < CommonLevels) {
    1820           3 :       Result.DV[Level].Direction &= Dependence::DVEntry::LE;
    1821           3 :       Result.DV[Level].PeelFirst = true;
    1822             :       ++WeakZeroSIVsuccesses;
    1823             :     }
    1824           3 :     return false; // dependences caused by first iteration
    1825             :   }
    1826             :   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
    1827             :   if (!ConstCoeff)
    1828             :     return false;
    1829             :   const SCEV *AbsCoeff =
    1830           7 :     SE->isKnownNegative(ConstCoeff) ?
    1831           0 :     SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
    1832             :   const SCEV *NewDelta =
    1833           7 :     SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
    1834             : 
    1835             :   // check that Delta/SrcCoeff < iteration count
    1836             :   // really check NewDelta < count*AbsCoeff
    1837           7 :   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
    1838             :     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
    1839           7 :     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
    1840           7 :     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
    1841             :       ++WeakZeroSIVindependence;
    1842             :       ++WeakZeroSIVsuccesses;
    1843             :       return true;
    1844             :     }
    1845           6 :     if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
    1846             :       // dependences caused by last iteration
    1847           1 :       if (Level < CommonLevels) {
    1848           1 :         Result.DV[Level].Direction &= Dependence::DVEntry::GE;
    1849           1 :         Result.DV[Level].PeelLast = true;
    1850             :         ++WeakZeroSIVsuccesses;
    1851             :       }
    1852           1 :       return false;
    1853             :     }
    1854             :   }
    1855             : 
    1856             :   // check that Delta/SrcCoeff >= 0
    1857             :   // really check that NewDelta >= 0
    1858           5 :   if (SE->isKnownNegative(NewDelta)) {
    1859             :     // No dependence, newDelta < 0
    1860             :     ++WeakZeroSIVindependence;
    1861             :     ++WeakZeroSIVsuccesses;
    1862             :     return true;
    1863             :   }
    1864             : 
    1865             :   // if SrcCoeff doesn't divide Delta, then no dependence
    1866           8 :   if (isa<SCEVConstant>(Delta) &&
    1867           4 :       !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
    1868             :     ++WeakZeroSIVindependence;
    1869             :     ++WeakZeroSIVsuccesses;
    1870           1 :     return true;
    1871             :   }
    1872             :   return false;
    1873             : }
    1874             : 
    1875             : 
    1876             : // exactRDIVtest - Tests the RDIV subscript pair for dependence.
    1877             : // Things of the form [c1 + a*i] and [c2 + b*j],
    1878             : // where i and j are induction variable, c1 and c2 are loop invariant,
    1879             : // and a and b are constants.
    1880             : // Returns true if any possible dependence is disproved.
    1881             : // Marks the result as inconsistent.
    1882             : // Works in some cases that symbolicRDIVtest doesn't, and vice versa.
    1883          22 : bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
    1884             :                                    const SCEV *SrcConst, const SCEV *DstConst,
    1885             :                                    const Loop *SrcLoop, const Loop *DstLoop,
    1886             :                                    FullDependence &Result) const {
    1887             :   LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
    1888             :   LLVM_DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
    1889             :   LLVM_DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
    1890             :   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
    1891             :   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
    1892             :   ++ExactRDIVapplications;
    1893          22 :   Result.Consistent = false;
    1894          22 :   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
    1895             :   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
    1896             :   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
    1897             :   const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
    1898             :   const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
    1899          22 :   if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
    1900             :     return false;
    1901             : 
    1902             :   // find gcd
    1903             :   APInt G, X, Y;
    1904             :   APInt AM = ConstSrcCoeff->getAPInt();
    1905             :   APInt BM = ConstDstCoeff->getAPInt();
    1906          13 :   unsigned Bits = AM.getBitWidth();
    1907          13 :   if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
    1908             :     // gcd doesn't divide Delta, no dependence
    1909             :     ++ExactRDIVindependence;
    1910             :     return true;
    1911             :   }
    1912             : 
    1913             :   LLVM_DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
    1914             : 
    1915             :   // since SCEV construction seems to normalize, LM = 0
    1916             :   APInt SrcUM(Bits, 1, true);
    1917             :   bool SrcUMvalid = false;
    1918             :   // SrcUM is perhaps unavailable, let's check
    1919          12 :   if (const SCEVConstant *UpperBound =
    1920          12 :       collectConstantUpperBound(SrcLoop, Delta->getType())) {
    1921          12 :     SrcUM = UpperBound->getAPInt();
    1922             :     LLVM_DEBUG(dbgs() << "\t    SrcUM = " << SrcUM << "\n");
    1923             :     SrcUMvalid = true;
    1924             :   }
    1925             : 
    1926             :   APInt DstUM(Bits, 1, true);
    1927             :   bool DstUMvalid = false;
    1928             :   // UM is perhaps unavailable, let's check
    1929          12 :   if (const SCEVConstant *UpperBound =
    1930          12 :       collectConstantUpperBound(DstLoop, Delta->getType())) {
    1931          12 :     DstUM = UpperBound->getAPInt();
    1932             :     LLVM_DEBUG(dbgs() << "\t    DstUM = " << DstUM << "\n");
    1933             :     DstUMvalid = true;
    1934             :   }
    1935             : 
    1936          12 :   APInt TU(APInt::getSignedMaxValue(Bits));
    1937          12 :   APInt TL(APInt::getSignedMinValue(Bits));
    1938             : 
    1939             :   // test(BM/G, LM-X) and test(-BM/G, X-UM)
    1940          12 :   APInt TMUL = BM.sdiv(G);
    1941          12 :   if (TMUL.sgt(0)) {
    1942          24 :     TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
    1943             :     LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
    1944           8 :     if (SrcUMvalid) {
    1945          24 :       TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
    1946             :       LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
    1947             :     }
    1948             :   }
    1949             :   else {
    1950          12 :     TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
    1951             :     LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
    1952           4 :     if (SrcUMvalid) {
    1953          12 :       TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
    1954             :       LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
    1955             :     }
    1956             :   }
    1957             : 
    1958             :   // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
    1959          12 :   TMUL = AM.sdiv(G);
    1960          12 :   if (TMUL.sgt(0)) {
    1961          24 :     TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
    1962             :     LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
    1963           8 :     if (DstUMvalid) {
    1964          24 :       TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
    1965             :       LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
    1966             :     }
    1967             :   }
    1968             :   else {
    1969          12 :     TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
    1970             :     LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
    1971           4 :     if (DstUMvalid) {
    1972          12 :       TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
    1973             :       LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
    1974             :     }
    1975             :   }
    1976             :   if (TL.sgt(TU))
    1977             :     ++ExactRDIVindependence;
    1978             :   return TL.sgt(TU);
    1979             : }
    1980             : 
    1981             : 
    1982             : // symbolicRDIVtest -
    1983             : // In Section 4.5 of the Practical Dependence Testing paper,the authors
    1984             : // introduce a special case of Banerjee's Inequalities (also called the
    1985             : // Extreme-Value Test) that can handle some of the SIV and RDIV cases,
    1986             : // particularly cases with symbolics. Since it's only able to disprove
    1987             : // dependence (not compute distances or directions), we'll use it as a
    1988             : // fall back for the other tests.
    1989             : //
    1990             : // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
    1991             : // where i and j are induction variables and c1 and c2 are loop invariants,
    1992             : // we can use the symbolic tests to disprove some dependences, serving as a
    1993             : // backup for the RDIV test. Note that i and j can be the same variable,
    1994             : // letting this test serve as a backup for the various SIV tests.
    1995             : //
    1996             : // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
    1997             : //  0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
    1998             : // loop bounds for the i and j loops, respectively. So, ...
    1999             : //
    2000             : // c1 + a1*i = c2 + a2*j
    2001             : // a1*i - a2*j = c2 - c1
    2002             : //
    2003             : // To test for a dependence, we compute c2 - c1 and make sure it's in the
    2004             : // range of the maximum and minimum possible values of a1*i - a2*j.
    2005             : // Considering the signs of a1 and a2, we have 4 possible cases:
    2006             : //
    2007             : // 1) If a1 >= 0 and a2 >= 0, then
    2008             : //        a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
    2009             : //              -a2*N2 <= c2 - c1 <= a1*N1
    2010             : //
    2011             : // 2) If a1 >= 0 and a2 <= 0, then
    2012             : //        a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
    2013             : //                  0 <= c2 - c1 <= a1*N1 - a2*N2
    2014             : //
    2015             : // 3) If a1 <= 0 and a2 >= 0, then
    2016             : //        a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
    2017             : //        a1*N1 - a2*N2 <= c2 - c1 <= 0
    2018             : //
    2019             : // 4) If a1 <= 0 and a2 <= 0, then
    2020             : //        a1*N1 - a2*0  <= c2 - c1 <= a1*0 - a2*N2
    2021             : //        a1*N1         <= c2 - c1 <=       -a2*N2
    2022             : //
    2023             : // return true if dependence disproved
    2024         415 : bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
    2025             :                                       const SCEV *C1, const SCEV *C2,
    2026             :                                       const Loop *Loop1,
    2027             :                                       const Loop *Loop2) const {
    2028             :   ++SymbolicRDIVapplications;
    2029             :   LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
    2030             :   LLVM_DEBUG(dbgs() << "\t    A1 = " << *A1);
    2031             :   LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
    2032             :   LLVM_DEBUG(dbgs() << "\t    A2 = " << *A2 << "\n");
    2033             :   LLVM_DEBUG(dbgs() << "\t    C1 = " << *C1 << "\n");
    2034             :   LLVM_DEBUG(dbgs() << "\t    C2 = " << *C2 << "\n");
    2035         415 :   const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
    2036         415 :   const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
    2037             :   LLVM_DEBUG(if (N1) dbgs() << "\t    N1 = " << *N1 << "\n");
    2038             :   LLVM_DEBUG(if (N2) dbgs() << "\t    N2 = " << *N2 << "\n");
    2039         415 :   const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
    2040         415 :   const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
    2041             :   LLVM_DEBUG(dbgs() << "\t    C2 - C1 = " << *C2_C1 << "\n");
    2042             :   LLVM_DEBUG(dbgs() << "\t    C1 - C2 = " << *C1_C2 << "\n");
    2043         415 :   if (SE->isKnownNonNegative(A1)) {
    2044         347 :     if (SE->isKnownNonNegative(A2)) {
    2045             :       // A1 >= 0 && A2 >= 0
    2046         336 :       if (N1) {
    2047             :         // make sure that c2 - c1 <= a1*N1
    2048         334 :         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
    2049             :         LLVM_DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
    2050         334 :         if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
    2051             :           ++SymbolicRDIVindependence;
    2052             :           return true;
    2053             :         }
    2054             :       }
    2055         334 :       if (N2) {
    2056             :         // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
    2057         332 :         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
    2058             :         LLVM_DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
    2059         332 :         if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
    2060             :           ++SymbolicRDIVindependence;
    2061           2 :           return true;
    2062             :         }
    2063             :       }
    2064             :     }
    2065          11 :     else if (SE->isKnownNonPositive(A2)) {
    2066             :       // a1 >= 0 && a2 <= 0
    2067          11 :       if (N1 && N2) {
    2068             :         // make sure that c2 - c1 <= a1*N1 - a2*N2
    2069          11 :         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
    2070          11 :         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
    2071          11 :         const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
    2072             :         LLVM_DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
    2073          11 :         if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
    2074             :           ++SymbolicRDIVindependence;
    2075             :           return true;
    2076             :         }
    2077             :       }
    2078             :       // make sure that 0 <= c2 - c1
    2079           9 :       if (SE->isKnownNegative(C2_C1)) {
    2080             :         ++SymbolicRDIVindependence;
    2081           0 :         return true;
    2082             :       }
    2083             :     }
    2084             :   }
    2085          68 :   else if (SE->isKnownNonPositive(A1)) {
    2086          52 :     if (SE->isKnownNonNegative(A2)) {
    2087             :       // a1 <= 0 && a2 >= 0
    2088           2 :       if (N1 && N2) {
    2089             :         // make sure that a1*N1 - a2*N2 <= c2 - c1
    2090           2 :         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
    2091           2 :         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
    2092           2 :         const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
    2093             :         LLVM_DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
    2094           2 :         if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
    2095             :           ++SymbolicRDIVindependence;
    2096             :           return true;
    2097             :         }
    2098             :       }
    2099             :       // make sure that c2 - c1 <= 0
    2100           0 :       if (SE->isKnownPositive(C2_C1)) {
    2101             :         ++SymbolicRDIVindependence;
    2102           0 :         return true;
    2103             :       }
    2104             :     }
    2105          50 :     else if (SE->isKnownNonPositive(A2)) {
    2106             :       // a1 <= 0 && a2 <= 0
    2107          50 :       if (N1) {
    2108             :         // make sure that a1*N1 <= c2 - c1
    2109          50 :         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
    2110             :         LLVM_DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
    2111          50 :         if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
    2112             :           ++SymbolicRDIVindependence;
    2113             :           return true;
    2114             :         }
    2115             :       }
    2116          48 :       if (N2) {
    2117             :         // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
    2118          48 :         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
    2119             :         LLVM_DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
    2120          48 :         if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
    2121             :           ++SymbolicRDIVindependence;
    2122           3 :           return true;
    2123             :         }
    2124             :       }
    2125             :     }
    2126             :   }
    2127             :   return false;
    2128             : }
    2129             : 
    2130             : 
    2131             : // testSIV -
    2132             : // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
    2133             : // where i is an induction variable, c1 and c2 are loop invariant, and a1 and
    2134             : // a2 are constant, we attack it with an SIV test. While they can all be
    2135             : // solved with the Exact SIV test, it's worthwhile to use simpler tests when
    2136             : // they apply; they're cheaper and sometimes more precise.
    2137             : //
    2138             : // Return true if dependence disproved.
    2139         441 : bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
    2140             :                              FullDependence &Result, Constraint &NewConstraint,
    2141             :                              const SCEV *&SplitIter) const {
    2142             :   LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
    2143             :   LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
    2144             :   const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
    2145             :   const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
    2146         441 :   if (SrcAddRec && DstAddRec) {
    2147         421 :     const SCEV *SrcConst = SrcAddRec->getStart();
    2148         421 :     const SCEV *DstConst = DstAddRec->getStart();
    2149         421 :     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
    2150         421 :     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
    2151         421 :     const Loop *CurLoop = SrcAddRec->getLoop();
    2152             :     assert(CurLoop == DstAddRec->getLoop() &&
    2153             :            "both loops in SIV should be same");
    2154         421 :     Level = mapSrcLoop(CurLoop);
    2155             :     bool disproven;
    2156         421 :     if (SrcCoeff == DstCoeff)
    2157         374 :       disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
    2158             :                                 Level, Result, NewConstraint);
    2159          47 :     else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
    2160          11 :       disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
    2161             :                                       Level, Result, NewConstraint, SplitIter);
    2162             :     else
    2163          36 :       disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
    2164             :                                Level, Result, NewConstraint);
    2165         405 :     return disproven ||
    2166         824 :       gcdMIVtest(Src, Dst, Result) ||
    2167         403 :       symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
    2168             :   }
    2169          20 :   if (SrcAddRec) {
    2170          10 :     const SCEV *SrcConst = SrcAddRec->getStart();
    2171          10 :     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
    2172             :     const SCEV *DstConst = Dst;
    2173          10 :     const Loop *CurLoop = SrcAddRec->getLoop();
    2174          10 :     Level = mapSrcLoop(CurLoop);
    2175          10 :     return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
    2176          17 :                               Level, Result, NewConstraint) ||
    2177           7 :       gcdMIVtest(Src, Dst, Result);
    2178             :   }
    2179          10 :   if (DstAddRec) {
    2180          10 :     const SCEV *DstConst = DstAddRec->getStart();
    2181          10 :     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
    2182             :     const SCEV *SrcConst = Src;
    2183          10 :     const Loop *CurLoop = DstAddRec->getLoop();
    2184          10 :     Level = mapDstLoop(CurLoop);
    2185          10 :     return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
    2186          17 :                               CurLoop, Level, Result, NewConstraint) ||
    2187           7 :       gcdMIVtest(Src, Dst, Result);
    2188             :   }
    2189           0 :   llvm_unreachable("SIV test expected at least one AddRec");
    2190             :   return false;
    2191             : }
    2192             : 
    2193             : 
    2194             : // testRDIV -
    2195             : // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
    2196             : // where i and j are induction variables, c1 and c2 are loop invariant,
    2197             : // and a1 and a2 are constant, we can solve it exactly with an easy adaptation
    2198             : // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
    2199             : // It doesn't make sense to talk about distance or direction in this case,
    2200             : // so there's no point in making special versions of the Strong SIV test or
    2201             : // the Weak-crossing SIV test.
    2202             : //
    2203             : // With minor algebra, this test can also be used for things like
    2204             : // [c1 + a1*i + a2*j][c2].
    2205             : //
    2206             : // Return true if dependence disproved.
    2207          22 : bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
    2208             :                               FullDependence &Result) const {
    2209             :   // we have 3 possible situations here:
    2210             :   //   1) [a*i + b] and [c*j + d]
    2211             :   //   2) [a*i + c*j + b] and [d]
    2212             :   //   3) [b] and [a*i + c*j + d]
    2213             :   // We need to find what we've got and get organized
    2214             : 
    2215             :   const SCEV *SrcConst, *DstConst;
    2216             :   const SCEV *SrcCoeff, *DstCoeff;
    2217             :   const Loop *SrcLoop, *DstLoop;
    2218             : 
    2219             :   LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
    2220             :   LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
    2221             :   const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
    2222             :   const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
    2223          22 :   if (SrcAddRec && DstAddRec) {
    2224          15 :     SrcConst = SrcAddRec->getStart();
    2225          15 :     SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
    2226          15 :     SrcLoop = SrcAddRec->getLoop();
    2227          15 :     DstConst = DstAddRec->getStart();
    2228          15 :     DstCoeff = DstAddRec->getStepRecurrence(*SE);
    2229          15 :     DstLoop = DstAddRec->getLoop();
    2230             :   }
    2231           7 :   else if (SrcAddRec) {
    2232             :     if (const SCEVAddRecExpr *tmpAddRec =
    2233           7 :         dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
    2234           7 :       SrcConst = tmpAddRec->getStart();
    2235           7 :       SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
    2236           7 :       SrcLoop = tmpAddRec->getLoop();
    2237             :       DstConst = Dst;
    2238           7 :       DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
    2239           7 :       DstLoop = SrcAddRec->getLoop();
    2240             :     }
    2241             :     else
    2242           0 :       llvm_unreachable("RDIV reached by surprising SCEVs");
    2243             :   }
    2244           0 :   else if (DstAddRec) {
    2245             :     if (const SCEVAddRecExpr *tmpAddRec =
    2246           0 :         dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
    2247           0 :       DstConst = tmpAddRec->getStart();
    2248           0 :       DstCoeff = tmpAddRec->getStepRecurrence(*SE);
    2249           0 :       DstLoop = tmpAddRec->getLoop();
    2250             :       SrcConst = Src;
    2251           0 :       SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
    2252           0 :       SrcLoop = DstAddRec->getLoop();
    2253             :     }
    2254             :     else
    2255           0 :       llvm_unreachable("RDIV reached by surprising SCEVs");
    2256             :   }
    2257             :   else
    2258           0 :     llvm_unreachable("RDIV expected at least one AddRec");
    2259          22 :   return exactRDIVtest(SrcCoeff, DstCoeff,
    2260             :                        SrcConst, DstConst,
    2261             :                        SrcLoop, DstLoop,
    2262          12 :                        Result) ||
    2263          34 :     gcdMIVtest(Src, Dst, Result) ||
    2264          12 :     symbolicRDIVtest(SrcCoeff, DstCoeff,
    2265             :                      SrcConst, DstConst,
    2266          22 :                      SrcLoop, DstLoop);
    2267             : }
    2268             : 
    2269             : 
    2270             : // Tests the single-subscript MIV pair (Src and Dst) for dependence.
    2271             : // Return true if dependence disproved.
    2272             : // Can sometimes refine direction vectors.
    2273         244 : bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
    2274             :                              const SmallBitVector &Loops,
    2275             :                              FullDependence &Result) const {
    2276             :   LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
    2277             :   LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
    2278         244 :   Result.Consistent = false;
    2279         484 :   return gcdMIVtest(Src, Dst, Result) ||
    2280         240 :     banerjeeMIVtest(Src, Dst, Loops, Result);
    2281             : }
    2282             : 
    2283             : 
    2284             : // Given a product, e.g., 10*X*Y, returns the first constant operand,
    2285             : // in this case 10. If there is no constant part, returns NULL.
    2286             : static
    2287             : const SCEVConstant *getConstantPart(const SCEV *Expr) {
    2288             :   if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))
    2289             :     return Constant;
    2290             :   else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr))
    2291         125 :     if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0)))
    2292             :       return Constant;
    2293             :   return nullptr;
    2294             : }
    2295             : 
    2296             : 
    2297             : //===----------------------------------------------------------------------===//
    2298             : // gcdMIVtest -
    2299             : // Tests an MIV subscript pair for dependence.
    2300             : // Returns true if any possible dependence is disproved.
    2301             : // Marks the result as inconsistent.
    2302             : // Can sometimes disprove the equal direction for 1 or more loops,
    2303             : // as discussed in Michael Wolfe's book,
    2304             : // High Performance Compilers for Parallel Computing, page 235.
    2305             : //
    2306             : // We spend some effort (code!) to handle cases like
    2307             : // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
    2308             : // but M and N are just loop-invariant variables.
    2309             : // This should help us handle linearized subscripts;
    2310             : // also makes this test a useful backup to the various SIV tests.
    2311             : //
    2312             : // It occurs to me that the presence of loop-invariant variables
    2313             : // changes the nature of the test from "greatest common divisor"
    2314             : // to "a common divisor".
    2315         675 : bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
    2316             :                                 FullDependence &Result) const {
    2317             :   LLVM_DEBUG(dbgs() << "starting gcd\n");
    2318             :   ++GCDapplications;
    2319         675 :   unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
    2320         675 :   APInt RunningGCD = APInt::getNullValue(BitWidth);
    2321             : 
    2322             :   // Examine Src coefficients.
    2323             :   // Compute running GCD and record source constant.
    2324             :   // Because we're looking for the constant at the end of the chain,
    2325             :   // we can't quit the loop just because the GCD == 1.
    2326             :   const SCEV *Coefficients = Src;
    2327             :   while (const SCEVAddRecExpr *AddRec =
    2328             :          dyn_cast<SCEVAddRecExpr>(Coefficients)) {
    2329         958 :     const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
    2330             :     // If the coefficient is the product of a constant and other stuff,
    2331             :     // we can use the constant in the GCD computation.
    2332             :     const auto *Constant = getConstantPart(Coeff);
    2333         958 :     if (!Constant)
    2334           0 :       return false;
    2335             :     APInt ConstCoeff = Constant->getAPInt();
    2336        2874 :     RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
    2337         958 :     Coefficients = AddRec->getStart();
    2338         958 :   }
    2339             :   const SCEV *SrcConst = Coefficients;
    2340             : 
    2341             :   // Examine Dst coefficients.
    2342             :   // Compute running GCD and record destination constant.
    2343             :   // Because we're looking for the constant at the end of the chain,
    2344             :   // we can't quit the loop just because the GCD == 1.
    2345             :   Coefficients = Dst;
    2346             :   while (const SCEVAddRecExpr *AddRec =
    2347             :          dyn_cast<SCEVAddRecExpr>(Coefficients)) {
    2348         951 :     const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
    2349             :     // If the coefficient is the product of a constant and other stuff,
    2350             :     // we can use the constant in the GCD computation.
    2351             :     const auto *Constant = getConstantPart(Coeff);
    2352         951 :     if (!Constant)
    2353           0 :       return false;
    2354             :     APInt ConstCoeff = Constant->getAPInt();
    2355        2853 :     RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
    2356         951 :     Coefficients = AddRec->getStart();
    2357         951 :   }
    2358             :   const SCEV *DstConst = Coefficients;
    2359             : 
    2360         675 :   APInt ExtraGCD = APInt::getNullValue(BitWidth);
    2361         675 :   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
    2362             :   LLVM_DEBUG(dbgs() << "    Delta = " << *Delta << "\n");
    2363             :   const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
    2364             :   if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
    2365             :     // If Delta is a sum of products, we may be able to make further progress.
    2366          46 :     for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
    2367          31 :       const SCEV *Operand = Sum->getOperand(Op);
    2368          31 :       if (isa<SCEVConstant>(Operand)) {
    2369             :         assert(!Constant && "Surprised to find multiple constants");
    2370             :         Constant = cast<SCEVConstant>(Operand);
    2371             :       }
    2372             :       else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
    2373             :         // Search for constant operand to participate in GCD;
    2374             :         // If none found; return false.
    2375             :         const SCEVConstant *ConstOp = getConstantPart(Product);
    2376             :         if (!ConstOp)
    2377           0 :           return false;
    2378             :         APInt ConstOpValue = ConstOp->getAPInt();
    2379          34 :         ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
    2380          34 :                                                    ConstOpValue.abs());
    2381             :       }
    2382             :       else
    2383             :         return false;
    2384             :     }
    2385             :   }
    2386         675 :   if (!Constant)
    2387             :     return false;
    2388             :   APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
    2389             :   LLVM_DEBUG(dbgs() << "    ConstDelta = " << ConstDelta << "\n");
    2390         659 :   if (ConstDelta == 0)
    2391             :     return false;
    2392         256 :   RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
    2393             :   LLVM_DEBUG(dbgs() << "    RunningGCD = " << RunningGCD << "\n");
    2394         128 :   APInt Remainder = ConstDelta.srem(RunningGCD);
    2395         128 :   if (Remainder != 0) {
    2396             :     ++GCDindependence;
    2397             :     return true;
    2398             :   }
    2399             : 
    2400             :   // Try to disprove equal directions.
    2401             :   // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
    2402             :   // the code above can't disprove the dependence because the GCD = 1.
    2403             :   // So we consider what happen if i = i' and what happens if j = j'.
    2404             :   // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
    2405             :   // which is infeasible, so we can disallow the = direction for the i level.
    2406             :   // Setting j = j' doesn't help matters, so we end up with a direction vector
    2407             :   // of [<>, *]
    2408             :   //
    2409             :   // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
    2410             :   // we need to remember that the constant part is 5 and the RunningGCD should
    2411             :   // be initialized to ExtraGCD = 30.
    2412             :   LLVM_DEBUG(dbgs() << "    ExtraGCD = " << ExtraGCD << '\n');
    2413             : 
    2414             :   bool Improved = false;
    2415             :   Coefficients = Src;
    2416             :   while (const SCEVAddRecExpr *AddRec =
    2417             :          dyn_cast<SCEVAddRecExpr>(Coefficients)) {
    2418         187 :     Coefficients = AddRec->getStart();
    2419         187 :     const Loop *CurLoop = AddRec->getLoop();
    2420         187 :     RunningGCD = ExtraGCD;
    2421         187 :     const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
    2422         187 :     const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
    2423             :     const SCEV *Inner = Src;
    2424         532 :     while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
    2425             :       AddRec = cast<SCEVAddRecExpr>(Inner);
    2426         345 :       const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
    2427         345 :       if (CurLoop == AddRec->getLoop())
    2428             :         ; // SrcCoeff == Coeff
    2429             :       else {
    2430             :         // If the coefficient is the product of a constant and other stuff,
    2431             :         // we can use the constant in the GCD computation.
    2432             :         Constant = getConstantPart(Coeff);
    2433         158 :         if (!Constant)
    2434           0 :           return false;
    2435             :         APInt ConstCoeff = Constant->getAPInt();
    2436         474 :         RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
    2437             :       }
    2438         345 :       Inner = AddRec->getStart();
    2439             :     }
    2440             :     Inner = Dst;
    2441         511 :     while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
    2442             :       AddRec = cast<SCEVAddRecExpr>(Inner);
    2443         324 :       const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
    2444         324 :       if (CurLoop == AddRec->getLoop())
    2445             :         DstCoeff = Coeff;
    2446             :       else {
    2447             :         // If the coefficient is the product of a constant and other stuff,
    2448             :         // we can use the constant in the GCD computation.
    2449             :         Constant = getConstantPart(Coeff);
    2450         158 :         if (!Constant)
    2451           0 :           return false;
    2452             :         APInt ConstCoeff = Constant->getAPInt();
    2453         474 :         RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
    2454             :       }
    2455         324 :       Inner = AddRec->getStart();
    2456             :     }
    2457         187 :     Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
    2458             :     // If the coefficient is the product of a constant and other stuff,
    2459             :     // we can use the constant in the GCD computation.
    2460             :     Constant = getConstantPart(Delta);
    2461         186 :     if (!Constant)
    2462             :       // The difference of the two coefficients might not be a product
    2463             :       // or constant, in which case we give up on this direction.
    2464           1 :       continue;
    2465             :     APInt ConstCoeff = Constant->getAPInt();
    2466         558 :     RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
    2467             :     LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
    2468         186 :     if (RunningGCD != 0) {
    2469         316 :       Remainder = ConstDelta.srem(RunningGCD);
    2470             :       LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
    2471         158 :       if (Remainder != 0) {
    2472          38 :         unsigned Level = mapSrcLoop(CurLoop);
    2473          76 :         Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
    2474             :         Improved = true;
    2475             :       }
    2476             :     }
    2477             :   }
    2478             :   if (Improved)
    2479             :     ++GCDsuccesses;
    2480             :   LLVM_DEBUG(dbgs() << "all done\n");
    2481         122 :   return false;
    2482             : }
    2483             : 
    2484             : 
    2485             : //===----------------------------------------------------------------------===//
    2486             : // banerjeeMIVtest -
    2487             : // Use Banerjee's Inequalities to test an MIV subscript pair.
    2488             : // (Wolfe, in the race-car book, calls this the Extreme Value Test.)
    2489             : // Generally follows the discussion in Section 2.5.2 of
    2490             : //
    2491             : //    Optimizing Supercompilers for Supercomputers
    2492             : //    Michael Wolfe
    2493             : //
    2494             : // The inequalities given on page 25 are simplified in that loops are
    2495             : // normalized so that the lower bound is always 0 and the stride is always 1.
    2496             : // For example, Wolfe gives
    2497             : //
    2498             : //     LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
    2499             : //
    2500             : // where A_k is the coefficient of the kth index in the source subscript,
    2501             : // B_k is the coefficient of the kth index in the destination subscript,
    2502             : // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
    2503             : // index, and N_k is the stride of the kth index. Since all loops are normalized
    2504             : // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
    2505             : // equation to
    2506             : //
    2507             : //     LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
    2508             : //            = (A^-_k - B_k)^- (U_k - 1)  - B_k
    2509             : //
    2510             : // Similar simplifications are possible for the other equations.
    2511             : //
    2512             : // When we can't determine the number of iterations for a loop,
    2513             : // we use NULL as an indicator for the worst case, infinity.
    2514             : // When computing the upper bound, NULL denotes +inf;
    2515             : // for the lower bound, NULL denotes -inf.
    2516             : //
    2517             : // Return true if dependence disproved.
    2518         240 : bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
    2519             :                                      const SmallBitVector &Loops,
    2520             :                                      FullDependence &Result) const {
    2521             :   LLVM_DEBUG(dbgs() << "starting Banerjee\n");
    2522             :   ++BanerjeeApplications;
    2523             :   LLVM_DEBUG(dbgs() << "    Src = " << *Src << '\n');
    2524             :   const SCEV *A0;
    2525         240 :   CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
    2526             :   LLVM_DEBUG(dbgs() << "    Dst = " << *Dst << '\n');
    2527             :   const SCEV *B0;
    2528         240 :   CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
    2529         240 :   BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
    2530         240 :   const SCEV *Delta = SE->getMinusSCEV(B0, A0);
    2531             :   LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
    2532             : 
    2533             :   // Compute bounds for all the * directions.
    2534             :   LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
    2535         799 :   for (unsigned K = 1; K <= MaxLevels; ++K) {
    2536         559 :     Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
    2537         559 :     Bound[K].Direction = Dependence::DVEntry::ALL;
    2538         559 :     Bound[K].DirSet = Dependence::DVEntry::NONE;
    2539         559 :     findBoundsALL(A, B, Bound, K);
    2540             : #ifndef NDEBUG
    2541             :     LLVM_DEBUG(dbgs() << "\t    " << K << '\t');
    2542             :     if (Bound[K].Lower[Dependence::DVEntry::ALL])
    2543             :       LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
    2544             :     else
    2545             :       LLVM_DEBUG(dbgs() << "-inf\t");
    2546             :     if (Bound[K].Upper[Dependence::DVEntry::ALL])
    2547             :       LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
    2548             :     else
    2549             :       LLVM_DEBUG(dbgs() << "+inf\n");
    2550             : #endif
    2551             :   }
    2552             : 
    2553             :   // Test the *, *, *, ... case.
    2554             :   bool Disproved = false;
    2555         240 :   if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
    2556             :     // Explore the direction vector hierarchy.
    2557         236 :     unsigned DepthExpanded = 0;
    2558         236 :     unsigned NewDeps = exploreDirections(1, A, B, Bound,
    2559             :                                          Loops, DepthExpanded, Delta);
    2560         236 :     if (NewDeps > 0) {
    2561             :       bool Improved = false;
    2562         776 :       for (unsigned K = 1; K <= CommonLevels; ++K) {
    2563         540 :         if (Loops[K]) {
    2564         515 :           unsigned Old = Result.DV[K - 1].Direction;
    2565         515 :           Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
    2566             :           Improved |= Old != Result.DV[K - 1].Direction;
    2567         515 :           if (!Result.DV[K - 1].Direction) {
    2568             :             Improved = false;
    2569             :             Disproved = true;
    2570             :             break;
    2571             :           }
    2572             :         }
    2573             :       }
    2574             :       if (Improved)
    2575             :         ++BanerjeeSuccesses;
    2576             :     }
    2577             :     else {
    2578             :       ++BanerjeeIndependence;
    2579             :       Disproved = true;
    2580             :     }
    2581             :   }
    2582             :   else {
    2583             :     ++BanerjeeIndependence;
    2584             :     Disproved = true;
    2585             :   }
    2586         240 :   delete [] Bound;
    2587         240 :   delete [] A;
    2588         240 :   delete [] B;
    2589         240 :   return Disproved;
    2590             : }
    2591             : 
    2592             : 
    2593             : // Hierarchically expands the direction vector
    2594             : // search space, combining the directions of discovered dependences
    2595             : // in the DirSet field of Bound. Returns the number of distinct
    2596             : // dependences discovered. If the dependence is disproved,
    2597             : // it will return 0.
    2598        2102 : unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
    2599             :                                            CoefficientInfo *B, BoundInfo *Bound,
    2600             :                                            const SmallBitVector &Loops,
    2601             :                                            unsigned &DepthExpanded,
    2602             :                                            const SCEV *Delta) const {
    2603        2141 :   if (Level > CommonLevels) {
    2604             :     // record result
    2605             :     LLVM_DEBUG(dbgs() << "\t[");
    2606        4489 :     for (unsigned K = 1; K <= CommonLevels; ++K) {
    2607        3381 :       if (Loops[K]) {
    2608        3228 :         Bound[K].DirSet |= Bound[K].Direction;
    2609             : #ifndef NDEBUG
    2610             :         switch (Bound[K].Direction) {
    2611             :         case Dependence::DVEntry::LT:
    2612             :           LLVM_DEBUG(dbgs() << " <");
    2613             :           break;
    2614             :         case Dependence::DVEntry::EQ:
    2615             :           LLVM_DEBUG(dbgs() << " =");
    2616             :           break;
    2617             :         case Dependence::DVEntry::GT:
    2618             :           LLVM_DEBUG(dbgs() << " >");
    2619             :           break;
    2620             :         case Dependence::DVEntry::ALL:
    2621             :           LLVM_DEBUG(dbgs() << " *");
    2622             :           break;
    2623             :         default:
    2624             :           llvm_unreachable("unexpected Bound[K].Direction");
    2625             :         }
    2626             : #endif
    2627             :       }
    2628             :     }
    2629             :     LLVM_DEBUG(dbgs() << " ]\n");
    2630             :     return 1;
    2631             :   }
    2632        1033 :   if (Loops[Level]) {
    2633         994 :     if (Level > DepthExpanded) {
    2634         515 :       DepthExpanded = Level;
    2635             :       // compute bounds for <, =, > at current level
    2636         515 :       findBoundsLT(A, B, Bound, Level);
    2637         515 :       findBoundsGT(A, B, Bound, Level);
    2638         515 :       findBoundsEQ(A, B, Bound, Level);
    2639             : #ifndef NDEBUG
    2640             :       LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
    2641             :       LLVM_DEBUG(dbgs() << "\t    <\t");
    2642             :       if (Bound[Level].Lower[Dependence::DVEntry::LT])
    2643             :         LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
    2644             :                           << '\t');
    2645             :       else
    2646             :         LLVM_DEBUG(dbgs() << "-inf\t");
    2647             :       if (Bound[Level].Upper[Dependence::DVEntry::LT])
    2648             :         LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
    2649             :                           << '\n');
    2650             :       else
    2651             :         LLVM_DEBUG(dbgs() << "+inf\n");
    2652             :       LLVM_DEBUG(dbgs() << "\t    =\t");
    2653             :       if (Bound[Level].Lower[Dependence::DVEntry::EQ])
    2654             :         LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
    2655             :                           << '\t');
    2656             :       else
    2657             :         LLVM_DEBUG(dbgs() << "-inf\t");
    2658             :       if (Bound[Level].Upper[Dependence::DVEntry::EQ])
    2659             :         LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
    2660             :                           << '\n');
    2661             :       else
    2662             :         LLVM_DEBUG(dbgs() << "+inf\n");
    2663             :       LLVM_DEBUG(dbgs() << "\t    >\t");
    2664             :       if (Bound[Level].Lower[Dependence::DVEntry::GT])
    2665             :         LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
    2666             :                           << '\t');
    2667             :       else
    2668             :         LLVM_DEBUG(dbgs() << "-inf\t");
    2669             :       if (Bound[Level].Upper[Dependence::DVEntry::GT])
    2670             :         LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
    2671             :                           << '\n');
    2672             :       else
    2673             :         LLVM_DEBUG(dbgs() << "+inf\n");
    2674             : #endif
    2675             :     }
    2676             : 
    2677             :     unsigned NewDeps = 0;
    2678             : 
    2679             :     // test bounds for <, *, *, ...
    2680         994 :     if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
    2681         541 :       NewDeps += exploreDirections(Level + 1, A, B, Bound,
    2682             :                                    Loops, DepthExpanded, Delta);
    2683             : 
    2684             :     // Test bounds for =, *, *, ...
    2685         994 :     if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
    2686         778 :       NewDeps += exploreDirections(Level + 1, A, B, Bound,
    2687             :                                    Loops, DepthExpanded, Delta);
    2688             : 
    2689             :     // test bounds for >, *, *, ...
    2690         994 :     if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
    2691         547 :       NewDeps += exploreDirections(Level + 1, A, B, Bound,
    2692             :                                    Loops, DepthExpanded, Delta);
    2693             : 
    2694         994 :     Bound[Level].Direction = Dependence::DVEntry::ALL;
    2695         994 :     return NewDeps;
    2696             :   }
    2697             :   else
    2698          39 :     return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
    2699             : }
    2700             : 
    2701             : 
    2702             : // Returns true iff the current bounds are plausible.
    2703        3222 : bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
    2704             :                                 BoundInfo *Bound, const SCEV *Delta) const {
    2705        3222 :   Bound[Level].Direction = DirKind;
    2706        3222 :   if (const SCEV *LowerBound = getLowerBound(Bound))
    2707        3222 :     if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
    2708             :       return false;
    2709        2660 :   if (const SCEV *UpperBound = getUpperBound(Bound))
    2710        2660 :     if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
    2711         558 :       return false;
    2712             :   return true;
    2713             : }
    2714             : 
    2715             : 
    2716             : // Computes the upper and lower bounds for level K
    2717             : // using the * direction. Records them in Bound.
    2718             : // Wolfe gives the equations
    2719             : //
    2720             : //    LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
    2721             : //    UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
    2722             : //
    2723             : // Since we normalize loops, we can simplify these equations to
    2724             : //
    2725             : //    LB^*_k = (A^-_k - B^+_k)U_k
    2726             : //    UB^*_k = (A^+_k - B^-_k)U_k
    2727             : //
    2728             : // We must be careful to handle the case where the upper bound is unknown.
    2729             : // Note that the lower bound is always <= 0
    2730             : // and the upper bound is always >= 0.
    2731         559 : void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
    2732             :                                    BoundInfo *Bound, unsigned K) const {
    2733         559 :   Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
    2734         559 :   Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
    2735         559 :   if (Bound[K].Iterations) {
    2736         534 :     Bound[K].Lower[Dependence::DVEntry::ALL] =
    2737         534 :       SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
    2738             :                      Bound[K].Iterations);
    2739         534 :     Bound[K].Upper[Dependence::DVEntry::ALL] =
    2740         534 :       SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
    2741             :                      Bound[K].Iterations);
    2742             :   }
    2743             :   else {
    2744             :     // If the difference is 0, we won't need to know the number of iterations.
    2745          25 :     if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
    2746          25 :       Bound[K].Lower[Dependence::DVEntry::ALL] =
    2747          25 :           SE->getZero(A[K].Coeff->getType());
    2748          25 :     if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
    2749          25 :       Bound[K].Upper[Dependence::DVEntry::ALL] =
    2750          25 :           SE->getZero(A[K].Coeff->getType());
    2751             :   }
    2752         559 : }
    2753             : 
    2754             : 
    2755             : // Computes the upper and lower bounds for level K
    2756             : // using the = direction. Records them in Bound.
    2757             : // Wolfe gives the equations
    2758             : //
    2759             : //    LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
    2760             : //    UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
    2761             : //
    2762             : // Since we normalize loops, we can simplify these equations to
    2763             : //
    2764             : //    LB^=_k = (A_k - B_k)^- U_k
    2765             : //    UB^=_k = (A_k - B_k)^+ U_k
    2766             : //
    2767             : // We must be careful to handle the case where the upper bound is unknown.
    2768             : // Note that the lower bound is always <= 0
    2769             : // and the upper bound is always >= 0.
    2770         515 : void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
    2771             :                                   BoundInfo *Bound, unsigned K) const {
    2772         515 :   Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
    2773         515 :   Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
    2774         515 :   if (Bound[K].Iterations) {
    2775         515 :     const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
    2776         515 :     const SCEV *NegativePart = getNegativePart(Delta);
    2777         515 :     Bound[K].Lower[Dependence::DVEntry::EQ] =
    2778         515 :       SE->getMulExpr(NegativePart, Bound[K].Iterations);
    2779         515 :     const SCEV *PositivePart = getPositivePart(Delta);
    2780         515 :     Bound[K].Upper[Dependence::DVEntry::EQ] =
    2781         515 :       SE->getMulExpr(PositivePart, Bound[K].Iterations);
    2782             :   }
    2783             :   else {
    2784             :     // If the positive/negative part of the difference is 0,
    2785             :     // we won't need to know the number of iterations.
    2786           0 :     const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
    2787           0 :     const SCEV *NegativePart = getNegativePart(Delta);
    2788           0 :     if (NegativePart->isZero())
    2789           0 :       Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
    2790           0 :     const SCEV *PositivePart = getPositivePart(Delta);
    2791           0 :     if (PositivePart->isZero())
    2792           0 :       Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
    2793             :   }
    2794         515 : }
    2795             : 
    2796             : 
    2797             : // Computes the upper and lower bounds for level K
    2798             : // using the < direction. Records them in Bound.
    2799             : // Wolfe gives the equations
    2800             : //
    2801             : //    LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
    2802             : //    UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
    2803             : //
    2804             : // Since we normalize loops, we can simplify these equations to
    2805             : //
    2806             : //    LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
    2807             : //    UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
    2808             : //
    2809             : // We must be careful to handle the case where the upper bound is unknown.
    2810         515 : void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
    2811             :                                   BoundInfo *Bound, unsigned K) const {
    2812         515 :   Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
    2813         515 :   Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
    2814         515 :   if (Bound[K].Iterations) {
    2815        1030 :     const SCEV *Iter_1 = SE->getMinusSCEV(
    2816             :         Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
    2817             :     const SCEV *NegPart =
    2818         515 :       getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
    2819         515 :     Bound[K].Lower[Dependence::DVEntry::LT] =
    2820         515 :       SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
    2821             :     const SCEV *PosPart =
    2822         515 :       getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
    2823         515 :     Bound[K].Upper[Dependence::DVEntry::LT] =
    2824         515 :       SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
    2825             :   }
    2826             :   else {
    2827             :     // If the positive/negative part of the difference is 0,
    2828             :     // we won't need to know the number of iterations.
    2829             :     const SCEV *NegPart =
    2830           0 :       getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
    2831           0 :     if (NegPart->isZero())
    2832           0 :       Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
    2833             :     const SCEV *PosPart =
    2834           0 :       getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
    2835           0 :     if (PosPart->isZero())
    2836           0 :       Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
    2837             :   }
    2838         515 : }
    2839             : 
    2840             : 
    2841             : // Computes the upper and lower bounds for level K
    2842             : // using the > direction. Records them in Bound.
    2843             : // Wolfe gives the equations
    2844             : //
    2845             : //    LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
    2846             : //    UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
    2847             : //
    2848             : // Since we normalize loops, we can simplify these equations to
    2849             : //
    2850             : //    LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
    2851             : //    UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
    2852             : //
    2853             : // We must be careful to handle the case where the upper bound is unknown.
    2854         515 : void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
    2855             :                                   BoundInfo *Bound, unsigned K) const {
    2856         515 :   Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
    2857         515 :   Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
    2858         515 :   if (Bound[K].Iterations) {
    2859        1030 :     const SCEV *Iter_1 = SE->getMinusSCEV(
    2860             :         Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
    2861             :     const SCEV *NegPart =
    2862         515 :       getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
    2863         515 :     Bound[K].Lower[Dependence::DVEntry::GT] =
    2864         515 :       SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
    2865             :     const SCEV *PosPart =
    2866         515 :       getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
    2867         515 :     Bound[K].Upper[Dependence::DVEntry::GT] =
    2868         515 :       SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
    2869             :   }
    2870             :   else {
    2871             :     // If the positive/negative part of the difference is 0,
    2872             :     // we won't need to know the number of iterations.
    2873           0 :     const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
    2874           0 :     if (NegPart->isZero())
    2875           0 :       Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
    2876           0 :     const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
    2877           0 :     if (PosPart->isZero())
    2878           0 :       Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
    2879             :   }
    2880         515 : }
    2881             : 
    2882             : 
    2883             : // X^+ = max(X, 0)
    2884        2590 : const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
    2885        5180 :   return SE->getSMaxExpr(X, SE->getZero(X->getType()));
    2886             : }
    2887             : 
    2888             : 
    2889             : // X^- = min(X, 0)
    2890        2590 : const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
    2891        5180 :   return SE->getSMinExpr(X, SE->getZero(X->getType()));
    2892             : }
    2893             : 
    2894             : 
    2895             : // Walks through the subscript,
    2896             : // collecting each coefficient, the associated loop bounds,
    2897             : // and recording its positive and negative parts for later use.
    2898             : DependenceInfo::CoefficientInfo *
    2899         480 : DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
    2900             :                                  const SCEV *&Constant) const {
    2901         480 :   const SCEV *Zero = SE->getZero(Subscript->getType());
    2902         480 :   CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
    2903        1598 :   for (unsigned K = 1; K <= MaxLevels; ++K) {
    2904        1118 :     CI[K].Coeff = Zero;
    2905        1118 :     CI[K].PosPart = Zero;
    2906        1118 :     CI[K].NegPart = Zero;
    2907        1118 :     CI[K].Iterations = nullptr;
    2908             :   }
    2909             :   while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
    2910        1045 :     const Loop *L = AddRec->getLoop();
    2911        1045 :     unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
    2912        1045 :     CI[K].Coeff = AddRec->getStepRecurrence(*SE);
    2913        1045 :     CI[K].PosPart = getPositivePart(CI[K].Coeff);
    2914        1045 :     CI[K].NegPart = getNegativePart(CI[K].Coeff);
    2915        1045 :     CI[K].Iterations = collectUpperBound(L, Subscript->getType());
    2916        1045 :     Subscript = AddRec->getStart();
    2917        1045 :   }
    2918         480 :   Constant = Subscript;
    2919             : #ifndef NDEBUG
    2920             :   LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
    2921             :   for (unsigned K = 1; K <= MaxLevels; ++K) {
    2922             :     LLVM_DEBUG(dbgs() << "\t    " << K << "\t" << *CI[K].Coeff);
    2923             :     LLVM_DEBUG(dbgs() << "\tPos Part = ");
    2924             :     LLVM_DEBUG(dbgs() << *CI[K].PosPart);
    2925             :     LLVM_DEBUG(dbgs() << "\tNeg Part = ");
    2926             :     LLVM_DEBUG(dbgs() << *CI[K].NegPart);
    2927             :     LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
    2928             :     if (CI[K].Iterations)
    2929             :       LLVM_DEBUG(dbgs() << *CI[K].Iterations);
    2930             :     else
    2931             :       LLVM_DEBUG(dbgs() << "+inf");
    2932             :     LLVM_DEBUG(dbgs() << '\n');
    2933             :   }
    2934             :   LLVM_DEBUG(dbgs() << "\t    Constant = " << *Subscript << '\n');
    2935             : #endif
    2936         480 :   return CI;
    2937             : }
    2938             : 
    2939             : 
    2940             : // Looks through all the bounds info and
    2941             : // computes the lower bound given the current direction settings
    2942             : // at each level. If the lower bound for any level is -inf,
    2943             : // the result is -inf.
    2944        3222 : const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
    2945        3222 :   const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
    2946        8974 :   for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
    2947        5752 :     if (Bound[K].Lower[Bound[K].Direction])
    2948        5752 :       Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
    2949             :     else
    2950             :       Sum = nullptr;
    2951             :   }
    2952        3222 :   return Sum;
    2953             : }
    2954             : 
    2955             : 
    2956             : // Looks through all the bounds info and
    2957             : // computes the upper bound given the current direction settings
    2958             : // at each level. If the upper bound at any level is +inf,
    2959             : // the result is +inf.
    2960        2660 : const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
    2961        2660 :   const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
    2962        7626 :   for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
    2963        4966 :     if (Bound[K].Upper[Bound[K].Direction])
    2964        4966 :       Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
    2965             :     else
    2966             :       Sum = nullptr;
    2967             :   }
    2968        2660 :   return Sum;
    2969             : }
    2970             : 
    2971             : 
    2972             : //===----------------------------------------------------------------------===//
    2973             : // Constraint manipulation for Delta test.
    2974             : 
    2975             : // Given a linear SCEV,
    2976             : // return the coefficient (the step)
    2977             : // corresponding to the specified loop.
    2978             : // If there isn't one, return 0.
    2979             : // For example, given a*i + b*j + c*k, finding the coefficient
    2980             : // corresponding to the j loop would yield b.
    2981           0 : const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr,
    2982             :                                             const Loop *TargetLoop) const {
    2983             :   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
    2984             :   if (!AddRec)
    2985           0 :     return SE->getZero(Expr->getType());
    2986           0 :   if (AddRec->getLoop() == TargetLoop)
    2987           0 :     return AddRec->getStepRecurrence(*SE);
    2988           0 :   return findCoefficient(AddRec->getStart(), TargetLoop);
    2989             : }
    2990             : 
    2991             : 
    2992             : // Given a linear SCEV,
    2993             : // return the SCEV given by zeroing out the coefficient
    2994             : // corresponding to the specified loop.
    2995             : // For example, given a*i + b*j + c*k, zeroing the coefficient
    2996             : // corresponding to the j loop would yield a*i + c*k.
    2997           0 : const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr,
    2998             :                                             const Loop *TargetLoop) const {
    2999             :   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
    3000             :   if (!AddRec)
    3001             :     return Expr; // ignore
    3002           0 :   if (AddRec->getLoop() == TargetLoop)
    3003           0 :     return AddRec->getStart();
    3004           0 :   return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
    3005             :                            AddRec->getStepRecurrence(*SE),
    3006             :                            AddRec->getLoop(),
    3007           0 :                            AddRec->getNoWrapFlags());
    3008             : }
    3009             : 
    3010             : 
    3011             : // Given a linear SCEV Expr,
    3012             : // return the SCEV given by adding some Value to the
    3013             : // coefficient corresponding to the specified TargetLoop.
    3014             : // For example, given a*i + b*j + c*k, adding 1 to the coefficient
    3015             : // corresponding to the j loop would yield a*i + (b+1)*j + c*k.
    3016           0 : const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr,
    3017             :                                              const Loop *TargetLoop,
    3018             :                                              const SCEV *Value) const {
    3019             :   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
    3020             :   if (!AddRec) // create a new addRec
    3021           0 :     return SE->getAddRecExpr(Expr,
    3022             :                              Value,
    3023             :                              TargetLoop,
    3024           0 :                              SCEV::FlagAnyWrap); // Worst case, with no info.
    3025           0 :   if (AddRec->getLoop() == TargetLoop) {
    3026           0 :     const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
    3027           0 :     if (Sum->isZero())
    3028           0 :       return AddRec->getStart();
    3029           0 :     return SE->getAddRecExpr(AddRec->getStart(),
    3030             :                              Sum,
    3031             :                              AddRec->getLoop(),
    3032           0 :                              AddRec->getNoWrapFlags());
    3033             :   }
    3034           0 :   if (SE->isLoopInvariant(AddRec, TargetLoop))
    3035           0 :     return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
    3036           0 :   return SE->getAddRecExpr(
    3037             :       addToCoefficient(AddRec->getStart(), TargetLoop, Value),
    3038             :       AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
    3039           0 :       AddRec->getNoWrapFlags());
    3040             : }
    3041             : 
    3042             : 
    3043             : // Review the constraints, looking for opportunities
    3044             : // to simplify a subscript pair (Src and Dst).
    3045             : // Return true if some simplification occurs.
    3046             : // If the simplification isn't exact (that is, if it is conservative
    3047             : // in terms of dependence), set consistent to false.
    3048             : // Corresponds to Figure 5 from the paper
    3049             : //
    3050             : //            Practical Dependence Testing
    3051             : //            Goff, Kennedy, Tseng
    3052             : //            PLDI 1991
    3053           0 : bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
    3054             :                                SmallBitVector &Loops,
    3055             :                                SmallVectorImpl<Constraint> &Constraints,
    3056             :                                bool &Consistent) {
    3057             :   bool Result = false;
    3058           0 :   for (unsigned LI : Loops.set_bits()) {
    3059             :     LLVM_DEBUG(dbgs() << "\t    Constraint[" << LI << "] is");
    3060             :     LLVM_DEBUG(Constraints[LI].dump(dbgs()));
    3061           0 :     if (Constraints[LI].isDistance())
    3062           0 :       Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
    3063           0 :     else if (Constraints[LI].isLine())
    3064           0 :       Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
    3065           0 :     else if (Constraints[LI].isPoint())
    3066           0 :       Result |= propagatePoint(Src, Dst, Constraints[LI]);
    3067             :   }
    3068           0 :   return Result;
    3069             : }
    3070             : 
    3071             : 
    3072             : // Attempt to propagate a distance
    3073             : // constraint into a subscript pair (Src and Dst).
    3074             : // Return true if some simplification occurs.
    3075             : // If the simplification isn't exact (that is, if it is conservative
    3076             : // in terms of dependence), set consistent to false.
    3077           0 : bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
    3078             :                                        Constraint &CurConstraint,
    3079             :                                        bool &Consistent) {
    3080           0 :   const Loop *CurLoop = CurConstraint.getAssociatedLoop();
    3081             :   LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
    3082           0 :   const SCEV *A_K = findCoefficient(Src, CurLoop);
    3083           0 :   if (A_K->isZero())
    3084             :     return false;
    3085           0 :   const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
    3086           0 :   Src = SE->getMinusSCEV(Src, DA_K);
    3087           0 :   Src = zeroCoefficient(Src, CurLoop);
    3088             :   LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
    3089             :   LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
    3090           0 :   Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
    3091             :   LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
    3092           0 :   if (!findCoefficient(Dst, CurLoop)->isZero())
    3093           0 :     Consistent = false;
    3094             :   return true;
    3095             : }
    3096             : 
    3097             : 
    3098             : // Attempt to propagate a line
    3099             : // constraint into a subscript pair (Src and Dst).
    3100             : // Return true if some simplification occurs.
    3101             : // If the simplification isn't exact (that is, if it is conservative
    3102             : // in terms of dependence), set consistent to false.
    3103           0 : bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
    3104             :                                    Constraint &CurConstraint,
    3105             :                                    bool &Consistent) {
    3106           0 :   const Loop *CurLoop = CurConstraint.getAssociatedLoop();
    3107           0 :   const SCEV *A = CurConstraint.getA();
    3108           0 :   const SCEV *B = CurConstraint.getB();
    3109           0 :   const SCEV *C = CurConstraint.getC();
    3110             :   LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C
    3111             :                     << "\n");
    3112             :   LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
    3113             :   LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
    3114           0 :   if (A->isZero()) {
    3115             :     const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
    3116             :     const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
    3117           0 :     if (!Bconst || !Cconst) return false;
    3118             :     APInt Beta = Bconst->getAPInt();
    3119             :     APInt Charlie = Cconst->getAPInt();
    3120           0 :     APInt CdivB = Charlie.sdiv(Beta);
    3121             :     assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
    3122           0 :     const SCEV *AP_K = findCoefficient(Dst, CurLoop);
    3123             :     //    Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
    3124           0 :     Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
    3125           0 :     Dst = zeroCoefficient(Dst, CurLoop);
    3126           0 :     if (!findCoefficient(Src, CurLoop)->isZero())
    3127           0 :       Consistent = false;
    3128             :   }
    3129           0 :   else if (B->isZero()) {
    3130             :     const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
    3131             :     const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
    3132           0 :     if (!Aconst || !Cconst) return false;
    3133             :     APInt Alpha = Aconst->getAPInt();
    3134             :     APInt Charlie = Cconst->getAPInt();
    3135           0 :     APInt CdivA = Charlie.sdiv(Alpha);
    3136             :     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
    3137           0 :     const SCEV *A_K = findCoefficient(Src, CurLoop);
    3138           0 :     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
    3139           0 :     Src = zeroCoefficient(Src, CurLoop);
    3140           0 :     if (!findCoefficient(Dst, CurLoop)->isZero())
    3141           0 :       Consistent = false;
    3142             :   }
    3143           0 :   else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
    3144             :     const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
    3145             :     const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
    3146           0 :     if (!Aconst || !Cconst) return false;
    3147             :     APInt Alpha = Aconst->getAPInt();
    3148             :     APInt Charlie = Cconst->getAPInt();
    3149           0 :     APInt CdivA = Charlie.sdiv(Alpha);
    3150             :     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
    3151           0 :     const SCEV *A_K = findCoefficient(Src, CurLoop);
    3152           0 :     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
    3153           0 :     Src = zeroCoefficient(Src, CurLoop);
    3154           0 :     Dst = addToCoefficient(Dst, CurLoop, A_K);
    3155           0 :     if (!findCoefficient(Dst, CurLoop)->isZero())
    3156           0 :       Consistent = false;
    3157             :   }
    3158             :   else {
    3159             :     // paper is incorrect here, or perhaps just misleading
    3160           0 :     const SCEV *A_K = findCoefficient(Src, CurLoop);
    3161           0 :     Src = SE->getMulExpr(Src, A);
    3162           0 :     Dst = SE->getMulExpr(Dst, A);
    3163           0 :     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
    3164           0 :     Src = zeroCoefficient(Src, CurLoop);
    3165           0 :     Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
    3166           0 :     if (!findCoefficient(Dst, CurLoop)->isZero())
    3167           0 :       Consistent = false;
    3168             :   }
    3169             :   LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
    3170             :   LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
    3171             :   return true;
    3172             : }
    3173             : 
    3174             : 
    3175             : // Attempt to propagate a point
    3176             : // constraint into a subscript pair (Src and Dst).
    3177             : // Return true if some simplification occurs.
    3178           0 : bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
    3179             :                                     Constraint &CurConstraint) {
    3180           0 :   const Loop *CurLoop = CurConstraint.getAssociatedLoop();
    3181           0 :   const SCEV *A_K = findCoefficient(Src, CurLoop);
    3182           0 :   const SCEV *AP_K = findCoefficient(Dst, CurLoop);
    3183           0 :   const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
    3184           0 :   const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
    3185             :   LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
    3186           0 :   Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
    3187           0 :   Src = zeroCoefficient(Src, CurLoop);
    3188             :   LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
    3189             :   LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
    3190           0 :   Dst = zeroCoefficient(Dst, CurLoop);
    3191             :   LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
    3192           0 :   return true;
    3193             : }
    3194             : 
    3195             : 
    3196             : // Update direction vector entry based on the current constraint.
    3197           1 : void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
    3198             :                                      const Constraint &CurConstraint) const {
    3199             :   LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");
    3200             :   LLVM_DEBUG(CurConstraint.dump(dbgs()));
    3201           1 :   if (CurConstraint.isAny())
    3202             :     ; // use defaults
    3203           1 :   else if (CurConstraint.isDistance()) {
    3204             :     // this one is consistent, the others aren't
    3205           0 :     Level.Scalar = false;
    3206           0 :     Level.Distance = CurConstraint.getD();
    3207             :     unsigned NewDirection = Dependence::DVEntry::NONE;
    3208           0 :     if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
    3209             :       NewDirection = Dependence::DVEntry::EQ;
    3210           0 :     if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
    3211           0 :       NewDirection |= Dependence::DVEntry::LT;
    3212           0 :     if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
    3213           0 :       NewDirection |= Dependence::DVEntry::GT;
    3214           0 :     Level.Direction &= NewDirection;
    3215             :   }
    3216           1 :   else if (CurConstraint.isLine()) {
    3217           0 :     Level.Scalar = false;
    3218           0 :     Level.Distance = nullptr;
    3219             :     // direction should be accurate
    3220             :   }
    3221           1 :   else if (CurConstraint.isPoint()) {
    3222           1 :     Level.Scalar = false;
    3223           1 :     Level.Distance = nullptr;
    3224             :     unsigned NewDirection = Dependence::DVEntry::NONE;
    3225           1 :     if (!isKnownPredicate(CmpInst::ICMP_NE,
    3226             :                           CurConstraint.getY(),
    3227             :                           CurConstraint.getX()))
    3228             :       // if X may be = Y
    3229             :       NewDirection |= Dependence::DVEntry::EQ;
    3230           1 :     if (!isKnownPredicate(CmpInst::ICMP_SLE,
    3231             :                           CurConstraint.getY(),
    3232             :                           CurConstraint.getX()))
    3233             :       // if Y may be > X
    3234           0 :       NewDirection |= Dependence::DVEntry::LT;
    3235           1 :     if (!isKnownPredicate(CmpInst::ICMP_SGE,
    3236             :                           CurConstraint.getY(),
    3237             :                           CurConstraint.getX()))
    3238             :       // if Y may be < X
    3239           1 :       NewDirection |= Dependence::DVEntry::GT;
    3240           1 :     Level.Direction &= NewDirection;
    3241             :   }
    3242             :   else
    3243           0 :     llvm_unreachable("constraint has unexpected kind");
    3244           1 : }
    3245             : 
    3246             : /// Check if we can delinearize the subscripts. If the SCEVs representing the
    3247             : /// source and destination array references are recurrences on a nested loop,
    3248             : /// this function flattens the nested recurrences into separate recurrences
    3249             : /// for each loop level.
    3250         881 : bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
    3251             :                                     SmallVectorImpl<Subscript> &Pair) {
    3252             :   assert(isLoadOrStore(Src) && "instruction is not load or store");
    3253             :   assert(isLoadOrStore(Dst) && "instruction is not load or store");
    3254             :   Value *SrcPtr = getLoadStorePointerOperand(Src);
    3255             :   Value *DstPtr = getLoadStorePointerOperand(Dst);
    3256             : 
    3257         881 :   Loop *SrcLoop = LI->getLoopFor(Src->getParent());
    3258         881 :   Loop *DstLoop = LI->getLoopFor(Dst->getParent());
    3259             : 
    3260             :   // Below code mimics the code in Delinearization.cpp
    3261             :   const SCEV *SrcAccessFn =
    3262         881 :     SE->getSCEVAtScope(SrcPtr, SrcLoop);
    3263             :   const SCEV *DstAccessFn =
    3264         881 :     SE->getSCEVAtScope(DstPtr, DstLoop);
    3265             : 
    3266             :   const SCEVUnknown *SrcBase =
    3267         881 :       dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
    3268             :   const SCEVUnknown *DstBase =
    3269         881 :       dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
    3270             : 
    3271         881 :   if (!SrcBase || !DstBase || SrcBase != DstBase)
    3272             :     return false;
    3273             : 
    3274         881 :   const SCEV *ElementSize = SE->getElementSize(Src);
    3275         881 :   if (ElementSize != SE->getElementSize(Dst))
    3276             :     return false;
    3277             : 
    3278         881 :   const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
    3279         881 :   const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
    3280             : 
    3281             :   const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
    3282             :   const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
    3283         881 :   if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
    3284             :     return false;
    3285             : 
    3286             :   // First step: collect parametric terms in both array references.
    3287             :   SmallVector<const SCEV *, 4> Terms;
    3288         601 :   SE->collectParametricTerms(SrcAR, Terms);
    3289         601 :   SE->collectParametricTerms(DstAR, Terms);
    3290             : 
    3291             :   // Second step: find subscript sizes.
    3292             :   SmallVector<const SCEV *, 4> Sizes;
    3293         601 :   SE->findArrayDimensions(Terms, Sizes, ElementSize);
    3294             : 
    3295             :   // Third step: compute the access functions for each subscript.
    3296             :   SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
    3297         601 :   SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
    3298         601 :   SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
    3299             : 
    3300             :   // Fail when there is only a subscript: that's a linearized access function.
    3301        1202 :   if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
    3302             :       SrcSubscripts.size() != DstSubscripts.size())
    3303             :     return false;
    3304             : 
    3305          57 :   int size = SrcSubscripts.size();
    3306             : 
    3307             :   // Statically check that the array bounds are in-range. The first subscript we
    3308             :   // don't have a size for and it cannot overflow into another subscript, so is
    3309             :   // always safe. The others need to be 0 <= subscript[i] < bound, for both src
    3310             :   // and dst.
    3311             :   // FIXME: It may be better to record these sizes and add them as constraints
    3312             :   // to the dependency checks.
    3313         102 :   for (int i = 1; i < size; ++i) {
    3314         156 :     if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
    3315             :       return false;
    3316             : 
    3317         207 :     if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
    3318             :       return false;
    3319             : 
    3320          49 :     if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
    3321             :       return false;
    3322             : 
    3323          94 :     if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
    3324             :       return false;
    3325             :   }
    3326             : 
    3327             :   LLVM_DEBUG({
    3328             :     dbgs() << "\nSrcSubscripts: ";
    3329             :     for (int i = 0; i < size; i++)
    3330             :       dbgs() << *SrcSubscripts[i];
    3331             :     dbgs() << "\nDstSubscripts: ";
    3332             :     for (int i = 0; i < size; i++)
    3333             :       dbgs() << *DstSubscripts[i];
    3334             :   });
    3335             : 
    3336             :   // The delinearization transforms a single-subscript MIV dependence test into
    3337             :   // a multi-subscript SIV dependence test that is easier to compute. So we
    3338             :   // resize Pair to contain as many pairs of subscripts as the delinearization
    3339             :   // has found, and then initialize the pairs following the delinearization.
    3340          24 :   Pair.resize(size);
    3341          89 :   for (int i = 0; i < size; ++i) {
    3342         130 :     Pair[i].Src = SrcSubscripts[i];
    3343          65 :     Pair[i].Dst = DstSubscripts[i];
    3344         130 :     unifySubscriptType(&Pair[i]);
    3345             :   }
    3346             : 
    3347             :   return true;
    3348             : }
    3349             : 
    3350             : //===----------------------------------------------------------------------===//
    3351             : 
    3352             : #ifndef NDEBUG
    3353             : // For debugging purposes, dump a small bit vector to dbgs().
    3354             : static void dumpSmallBitVector(SmallBitVector &BV) {
    3355             :   dbgs() << "{";
    3356             :   for (unsigned VI : BV.set_bits()) {
    3357             :     dbgs() << VI;
    3358             :     if (BV.find_next(VI) >= 0)
    3359             :       dbgs() << ' ';
    3360             :   }
    3361             :   dbgs() << "}\n";
    3362             : }
    3363             : #endif
    3364             : 
    3365             : // depends -
    3366             : // Returns NULL if there is no dependence.
    3367             : // Otherwise, return a Dependence with as many details as possible.
    3368             : // Corresponds to Section 3.1 in the paper
    3369             : //
    3370             : //            Practical Dependence Testing
    3371             : //            Goff, Kennedy, Tseng
    3372             : //            PLDI 1991
    3373             : //
    3374             : // Care is required to keep the routine below, getSplitIteration(),
    3375             : // up to date with respect to this routine.
    3376             : std::unique_ptr<Dependence>
    3377        1798 : DependenceInfo::depends(Instruction *Src, Instruction *Dst,
    3378             :                         bool PossiblyLoopIndependent) {
    3379        1798 :   if (Src == Dst)
    3380             :     PossiblyLoopIndependent = false;
    3381             : 
    3382        3596 :   if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
    3383        2998 :       (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
    3384             :     // if both instructions don't reference memory, there's no dependence
    3385             :     return nullptr;
    3386             : 
    3387        1798 :   if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
    3388             :     // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
    3389             :     LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
    3390             :     return make_unique<Dependence>(Src, Dst);
    3391             :   }
    3392             : 
    3393             :   assert(isLoadOrStore(Src) && "instruction is not load or store");
    3394             :   assert(isLoadOrStore(Dst) && "instruction is not load or store");
    3395             :   Value *SrcPtr = getLoadStorePointerOperand(Src);
    3396             :   Value *DstPtr = getLoadStorePointerOperand(Dst);
    3397             : 
    3398        1798 :   switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(),
    3399        1798 :                                  MemoryLocation::get(Dst),
    3400        1798 :                                  MemoryLocation::get(Src))) {
    3401         538 :   case MayAlias:
    3402             :   case PartialAlias:
    3403             :     // cannot analyse objects if we don't understand their aliasing.
    3404             :     LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
    3405             :     return make_unique<Dependence>(Src, Dst);
    3406             :   case NoAlias:
    3407             :     // If the objects noalias, they are distinct, accesses are independent.
    3408             :     LLVM_DEBUG(dbgs() << "no alias\n");
    3409             :     return nullptr;
    3410             :   case MustAlias:
    3411             :     break; // The underlying objects alias; test accesses for dependence.
    3412             :   }
    3413             : 
    3414             :   // establish loop nesting levels
    3415         930 :   establishNestingLevels(Src, Dst);
    3416             :   LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
    3417             :   LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
    3418             : 
    3419         930 :   FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
    3420             :   ++TotalArrayPairs;
    3421             : 
    3422             :   unsigned Pairs = 1;
    3423        1860 :   SmallVector<Subscript, 2> Pair(Pairs);
    3424         930 :   const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
    3425         930 :   const SCEV *DstSCEV = SE->getSCEV(DstPtr);
    3426             :   LLVM_DEBUG(dbgs() << "    SrcSCEV = " << *SrcSCEV << "\n");
    3427             :   LLVM_DEBUG(dbgs() << "    DstSCEV = " << *DstSCEV << "\n");
    3428         930 :   Pair[0].Src = SrcSCEV;
    3429         930 :   Pair[0].Dst = DstSCEV;
    3430             : 
    3431         930 :   if (Delinearize) {
    3432         878 :     if (tryDelinearize(Src, Dst, Pair)) {
    3433             :       LLVM_DEBUG(dbgs() << "    delinearized\n");
    3434          24 :       Pairs = Pair.size();
    3435             :     }
    3436             :   }
    3437             : 
    3438        1901 :   for (unsigned P = 0; P < Pairs; ++P) {
    3439        1942 :     Pair[P].Loops.resize(MaxLevels + 1);
    3440         971 :     Pair[P].GroupLoops.resize(MaxLevels + 1);
    3441         971 :     Pair[P].Group.resize(Pairs);
    3442         971 :     removeMatchingExtensions(&Pair[P]);
    3443         971 :     Pair[P].Classification =
    3444        1942 :       classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
    3445         971 :                    Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
    3446         971 :                    Pair[P].Loops);
    3447         971 :     Pair[P].GroupLoops = Pair[P].Loops;
    3448         971 :     Pair[P].Group.set(P);
    3449             :     LLVM_DEBUG(dbgs() << "    subscript " << P << "\n");
    3450             :     LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
    3451             :     LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
    3452             :     LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
    3453             :     LLVM_DEBUG(dbgs() << "\tloops = ");
    3454             :     LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
    3455             :   }
    3456             : 
    3457        1860 :   SmallBitVector Separable(Pairs);
    3458        1860 :   SmallBitVector Coupled(Pairs);
    3459             : 
    3460             :   // Partition subscripts into separable and minimally-coupled groups
    3461             :   // Algorithm in paper is algorithmically better;
    3462             :   // this may be faster in practice. Check someday.
    3463             :   //
    3464             :   // Here's an example of how it works. Consider this code:
    3465             :   //
    3466             :   //   for (i = ...) {
    3467             :   //     for (j = ...) {
    3468             :   //       for (k = ...) {
    3469             :   //         for (l = ...) {
    3470             :   //           for (m = ...) {
    3471             :   //             A[i][j][k][m] = ...;
    3472             :   //             ... = A[0][j][l][i + j];
    3473             :   //           }
    3474             :   //         }
    3475             :   //       }
    3476             :   //     }
    3477             :   //   }
    3478             :   //
    3479             :   // There are 4 subscripts here:
    3480             :   //    0 [i] and [0]
    3481             :   //    1 [j] and [j]
    3482             :   //    2 [k] and [l]
    3483             :   //    3 [m] and [i + j]
    3484             :   //
    3485             :   // We've already classified each subscript pair as ZIV, SIV, etc.,
    3486             :   // and collected all the loops mentioned by pair P in Pair[P].Loops.
    3487             :   // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
    3488             :   // and set Pair[P].Group = {P}.
    3489             :   //
    3490             :   //      Src Dst    Classification Loops  GroupLoops Group
    3491             :   //    0 [i] [0]         SIV       {1}      {1}        {0}
    3492             :   //    1 [j] [j]         SIV       {2}      {2}        {1}
    3493             :   //    2 [k] [l]         RDIV      {3,4}    {3,4}      {2}
    3494             :   //    3 [m] [i + j]     MIV       {1,2,5}  {1,2,5}    {3}
    3495             :   //
    3496             :   // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
    3497             :   // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
    3498             :   //
    3499             :   // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
    3500             :   // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
    3501             :   // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
    3502             :   // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
    3503             :   // to either Separable or Coupled).
    3504             :   //
    3505             :   // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
    3506             :   // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty,
    3507             :   // so Pair[3].Group = {0, 1, 3} and Done = false.
    3508             :   //
    3509             :   // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
    3510             :   // Since Done remains true, we add 2 to the set of Separable pairs.
    3511             :   //
    3512             :   // Finally, we consider 3. There's nothing to compare it with,
    3513             :   // so Done remains true and we add it to the Coupled set.
    3514             :   // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
    3515             :   //
    3516             :   // In the end, we've got 1 separable subscript and 1 coupled group.
    3517        1901 :   for (unsigned SI = 0; SI < Pairs; ++SI) {
    3518        1942 :     if (Pair[SI].Classification == Subscript::NonLinear) {
    3519             :       // ignore these, but collect loops for later
    3520             :       ++NonlinearSubscriptPairs;
    3521          80 :       collectCommonLoops(Pair[SI].Src,
    3522          80 :                          LI->getLoopFor(Src->getParent()),
    3523          80 :                          Pair[SI].Loops);
    3524          80 :       collectCommonLoops(Pair[SI].Dst,
    3525          80 :                          LI->getLoopFor(Dst->getParent()),
    3526          80 :                          Pair[SI].Loops);
    3527          80 :       Result.Consistent = false;
    3528         891 :     } else if (Pair[SI].Classification == Subscript::ZIV) {
    3529             :       // always separable
    3530         187 :       Separable.set(SI);
    3531             :     }
    3532             :     else {
    3533             :       // SIV, RDIV, or MIV, so check for coupled group
    3534             :       bool Done = true;
    3535         762 :       for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
    3536         116 :         SmallBitVector Intersection = Pair[SI].GroupLoops;
    3537         116 :         Intersection &= Pair[SJ].GroupLoops;
    3538          58 :         if (Intersection.any()) {
    3539             :           // accumulate set of all the loops in group
    3540           2 :           Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
    3541             :           // accumulate set of all subscripts in group
    3542           2 :           Pair[SJ].Group |= Pair[SI].Group;
    3543             :           Done = false;
    3544             :         }
    3545             :       }
    3546         704 :       if (Done) {
    3547         703 :         if (Pair[SI].Group.count() == 1) {
    3548         702 :           Separable.set(SI);
    3549             :           ++SeparableSubscriptPairs;
    3550             :         }
    3551             :         else {
    3552           1 :           Coupled.set(SI);
    3553             :           ++CoupledSubscriptPairs;
    3554             :         }
    3555             :       }
    3556             :     }
    3557             :   }
    3558             : 
    3559             :   LLVM_DEBUG(dbgs() << "    Separable = ");
    3560             :   LLVM_DEBUG(dumpSmallBitVector(Separable));
    3561             :   LLVM_DEBUG(dbgs() << "    Coupled = ");
    3562             :   LLVM_DEBUG(dumpSmallBitVector(Coupled));
    3563             : 
    3564             :   Constraint NewConstraint;
    3565         930 :   NewConstraint.setAny(SE);
    3566             : 
    3567             :   // test separable subscripts
    3568        1761 :   for (unsigned SI : Separable.set_bits()) {
    3569             :     LLVM_DEBUG(dbgs() << "testing subscript " << SI);
    3570        1778 :     switch (Pair[SI].Classification) {
    3571         187 :     case Subscript::ZIV:
    3572             :       LLVM_DEBUG(dbgs() << ", ZIV\n");
    3573         187 :       if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
    3574             :         return nullptr;
    3575             :       break;
    3576         436 :     case Subscript::SIV: {
    3577             :       LLVM_DEBUG(dbgs() << ", SIV\n");
    3578             :       unsigned Level;
    3579         436 :       const SCEV *SplitIter = nullptr;
    3580         436 :       if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
    3581             :                   SplitIter))
    3582          30 :         return nullptr;
    3583         406 :       break;
    3584             :     }
    3585          22 :     case Subscript::RDIV:
    3586             :       LLVM_DEBUG(dbgs() << ", RDIV\n");
    3587          22 :       if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
    3588             :         return nullptr;
    3589             :       break;
    3590         244 :     case Subscript::MIV:
    3591             :       LLVM_DEBUG(dbgs() << ", MIV\n");
    3592         244 :       if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
    3593             :         return nullptr;
    3594             :       break;
    3595           0 :     default:
    3596           0 :       llvm_unreachable("subscript has unexpected classification");
    3597             :     }
    3598             :   }
    3599             : 
    3600         872 :   if (Coupled.count()) {
    3601             :     // test coupled subscript groups
    3602             :     LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");
    3603             :     LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
    3604           1 :     SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
    3605           4 :     for (unsigned II = 0; II <= MaxLevels; ++II)
    3606           6 :       Constraints[II].setAny(SE);
    3607           2 :     for (unsigned SI : Coupled.set_bits()) {
    3608             :       LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { ");
    3609           3 :       SmallBitVector Group(Pair[SI].Group);
    3610           2 :       SmallBitVector Sivs(Pairs);
    3611           2 :       SmallBitVector Mivs(Pairs);
    3612           2 :       SmallBitVector ConstrainedLevels(MaxLevels + 1);
    3613             :       SmallVector<Subscript *, 4> PairsInGroup;
    3614           3 :       for (unsigned SJ : Group.set_bits()) {
    3615             :         LLVM_DEBUG(dbgs() << SJ << " ");
    3616           4 :         if (Pair[SJ].Classification == Subscript::SIV)
    3617           2 :           Sivs.set(SJ);
    3618             :         else
    3619           0 :           Mivs.set(SJ);
    3620           2 :         PairsInGroup.push_back(&Pair[SJ]);
    3621             :       }
    3622           1 :       unifySubscriptType(PairsInGroup);
    3623             :       LLVM_DEBUG(dbgs() << "}\n");
    3624           2 :       while (Sivs.any()) {
    3625             :         bool Changed = false;
    3626           3 :         for (unsigned SJ : Sivs.set_bits()) {
    3627             :           LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
    3628             :           // SJ is an SIV subscript that's part of the current coupled group
    3629             :           unsigned Level;
    3630           2 :           const SCEV *SplitIter = nullptr;
    3631             :           LLVM_DEBUG(dbgs() << "SIV\n");
    3632           4 :           if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
    3633             :                       SplitIter))
    3634           0 :             return nullptr;
    3635           2 :           ConstrainedLevels.set(Level);
    3636           4 :           if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
    3637           4 :             if (Constraints[Level].isEmpty()) {
    3638             :               ++DeltaIndependence;
    3639             :               return nullptr;
    3640             :             }
    3641             :             Changed = true;
    3642             :           }
    3643           2 :           Sivs.reset(SJ);
    3644             :         }
    3645           1 :         if (Changed) {
    3646             :           // propagate, possibly creating new SIVs and ZIVs
    3647             :           LLVM_DEBUG(dbgs() << "    propagating\n");
    3648             :           LLVM_DEBUG(dbgs() << "\tMivs = ");
    3649             :           LLVM_DEBUG(dumpSmallBitVector(Mivs));
    3650           1 :           for (unsigned SJ : Mivs.set_bits()) {
    3651             :             // SJ is an MIV subscript that's part of the current coupled group
    3652             :             LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
    3653           0 :             if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
    3654             :                           Constraints, Result.Consistent)) {
    3655             :               LLVM_DEBUG(dbgs() << "\t    Changed\n");
    3656             :               ++DeltaPropagations;
    3657           0 :               Pair[SJ].Classification =
    3658           0 :                 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
    3659           0 :                              Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
    3660           0 :                              Pair[SJ].Loops);
    3661           0 :               switch (Pair[SJ].Classification) {
    3662           0 :               case Subscript::ZIV:
    3663             :                 LLVM_DEBUG(dbgs() << "ZIV\n");
    3664           0 :                 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
    3665             :                   return nullptr;
    3666           0 :                 Mivs.reset(SJ);
    3667           0 :                 break;
    3668           0 :               case Subscript::SIV:
    3669           0 :                 Sivs.set(SJ);
    3670           0 :                 Mivs.reset(SJ);
    3671           0 :                 break;
    3672             :               case Subscript::RDIV:
    3673             :               case Subscript::MIV:
    3674             :                 break;
    3675           0 :               default:
    3676           0 :                 llvm_unreachable("bad subscript classification");
    3677             :               }
    3678             :             }
    3679             :           }
    3680             :         }
    3681             :       }
    3682             : 
    3683             :       // test & propagate remaining RDIVs
    3684           1 :       for (unsigned SJ : Mivs.set_bits()) {
    3685           0 :         if (Pair[SJ].Classification == Subscript::RDIV) {
    3686             :           LLVM_DEBUG(dbgs() << "RDIV test\n");
    3687           0 :           if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
    3688             :             return nullptr;
    3689             :           // I don't yet understand how to propagate RDIV results
    3690           0 :           Mivs.reset(SJ);
    3691             :         }
    3692             :       }
    3693             : 
    3694             :       // test remaining MIVs
    3695             :       // This code is temporary.
    3696             :       // Better to somehow test all remaining subscripts simultaneously.
    3697           1 :       for (unsigned SJ : Mivs.set_bits()) {
    3698           0 :         if (Pair[SJ].Classification == Subscript::MIV) {
    3699             :           LLVM_DEBUG(dbgs() << "MIV test\n");
    3700           0 :           if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
    3701             :             return nullptr;
    3702             :         }
    3703             :         else
    3704           0 :           llvm_unreachable("expected only MIV subscripts at this point");
    3705             :       }
    3706             : 
    3707             :       // update Result.DV from constraint vector
    3708             :       LLVM_DEBUG(dbgs() << "    updating\n");
    3709           2 :       for (unsigned SJ : ConstrainedLevels.set_bits()) {
    3710           1 :         if (SJ > CommonLevels)
    3711             :           break;
    3712           3 :         updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
    3713           1 :         if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
    3714             :           return nullptr;
    3715             :       }
    3716             :     }
    3717             :   }
    3718             : 
    3719             :   // Make sure the Scalar flags are set correctly.
    3720        1744 :   SmallBitVector CompleteLoops(MaxLevels + 1);
    3721        1785 :   for (unsigned SI = 0; SI < Pairs; ++SI)
    3722        1826 :     CompleteLoops |= Pair[SI].Loops;
    3723        2187 :   for (unsigned II = 1; II <= CommonLevels; ++II)
    3724        1315 :     if (CompleteLoops[II])
    3725        2128 :       Result.DV[II - 1].Scalar = false;
    3726             : 
    3727         872 :   if (PossiblyLoopIndependent) {
    3728             :     // Make sure the LoopIndependent flag is set correctly.
    3729             :     // All directions must include equal, otherwise no
    3730             :     // loop-independent dependence is possible.
    3731         672 :     for (unsigned II = 1; II <= CommonLevels; ++II) {
    3732         462 :       if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
    3733          84 :         Result.LoopIndependent = false;
    3734          84 :         break;
    3735             :       }
    3736             :     }
    3737             :   }
    3738             :   else {
    3739             :     // On the other hand, if all directions are equal and there's no
    3740             :     // loop-independent dependence possible, then no dependence exists.
    3741             :     bool AllEqual = true;
    3742        1132 :     for (unsigned II = 1; II <= CommonLevels; ++II) {
    3743         690 :       if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
    3744             :         AllEqual = false;
    3745             :         break;
    3746             :       }
    3747             :     }
    3748         578 :     if (AllEqual)
    3749             :       return nullptr;
    3750             :   }
    3751             : 
    3752         430 :   return make_unique<FullDependence>(std::move(Result));
    3753             : }
    3754             : 
    3755             : 
    3756             : 
    3757             : //===----------------------------------------------------------------------===//
    3758             : // getSplitIteration -
    3759             : // Rather than spend rarely-used space recording the splitting iteration
    3760             : // during the Weak-Crossing SIV test, we re-compute it on demand.
    3761             : // The re-computation is basically a repeat of the entire dependence test,
    3762             : // though simplified since we know that the dependence exists.
    3763             : // It's tedious, since we must go through all propagations, etc.
    3764             : //
    3765             : // Care is required to keep this code up to date with respect to the routine
    3766             : // above, depends().
    3767             : //
    3768             : // Generally, the dependence analyzer will be used to build
    3769             : // a dependence graph for a function (basically a map from instructions
    3770             : // to dependences). Looking for cycles in the graph shows us loops
    3771             : // that cannot be trivially vectorized/parallelized.
    3772             : //
    3773             : // We can try to improve the situation by examining all the dependences
    3774             : // that make up the cycle, looking for ones we can break.
    3775             : // Sometimes, peeling the first or last iteration of a loop will break
    3776             : // dependences, and we've got flags for those possibilities.
    3777             : // Sometimes, splitting a loop at some other iteration will do the trick,
    3778             : // and we've got a flag for that case. Rather than waste the space to
    3779             : // record the exact iteration (since we rarely know), we provide
    3780             : // a method that calculates the iteration. It's a drag that it must work
    3781             : // from scratch, but wonderful in that it's possible.
    3782             : //
    3783             : // Here's an example:
    3784             : //
    3785             : //    for (i = 0; i < 10; i++)
    3786             : //        A[i] = ...
    3787             : //        ... = A[11 - i]
    3788             : //
    3789             : // There's a loop-carried flow dependence from the store to the load,
    3790             : // found by the weak-crossing SIV test. The dependence will have a flag,
    3791             : // indicating that the dependence can be broken by splitting the loop.
    3792             : // Calling getSplitIteration will return 5.
    3793             : // Splitting the loop breaks the dependence, like so:
    3794             : //
    3795             : //    for (i = 0; i <= 5; i++)
    3796             : //        A[i] = ...
    3797             : //        ... = A[11 - i]
    3798             : //    for (i = 6; i < 10; i++)
    3799             : //        A[i] = ...
    3800             : //        ... = A[11 - i]
    3801             : //
    3802             : // breaks the dependence and allows us to vectorize/parallelize
    3803             : // both loops.
    3804           3 : const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
    3805             :                                               unsigned SplitLevel) {
    3806             :   assert(Dep.isSplitable(SplitLevel) &&
    3807             :          "Dep should be splitable at SplitLevel");
    3808           3 :   Instruction *Src = Dep.getSrc();
    3809           3 :   Instruction *Dst = Dep.getDst();
    3810             :   assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
    3811             :   assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
    3812             :   assert(isLoadOrStore(Src));
    3813             :   assert(isLoadOrStore(Dst));
    3814             :   Value *SrcPtr = getLoadStorePointerOperand(Src);
    3815             :   Value *DstPtr = getLoadStorePointerOperand(Dst);
    3816             :   assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(),
    3817             :                                 MemoryLocation::get(Dst),
    3818             :                                 MemoryLocation::get(Src)) == MustAlias);
    3819             : 
    3820             :   // establish loop nesting levels
    3821           3 :   establishNestingLevels(Src, Dst);
    3822             : 
    3823           3 :   FullDependence Result(Src, Dst, false, CommonLevels);
    3824             : 
    3825             :   unsigned Pairs = 1;
    3826           6 :   SmallVector<Subscript, 2> Pair(Pairs);
    3827           3 :   const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
    3828           3 :   const SCEV *DstSCEV = SE->getSCEV(DstPtr);
    3829           3 :   Pair[0].Src = SrcSCEV;
    3830           3 :   Pair[0].Dst = DstSCEV;
    3831             : 
    3832           3 :   if (Delinearize) {
    3833           3 :     if (tryDelinearize(Src, Dst, Pair)) {
    3834             :       LLVM_DEBUG(dbgs() << "    delinearized\n");
    3835           0 :       Pairs = Pair.size();
    3836             :     }
    3837             :   }
    3838             : 
    3839           6 :   for (unsigned P = 0; P < Pairs; ++P) {
    3840           6 :     Pair[P].Loops.resize(MaxLevels + 1);
    3841           3 :     Pair[P].GroupLoops.resize(MaxLevels + 1);
    3842           3 :     Pair[P].Group.resize(Pairs);
    3843           3 :     removeMatchingExtensions(&Pair[P]);
    3844           3 :     Pair[P].Classification =
    3845           6 :       classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
    3846           3 :                    Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
    3847           3 :                    Pair[P].Loops);
    3848           3 :     Pair[P].GroupLoops = Pair[P].Loops;
    3849           3 :     Pair[P].Group.set(P);
    3850             :   }
    3851             : 
    3852           6 :   SmallBitVector Separable(Pairs);
    3853           6 :   SmallBitVector Coupled(Pairs);
    3854             : 
    3855             :   // partition subscripts into separable and minimally-coupled groups
    3856           6 :   for (unsigned SI = 0; SI < Pairs; ++SI) {
    3857           6 :     if (Pair[SI].Classification == Subscript::NonLinear) {
    3858             :       // ignore these, but collect loops for later
    3859           0 :       collectCommonLoops(Pair[SI].Src,
    3860           0 :                          LI->getLoopFor(Src->getParent()),
    3861           0 :                          Pair[SI].Loops);
    3862           0 :       collectCommonLoops(Pair[SI].Dst,
    3863           0 :                          LI->getLoopFor(Dst->getParent()),
    3864           0 :                          Pair[SI].Loops);
    3865           0 :       Result.Consistent = false;
    3866             :     }
    3867           3 :     else if (Pair[SI].Classification == Subscript::ZIV)
    3868           0 :       Separable.set(SI);
    3869             :     else {
    3870             :       // SIV, RDIV, or MIV, so check for coupled group
    3871             :       bool Done = true;
    3872           3 :       for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
    3873           0 :         SmallBitVector Intersection = Pair[SI].GroupLoops;
    3874           0 :         Intersection &= Pair[SJ].GroupLoops;
    3875           0 :         if (Intersection.any()) {
    3876             :           // accumulate set of all the loops in group
    3877           0 :           Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
    3878             :           // accumulate set of all subscripts in group
    3879           0 :           Pair[SJ].Group |= Pair[SI].Group;
    3880             :           Done = false;
    3881             :         }
    3882             :       }
    3883           3 :       if (Done) {
    3884           3 :         if (Pair[SI].Group.count() == 1)
    3885           3 :           Separable.set(SI);
    3886             :         else
    3887           0 :           Coupled.set(SI);
    3888             :       }
    3889             :     }
    3890             :   }
    3891             : 
    3892             :   Constraint NewConstraint;
    3893           3 :   NewConstraint.setAny(SE);
    3894             : 
    3895             :   // test separable subscripts
    3896           3 :   for (unsigned SI : Separable.set_bits()) {
    3897           6 :     switch (Pair[SI].Classification) {
    3898           3 :     case Subscript::SIV: {
    3899             :       unsigned Level;
    3900           3 :       const SCEV *SplitIter = nullptr;
    3901           3 :       (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
    3902             :                      Result, NewConstraint, SplitIter);
    3903           3 :       if (Level == SplitLevel) {
    3904             :         assert(SplitIter != nullptr);
    3905           3 :         return SplitIter;
    3906             :       }
    3907           0 :       break;
    3908             :     }
    3909             :     case Subscript::ZIV:
    3910             :     case Subscript::RDIV:
    3911             :     case Subscript::MIV:
    3912             :       break;
    3913           0 :     default:
    3914           0 :       llvm_unreachable("subscript has unexpected classification");
    3915             :     }
    3916             :   }
    3917             : 
    3918           0 :   if (Coupled.count()) {
    3919             :     // test coupled subscript groups
    3920           0 :     SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
    3921           0 :     for (unsigned II = 0; II <= MaxLevels; ++II)
    3922           0 :       Constraints[II].setAny(SE);
    3923           0 :     for (unsigned SI : Coupled.set_bits()) {
    3924           0 :       SmallBitVector Group(Pair[SI].Group);
    3925           0 :       SmallBitVector Sivs(Pairs);
    3926           0 :       SmallBitVector Mivs(Pairs);
    3927           0 :       SmallBitVector ConstrainedLevels(MaxLevels + 1);
    3928           0 :       for (unsigned SJ : Group.set_bits()) {
    3929           0 :         if (Pair[SJ].Classification == Subscript::SIV)
    3930           0 :           Sivs.set(SJ);
    3931             :         else
    3932           0 :           Mivs.set(SJ);
    3933             :       }
    3934           0 :       while (Sivs.any()) {
    3935             :         bool Changed = false;
    3936           0 :         for (unsigned SJ : Sivs.set_bits()) {
    3937             :           // SJ is an SIV subscript that's part of the current coupled group
    3938             :           unsigned Level;
    3939           0 :           const SCEV *SplitIter = nullptr;
    3940           0 :           (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
    3941             :                          Result, NewConstraint, SplitIter);
    3942           0 :           if (Level == SplitLevel && SplitIter)
    3943           0 :             return SplitIter;
    3944           0 :           ConstrainedLevels.set(Level);
    3945           0 :           if (intersectConstraints(&Constraints[Level], &NewConstraint))
    3946             :             Changed = true;
    3947           0 :           Sivs.reset(SJ);
    3948             :         }
    3949           0 :         if (Changed) {
    3950             :           // propagate, possibly creating new SIVs and ZIVs
    3951           0 :           for (unsigned SJ : Mivs.set_bits()) {
    3952             :             // SJ is an MIV subscript that's part of the current coupled group
    3953           0 :             if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
    3954           0 :                           Pair[SJ].Loops, Constraints, Result.Consistent)) {
    3955           0 :               Pair[SJ].Classification =
    3956           0 :                 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
    3957           0 :                              Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
    3958           0 :                              Pair[SJ].Loops);
    3959           0 :               switch (Pair[SJ].Classification) {
    3960           0 :               case Subscript::ZIV:
    3961           0 :                 Mivs.reset(SJ);
    3962           0 :                 break;
    3963           0 :               case Subscript::SIV:
    3964           0 :                 Sivs.set(SJ);
    3965           0 :                 Mivs.reset(SJ);
    3966           0 :                 break;
    3967             :               case Subscript::RDIV:
    3968             :               case Subscript::MIV:
    3969             :                 break;
    3970           0 :               default:
    3971           0 :                 llvm_unreachable("bad subscript classification");
    3972             :               }
    3973             :             }
    3974             :           }
    3975             :         }
    3976             :       }
    3977             :     }
    3978             :   }
    3979           0 :   llvm_unreachable("somehow reached end of routine");
    3980             :   return nullptr;
    3981             : }

Generated by: LCOV version 1.13