Line data Source code
1 : //===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines some vectorizer utilities.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #ifndef LLVM_ANALYSIS_VECTORUTILS_H
15 : #define LLVM_ANALYSIS_VECTORUTILS_H
16 :
17 : #include "llvm/ADT/MapVector.h"
18 : #include "llvm/Analysis/LoopAccessAnalysis.h"
19 : #include "llvm/Analysis/TargetLibraryInfo.h"
20 : #include "llvm/IR/IRBuilder.h"
21 :
22 : namespace llvm {
23 :
24 : template <typename T> class ArrayRef;
25 : class DemandedBits;
26 : class GetElementPtrInst;
27 : class Loop;
28 : class ScalarEvolution;
29 : class TargetTransformInfo;
30 : class Type;
31 : class Value;
32 :
33 : namespace Intrinsic {
34 : enum ID : unsigned;
35 : }
36 :
37 : /// Identify if the intrinsic is trivially vectorizable.
38 : /// This method returns true if the intrinsic's argument types are all
39 : /// scalars for the scalar form of the intrinsic and all vectors for
40 : /// the vector form of the intrinsic.
41 : bool isTriviallyVectorizable(Intrinsic::ID ID);
42 :
43 : /// Identifies if the intrinsic has a scalar operand. It checks for the
44 : /// ctlz, cttz and powi intrinsics, whose operand at \p ScalarOpdIdx is scalar.
45 : bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
46 :
47 : /// Returns the intrinsic ID for a call.
48 : /// For the input call instruction it finds the corresponding intrinsic and
49 : /// returns its intrinsic ID; if no mapping is found, it returns not_intrinsic.
50 : Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
51 : const TargetLibraryInfo *TLI);
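// A minimal usage sketch, assuming a CallInst *CI and a TargetLibraryInfo *TLI
// are at hand (the names here are illustrative only):
//
//   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
//   if (ID != Intrinsic::not_intrinsic && isTriviallyVectorizable(ID))
//     ...; // The call can be widened by calling the intrinsic on vector operands.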
52 :
53 : /// Find the operand of the GEP that should be checked for consecutive
54 : /// stores. This ignores trailing indices that have no effect on the final
55 : /// pointer.
56 : unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);
57 :
58 : /// If the argument is a GEP, then returns the operand identified by
59 : /// getGEPInductionOperand. However, if there is some other non-loop-invariant
60 : /// operand, it returns that instead.
61 : Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
62 :
63 : /// If a value has only one user that is a CastInst, return it.
64 : Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);
65 :
66 : /// Get the stride of a pointer access in a loop. Looks for symbolic
67 : /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
68 : Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
69 :
70 : /// Given a vector and an element number, see if the scalar value is
71 : /// already around as a register, for example if it were inserted then extracted
72 : /// from the vector.
73 : Value *findScalarElement(Value *V, unsigned EltNo);
74 :
75 : /// Get splat value if the input is a splat vector or return nullptr.
76 : /// The value may be extracted from a splat constant vector or from
77 : /// a sequence of instructions that broadcast a single value into a vector.
78 : const Value *getSplatValue(const Value *V);
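// For example, given IR that broadcasts a scalar %x into a vector:
//
//   %ins = insertelement <4 x i32> undef, i32 %x, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//
// getSplatValue(%splat) returns %x; for values not recognized as a splat it
// returns nullptr.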
79 :
80 : /// Compute a map of integer instructions to their minimum legal type
81 : /// size.
82 : ///
83 : /// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
84 : /// type (e.g. i32) whenever arithmetic is performed on them.
85 : ///
86 : /// For targets with native i8 or i16 operations, usually InstCombine can shrink
87 : /// the arithmetic type down again. However InstCombine refuses to create
88 : /// illegal types, so for targets without i8 or i16 registers, the lengthening
89 : /// and shrinking remains.
90 : ///
91 : /// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
92 : /// their scalar equivalents do not, so during vectorization it is important to
93 : /// remove these extensions and truncations when deciding the profitability of
94 : /// vectorization.
95 : ///
96 : /// This function analyzes the given range of instructions and determines the
97 : /// minimum type size each can be converted to. It attempts to remove or
98 : /// minimize type size changes across each def-use chain, so for example in the
99 : /// following code:
100 : ///
101 : /// %1 = load i8, i8*
102 : /// %2 = add i8 %1, 2
103 : /// %3 = load i16, i16*
104 : /// %4 = zext i8 %2 to i32
105 : /// %5 = zext i16 %3 to i32
106 : /// %6 = add i32 %4, %5
107 : /// %7 = trunc i32 %6 to i16
108 : ///
109 : /// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
110 : /// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
111 : ///
112 : /// If the optional TargetTransformInfo is provided, this function tries harder
113 : /// to do less work by only looking at illegal types.
114 : MapVector<Instruction*, uint64_t>
115 : computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
116 : DemandedBits &DB,
117 : const TargetTransformInfo *TTI=nullptr);
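// A minimal usage sketch, assuming the loop's blocks are collected in Blocks, a
// DemandedBits analysis DB is available, and TTI is a TargetTransformInfo (the
// names here are illustrative only):
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(Blocks, DB, &TTI);
//   // MinBWs[I] is the smallest bit width instruction I can be computed in.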
118 :
119 : /// Propagate metadata from the instructions in \p VL to \p I. Let Kinds =
120 : /// [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal]. For each
121 : /// K in Kinds, we get the MDNode for K from each element of VL, compute their
122 : /// "intersection" (i.e., the most generic metadata value that covers all of
123 : /// the individual values), and set I's metadata for K to that intersection.
124 : ///
125 : /// This function always sets a (possibly null) value for each K in Kinds.
126 : Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
127 :
128 : /// Create a mask with replicated elements.
129 : ///
130 : /// This function creates a shuffle mask for replicating each of the \p VF
131 : /// elements in a vector \p ReplicationFactor times. It can be used to
132 : /// transform a mask of \p VF elements into a mask of
133 : /// \p VF * \p ReplicationFactor elements used by a predicated
134 : /// interleaved group of loads/stores whose interleave factor ==
135 : /// \p ReplicationFactor.
136 : ///
137 : /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
138 : ///
139 : /// <0,0,0,1,1,1,2,2,2,3,3,3>
140 : Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
141 : unsigned VF);
142 :
143 : /// Create an interleave shuffle mask.
144 : ///
145 : /// This function creates a shuffle mask for interleaving \p NumVecs vectors of
146 : /// vectorization factor \p VF into a single wide vector. The mask is of the
147 : /// form:
148 : ///
149 : /// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
150 : ///
151 : /// For example, the mask for VF = 4 and NumVecs = 2 is:
152 : ///
153 : /// <0, 4, 1, 5, 2, 6, 3, 7>.
154 : Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
155 : unsigned NumVecs);
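// A minimal usage sketch, assuming an existing IRBuilder<> Builder and two
// <4 x i32> values V0 and V1 (the names here are illustrative only):
//
//   Constant *Mask = createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2);
//   Value *Wide = Builder.CreateShuffleVector(V0, V1, Mask);
//
// Wide now holds <V0[0], V1[0], V0[1], V1[1], V0[2], V1[2], V0[3], V1[3]>.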
156 :
157 : /// Create a stride shuffle mask.
158 : ///
159 : /// This function creates a shuffle mask whose elements begin at \p Start and
160 : /// are incremented by \p Stride. The mask can be used to deinterleave an
161 : /// interleaved vector into separate vectors of vectorization factor \p VF. The
162 : /// mask is of the form:
163 : ///
164 : /// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
165 : ///
166 : /// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
167 : ///
168 : /// <0, 2, 4, 6>
169 : Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
170 : unsigned Stride, unsigned VF);
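// A minimal deinterleaving sketch, assuming an IRBuilder<> Builder and an
// <8 x i32> value Wide holding two interleaved 4-element sequences (the names
// here are illustrative only):
//
//   Constant *EvenMask = createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4);
//   Constant *OddMask = createStrideMask(Builder, /*Start=*/1, /*Stride=*/2, /*VF=*/4);
//   Value *Undef = UndefValue::get(Wide->getType());
//   Value *Evens = Builder.CreateShuffleVector(Wide, Undef, EvenMask);
//   Value *Odds = Builder.CreateShuffleVector(Wide, Undef, OddMask);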
171 :
172 : /// Create a sequential shuffle mask.
173 : ///
174 : /// This function creates a shuffle mask whose elements are sequential and begin
175 : /// at \p Start. The mask contains \p NumInts integers and is padded with \p
176 : /// NumUndefs undef values. The mask is of the form:
177 : ///
178 : /// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
179 : ///
180 : /// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
181 : ///
182 : /// <0, 1, 2, 3, undef, undef, undef, undef>
183 : Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
184 : unsigned NumInts, unsigned NumUndefs);
185 :
186 : /// Concatenate a list of vectors.
187 : ///
188 : /// This function generates code that concatenates the vectors in \p Vecs into a
189 : /// single large vector. The number of vectors should be greater than one, and
190 : /// their element types should be the same. The number of elements in the
191 : /// vectors should also be the same; however, if the last vector has fewer
192 : /// elements, it will be padded with undefs.
193 : Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
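// A minimal usage sketch, assuming an IRBuilder<> Builder and two <4 x i32>
// values V0 and V1 (the names here are illustrative only):
//
//   Value *Wide = concatenateVectors(Builder, {V0, V1});
//
// Wide is an <8 x i32> value whose low lanes come from V0 and high lanes from V1.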
194 :
195 : /// A group of interleaved loads/stores sharing the same stride and
196 : /// close to each other.
197 : ///
198 : /// Each member in this group has an index starting from 0, and the largest
199 : /// index should be less than the interleave factor, which is equal to the
200 : /// absolute value of the access's stride.
201 : ///
202 : /// E.g. An interleaved load group of factor 4:
203 : /// for (unsigned i = 0; i < 1024; i+=4) {
204 : /// a = A[i]; // Member of index 0
205 : /// b = A[i+1]; // Member of index 1
206 : /// d = A[i+3]; // Member of index 3
207 : /// ...
208 : /// }
209 : ///
210 : /// An interleaved store group of factor 4:
211 : /// for (unsigned i = 0; i < 1024; i+=4) {
212 : /// ...
213 : /// A[i] = a; // Member of index 0
214 : /// A[i+1] = b; // Member of index 1
215 : /// A[i+2] = c; // Member of index 2
216 : /// A[i+3] = d; // Member of index 3
217 : /// }
218 : ///
219 : /// Note: the interleaved load group could have gaps (missing members), but
220 : /// the interleaved store group doesn't allow gaps.
221 99 : class InterleaveGroup {
222 : public:
223 99 : InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
224 99 : : Align(Align), InsertPos(Instr) {
225 : assert(Align && "The alignment should be non-zero");
226 :
227 99 : Factor = std::abs(Stride);
228 : assert(Factor > 1 && "Invalid interleave factor");
229 :
230 99 : Reverse = Stride < 0;
231 99 : Members[0] = Instr;
232 99 : }
233 :
234 0 : bool isReverse() const { return Reverse; }
235 0 : unsigned getFactor() const { return Factor; }
236 0 : unsigned getAlignment() const { return Align; }
237 : unsigned getNumMembers() const { return Members.size(); }
238 :
239 : /// Try to insert a new member \p Instr with index \p Index and
240 : /// alignment \p NewAlign. The index is relative to the leader and may be
241 : /// negative if the new member becomes the new leader.
242 : ///
243 : /// \returns false if the instruction doesn't belong to the group.
244 68 : bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
245 : assert(NewAlign && "The new member's alignment should be non-zero");
246 :
247 68 : int Key = Index + SmallestKey;
248 :
249 : // Skip if there is already a member with the same index.
250 68 : if (Members.find(Key) != Members.end())
251 : return false;
252 :
253 62 : if (Key > LargestKey) {
254 : // The largest index is always less than the interleave factor.
255 2 : if (Index >= static_cast<int>(Factor))
256 : return false;
257 :
258 2 : LargestKey = Key;
259 60 : } else if (Key < SmallestKey) {
260 : // The key range (LargestKey - SmallestKey) must be less than the interleave factor.
261 60 : if (LargestKey - Key >= static_cast<int>(Factor))
262 : return false;
263 :
264 59 : SmallestKey = Key;
265 : }
266 :
267 : // It's always safe to select the minimum alignment.
268 61 : Align = std::min(Align, NewAlign);
269 61 : Members[Key] = Instr;
270 61 : return true;
271 : }
272 :
273 : /// Get the member with the given index \p Index
274 : ///
275 : /// \returns nullptr if the group contains no such member.
276 729 : Instruction *getMember(unsigned Index) const {
277 729 : int Key = SmallestKey + Index;
278 729 : auto Member = Members.find(Key);
279 729 : if (Member == Members.end())
280 : return nullptr;
281 :
282 475 : return Member->second;
283 : }
284 :
285 : /// Get the index for the given member. Unlike the key in the member
286 : /// map, the index starts from 0.
287 130 : unsigned getIndex(Instruction *Instr) const {
288 160 : for (auto I : Members)
289 160 : if (I.second == Instr)
290 130 : return I.first - SmallestKey;
291 :
292 0 : llvm_unreachable("InterleaveGroup contains no such member");
293 : }
294 :
295 0 : Instruction *getInsertPos() const { return InsertPos; }
296 36 : void setInsertPos(Instruction *Inst) { InsertPos = Inst; }
297 :
298 : /// Add metadata (e.g. alias info) from the instructions in this group to \p
299 : /// NewInst.
300 : ///
301 : /// FIXME: this function currently does not add noalias metadata à la
302 : /// addNewMetadata. To do that we need to compute the intersection of the
303 : /// noalias info from all members.
304 93 : void addMetadata(Instruction *NewInst) const {
305 : SmallVector<Value *, 4> VL;
306 93 : std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
307 93 : [](std::pair<int, Instruction *> p) { return p.second; });
308 93 : propagateMetadata(NewInst, VL);
309 93 : }
310 :
311 : private:
312 : unsigned Factor; // Interleave Factor.
313 : bool Reverse;
314 : unsigned Align;
315 : DenseMap<int, Instruction *> Members;
316 : int SmallestKey = 0;
317 : int LargestKey = 0;
318 :
319 : // To avoid breaking dependences, vectorized instructions of an interleave
320 : // group should be inserted at either the first load or the last store in
321 : // program order.
322 : //
323 : // E.g. %even = load i32 // Insert Position
324 : // %add = add i32 %even // Use of %even
325 : // %odd = load i32
326 : //
327 : // store i32 %even
328 : // %odd = add i32 // Def of %odd
329 : // store i32 %odd // Insert Position
330 : Instruction *InsertPos;
331 : };
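// The members of a group are typically walked by index, skipping gaps, much as
// InterleavedAccessInfo::releaseGroup() below does (a sketch, assuming a valid
// InterleaveGroup *Group):
//
//   for (unsigned I = 0; I < Group->getFactor(); ++I)
//     if (Instruction *Member = Group->getMember(I))
//       ...; // Process this member; a null result means the slot is a gap.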
332 :
333 : /// Drive the analysis of interleaved memory accesses in the loop.
334 : ///
335 : /// Use this class to analyze interleaved accesses only when we can vectorize
336 : /// a loop. Otherwise it's meaningless to do the analysis, as vectorization
337 : /// of interleaved accesses is unsafe.
338 : ///
339 : /// The analysis collects interleave groups and records the relationships
340 : /// between the members and their groups in a map.
341 : class InterleavedAccessInfo {
342 : public:
343 : InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
344 : DominatorTree *DT, LoopInfo *LI,
345 : const LoopAccessInfo *LAI)
346 989 : : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
347 :
348 989 : ~InterleavedAccessInfo() { reset(); }
349 :
350 : /// Analyze the interleaved accesses and collect them in interleave
351 : /// groups, substituting symbolic strides where they are known.
352 : /// Consider also predicated loads/stores in the analysis if
353 : /// \p EnableMaskedInterleavedGroup is true.
354 : void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
355 :
356 : /// Invalidate groups, e.g., in case all blocks in the loop will be predicated
357 : /// contrary to the original assumption. Although we currently prevent group
358 : /// formation for predicated accesses, we may be able to relax this limitation
359 : /// in the future once we handle more complicated blocks.
360 995 : void reset() {
361 : SmallPtrSet<InterleaveGroup *, 4> DelSet;
362 : // Avoid releasing a pointer twice.
363 1121 : for (auto &I : InterleaveGroupMap)
364 126 : DelSet.insert(I.second);
365 1063 : for (auto *Ptr : DelSet)
366 136 : delete Ptr;
367 995 : InterleaveGroupMap.clear();
368 995 : RequiresScalarEpilogue = false;
369 995 : }
370 :
371 :
372 : /// Check if \p Instr belongs to any interleave group.
373 : bool isInterleaved(Instruction *Instr) const {
374 801 : return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
375 : }
376 :
377 : /// Get the interleave group that \p Instr belongs to.
378 : ///
379 : /// \returns nullptr if \p Instr does not belong to any group.
380 : InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
381 22236 : auto Group = InterleaveGroupMap.find(Instr);
382 22236 : if (Group == InterleaveGroupMap.end())
383 : return nullptr;
384 797 : return Group->second;
385 : }
386 :
387 : /// Returns true if an interleaved group that may access memory
388 : /// out-of-bounds requires a scalar epilogue iteration for correctness.
389 0 : bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
390 :
391 : private:
392 : /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
393 : /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
394 : /// The interleaved access analysis can also add new predicates (for example
395 : /// by versioning strides of pointers).
396 : PredicatedScalarEvolution &PSE;
397 :
398 : Loop *TheLoop;
399 : DominatorTree *DT;
400 : LoopInfo *LI;
401 : const LoopAccessInfo *LAI;
402 :
403 : /// True if the loop may contain non-reversed interleaved groups with
404 : /// out-of-bounds accesses. We ensure we don't speculatively access memory
405 : /// out-of-bounds by executing at least one scalar epilogue iteration.
406 : bool RequiresScalarEpilogue = false;
407 :
408 : /// Holds the relationships between the members and the interleave group.
409 : DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
410 :
411 : /// Holds dependences among the memory accesses in the loop. It maps a source
412 : /// access to a set of dependent sink accesses.
413 : DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
414 :
415 : /// The descriptor for a strided memory access.
416 : struct StrideDescriptor {
417 : StrideDescriptor() = default;
418 : StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
419 : unsigned Align)
420 : : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
421 :
422 : // The access's stride. It is negative for a reverse access.
423 : int64_t Stride = 0;
424 :
425 : // The scalar expression of this access.
426 : const SCEV *Scev = nullptr;
427 :
428 : // The size of the memory object.
429 : uint64_t Size = 0;
430 :
431 : // The alignment of this access.
432 : unsigned Align = 0;
433 : };
434 :
435 : /// A type for holding instructions and their stride descriptors.
436 : using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
437 :
438 : /// Create a new interleave group with the given instruction \p Instr,
439 : /// stride \p Stride and alignment \p Align.
440 : ///
441 : /// \returns the newly created interleave group.
442 99 : InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
443 : unsigned Align) {
444 : assert(!isInterleaved(Instr) && "Already in an interleaved access group");
445 99 : InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
446 99 : return InterleaveGroupMap[Instr];
447 : }
448 :
449 : /// Release the group and remove all the relationships.
450 31 : void releaseGroup(InterleaveGroup *Group) {
451 107 : for (unsigned i = 0; i < Group->getFactor(); i++)
452 76 : if (Instruction *Member = Group->getMember(i))
453 34 : InterleaveGroupMap.erase(Member);
454 :
455 31 : delete Group;
456 31 : }
457 :
458 : /// Collect all the accesses with a constant stride in program order.
459 : void collectConstStrideAccesses(
460 : MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
461 : const ValueToValueMap &Strides);
462 :
463 : /// Returns true if \p Stride is allowed in an interleaved group.
464 : static bool isStrided(int Stride);
465 :
466 : /// Returns true if \p BB is a predicated block.
467 0 : bool isPredicated(BasicBlock *BB) const {
468 337 : return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
469 : }
470 :
471 : /// Returns true if LoopAccessInfo can be used for dependence queries.
472 0 : bool areDependencesValid() const {
473 0 : return LAI && LAI->getDepChecker().getDependences();
474 : }
475 :
476 : /// Returns true if memory accesses \p A and \p B can be reordered, if
477 : /// necessary, when constructing interleaved groups.
478 : ///
479 : /// \p A must precede \p B in program order. We return false only when
480 : /// reordering is prevented because \p A and \p B may be dependent.
481 1176 : bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
482 : StrideEntry *B) const {
483 : // Code motion for interleaved accesses can potentially hoist strided loads
484 : // and sink strided stores. The code below checks the legality of the
485 : // following two conditions:
486 : //
487 : // 1. Potentially moving a strided load (B) before any store (A) that
488 : // precedes B, or
489 : //
490 : // 2. Potentially moving a strided store (A) after any load or store (B)
491 : // that A precedes.
492 : //
493 : // It's legal to reorder A and B if we know there isn't a dependence from A
494 : // to B. Note that this determination is conservative since some
495 : // dependences could potentially be reordered safely.
496 :
497 : // A is potentially the source of a dependence.
498 1176 : auto *Src = A->first;
499 1176 : auto SrcDes = A->second;
500 :
501 : // B is potentially the sink of a dependence.
502 1176 : auto *Sink = B->first;
503 1176 : auto SinkDes = B->second;
504 :
505 : // Code motion for interleaved accesses can't violate WAR dependences.
506 : // Thus, reordering is legal if the source isn't a write.
507 1176 : if (!Src->mayWriteToMemory())
508 : return true;
509 :
510 : // At least one of the accesses must be strided.
511 140 : if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
512 : return true;
513 :
514 : // If dependence information is not available from LoopAccessInfo,
515 : // conservatively assume the instructions can't be reordered.
516 74 : if (!areDependencesValid())
517 : return false;
518 :
519 : // If we know there is a dependence from source to sink, assume the
520 : // instructions can't be reordered. Otherwise, reordering is legal.
521 86 : return Dependences.find(Src) == Dependences.end() ||
522 91 : !Dependences.lookup(Src).count(Sink);
523 : }
524 :
525 : /// Collect the dependences from LoopAccessInfo.
526 : ///
527 : /// We process the dependences once during the interleaved access analysis to
528 : /// enable constant-time dependence queries.
529 457 : void collectDependences() {
530 457 : if (!areDependencesValid())
531 : return;
532 : auto *Deps = LAI->getDepChecker().getDependences();
533 483 : for (auto Dep : *Deps)
534 52 : Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
535 : }
536 : };
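// A minimal usage sketch, assuming the analyses named in the constructor are
// available for a loop L (the names here are illustrative only):
//
//   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   if (InterleaveGroup *Group = IAI.getInterleaveGroup(SomeAccess))
//     ...; // SomeAccess belongs to a group of factor Group->getFactor().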
537 :
538 : } // llvm namespace
539 :
540 : #endif