//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {

/// Describes the type of Parameters
enum class VFParamKind {
  Vector,            // No semantic information.
  OMP_Linear,        // declare simd linear(i)
  OMP_LinearRef,     // declare simd linear(ref(i))
  OMP_LinearVal,     // declare simd linear(val(i))
  OMP_LinearUVal,    // declare simd linear(uval(i))
  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
  OMP_Uniform,       // declare simd uniform(i)
  GlobalPredicate,   // Global logical predicate that acts on all lanes
                     // of the input and output mask concurrently. For
                     // example, it is implied by the `M` token in the
                     // Vector Function ABI mangled name.
  Unknown
};

/// Describes the type of Instruction Set Architecture
enum class VFISAKind {
  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
  SVE,          // AArch64 Scalable Vector Extension
  SSE,          // x86 SSE
  AVX,          // x86 AVX
  AVX2,         // x86 AVX2
  AVX512,       // x86 AVX512
  Unknown       // Unknown ISA
};

/// Encapsulates information needed to describe a parameter.
///
/// The description of the parameter is not linked directly to
/// OpenMP or any other vector function description. This structure
/// is extendible to handle other paradigms that describe vector
/// functions and their parameters.
struct VFParameter {
  unsigned ParamPos;         // Parameter Position in Scalar Function.
  VFParamKind ParamKind;     // Kind of Parameter.
  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.

  // Comparison operator.
  bool operator==(const VFParameter &Other) const {
    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
                    Other.Alignment);
  }
};

/// Contains the information about the kind of vectorization
/// available.
///
/// This object is independent of the paradigm used to
/// represent vector functions. In particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
  unsigned VF;     // Vectorization factor.
  bool IsScalable; // True if the function is a scalable function.
  VFISAKind ISA;   // Instruction Set Architecture.
  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
  // Comparison operator.
  bool operator==(const VFShape &Other) const {
    return std::tie(VF, IsScalable, ISA, Parameters) ==
           std::tie(Other.VF, Other.IsScalable, Other.ISA, Other.Parameters);
  }
};

/// Holds the VFShape for a specific scalar to vector function mapping.
struct VFInfo {
  VFShape Shape;        // Classification of the vector function.
  StringRef ScalarName; // Scalar Function Name.
  StringRef VectorName; // Vector Function Name associated to this VFInfo.

  // Comparison operator.
  bool operator==(const VFInfo &Other) const {
    return std::tie(Shape, ScalarName, VectorName) ==
           std::tie(Other.Shape, Other.ScalarName, Other.VectorName);
  }
};

namespace VFABI {
/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
/// <VFABI_name>{(<redirection>)}
///
/// where <VFABI_name> is the name of the vector function, mangled according
/// to the rules described in the Vector Function ABI of the target vector
/// extension (or <isa> from now on). The <VFABI_name> is in the following
/// format:
///
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
///
/// This method supports demangling rules for the following <isa>:
///
/// * AArch64: https://developer.arm.com/docs/101129/latest
///
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
/// \param MangledName -> input string in the format
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName);

/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);
} // end namespace VFABI
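
// Illustrative example, not part of the original header. The mangled name
// "_ZGVnN2v_sin" (AArch64 Advanced SIMD, unmasked, 2 lanes, one vector
// parameter, scalar function "sin") is an assumption chosen for the example:
//
//   Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN2v_sin");
//   if (Info) {
//     // Info->Shape.VF == 2, Info->Shape.ISA == VFISAKind::AdvancedSIMD,
//     // Info->ScalarName == "sin".
//   }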

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
class TargetLibraryInfo;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
enum ID : unsigned;
}

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);

/// Return true if the input value is known to be a vector with all identical
/// elements (potentially including undefined elements).
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, unsigned Depth = 0);

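// Illustrative example, not part of the original header: for IR such as
//
//   %ins   = insertelement <4 x i32> undef, i32 %x, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
//                          <4 x i32> zeroinitializer
//
// getSplatValue(%splat) returns %x, and isSplatValue(%splat) returns true.
// isSplatValue can also recognize splats for which no single scalar source
// value is extractable, where getSplatValue would return nullptr.
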
/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthening and truncating operations when deciding the
/// profitability of vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);

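// Illustrative sketch, not part of the original header. Assuming a Loop *L
// and a DemandedBits analysis result DB are available, a vectorizer could
// query the minimum widths of all instructions in the loop like this:
//
//   SmallVector<BasicBlock *, 8> Blocks(L->getBlocks().begin(),
//                                       L->getBlocks().end());
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(Blocks, DB, /*TTI=*/nullptr);
//   // MinBWs[I] is the smallest legal bit width that instruction I can be
//   // narrowed to without changing the computed result.
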
/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
                               unsigned VF);

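// Illustrative sketch, not part of the original header; Builder and the 4-lane
// block mask BlockMask are assumptions for the example. Widening a block mask
// for a predicated interleaved group of factor 3:
//
//   Constant *RepMask =
//       createReplicatedMask(Builder, /*ReplicationFactor=*/3, /*VF=*/4);
//   // Shuffle the narrow mask through RepMask to get the 12-lane mask that
//   // predicates the wide interleaved access.
//   Value *WideMask = Builder.CreateShuffleVector(
//       BlockMask, UndefValue::get(BlockMask->getType()), RepMask);
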
/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                               unsigned NumVecs);

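// Illustrative sketch, not part of the original header (V0 and V1 are assumed
// <4 x i32> values): interleaving two 4-lane vectors into one 8-lane vector.
//
//   Constant *Mask = createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2);
//   Value *Wide = Builder.CreateShuffleVector(V0, V1, Mask);
//   // Wide = <V0[0], V1[0], V0[1], V1[1], V0[2], V1[2], V0[3], V1[3]>
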
/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
                           unsigned Stride, unsigned VF);

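// Illustrative sketch, not part of the original header (Wide is an assumed
// <8 x i32> value holding two interleaved 4-lane sequences): extracting the
// even and odd lanes.
//
//   Value *Undef = UndefValue::get(Wide->getType());
//   Value *Even = Builder.CreateShuffleVector(
//       Wide, Undef, createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, 4));
//   Value *Odd = Builder.CreateShuffleVector(
//       Wide, Undef, createStrideMask(Builder, /*Start=*/1, /*Stride=*/2, 4));
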
/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                               unsigned NumInts, unsigned NumUndefs);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);

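// Illustrative sketch, not part of the original header (A, B, C are assumed
// <4 x float> values): concatenation into a single <12 x float> value.
//
//   Value *Parts[] = {A, B, C};
//   Value *Wide = concatenateVectors(Builder, Parts);
//   // Wide has type <12 x float>; internally the result is built from a
//   // tree of shufflevector instructions using sequential masks.
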
/// Given a mask vector of the form <Y x i1>, return true if all of the
/// elements of this predicate mask are false or undef. That is, return true
/// if all lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return true if all of the
/// elements of this predicate mask are true or undef. That is, return true
/// if all lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///
///   for (unsigned i = 0; i < 1024; i += 4) {
///     a = A[i];     // Member of index 0
///     b = A[i+1];   // Member of index 1
///     d = A[i+3];   // Member of index 3
///     ...
///   }
///
/// An interleaved store group of factor 4:
///
///   for (unsigned i = 0; i < 1024; i += 4) {
///     ...
///     A[i]   = a;   // Member of index 0
///     A[i+1] = b;   // Member of index 1
///     A[i+2] = c;   // Member of index 2
///     A[i+3] = d;   // Member of index 3
///   }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
      : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
      : Align(Align), InsertPos(Instr) {
    assert(Align && "The alignment should be non-zero");

    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  uint32_t getAlignment() const { return Align; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
    assert(NewAlign && "The new member's alignment should be non-zero");

    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {
      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Align = std::min(Align, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if it contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    auto Member = Members.find(Key);
    if (Member == Members.end())
      return nullptr;

    return Member->second;
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata in the way
  /// addNewMetadata does. To do that we need to compute the intersection of
  /// the noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore cannot be a group of stores,
    // and it can't be a reversed access, because such groups get invalidated.
    assert(!getMember(0)->mayWriteToMemory() &&
           "Group should have been invalidated");
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  uint32_t Align;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32             // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32               // Def of %odd
  //      store i32 %odd               // Insert Position
  InstTy *InsertPos;
};

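// Illustrative sketch, not part of the original header (LoadA and LoadB are
// assumed adjacent strided loads, with LoadB one element after LoadA):
//
//   InterleaveGroup<Instruction> Group(LoadA, /*Stride=*/2, /*Align=*/4);
//   Group.insertMember(LoadB, /*Index=*/1, /*NewAlign=*/4);
//   Group.getFactor();  // 2
//   Group.getMember(0); // LoadA
//   Group.getMember(1); // LoadB
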
/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { reset(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in the loop will be
  /// predicated contrary to the original assumption. Although we currently
  /// prevent group formation for predicated accesses, we may be able to relax
  /// this limitation in the future once we handle more complicated blocks.
  void reset() {
    SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
    InterleaveGroupMap.clear();
    RequiresScalarEpilogue = false;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr doesn't belong to such a group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    if (InterleaveGroupMap.count(Instr))
      return InterleaveGroupMap.find(Instr)->second;
    return nullptr;
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     unsigned Align)
        : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    unsigned Align = 0;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Align);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

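// Illustrative sketch, not part of the original header. Assuming the analyses
// PSE, DT, LI and LAI have already been computed for a vectorizable loop L:
//
//   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   for (auto *Group : IAI.getInterleaveGroups()) {
//     // Each group can be widened into one wide memory operation plus
//     // shuffles built with createInterleaveMask()/createStrideMask().
//   }
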
} // llvm namespace

#endif