VectorUtils.h (LLVM 11.0.0git)
//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {

/// Describes the type of Parameters
enum class VFParamKind {
  Vector,            // No semantic information.
  OMP_Linear,        // declare simd linear(i)
  OMP_LinearRef,     // declare simd linear(ref(i))
  OMP_LinearVal,     // declare simd linear(val(i))
  OMP_LinearUVal,    // declare simd linear(uval(i))
  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
  OMP_Uniform,       // declare simd uniform(i)
  GlobalPredicate,   // Global logical predicate that acts on all lanes
                     // of the input and output mask concurrently. For
                     // example, it is implied by the `M` token in the
                     // Vector Function ABI mangled name.
  Unknown
};

/// Describes the type of Instruction Set Architecture
enum class VFISAKind {
  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
  SVE,          // AArch64 Scalable Vector Extension
  SSE,          // x86 SSE
  AVX,          // x86 AVX
  AVX2,         // x86 AVX2
  AVX512,       // x86 AVX512
  LLVM,         // LLVM internal ISA for functions that are not
                // attached to an existing ABI via name mangling.
  Unknown       // Unknown ISA
};

/// Encapsulates information needed to describe a parameter.
///
/// The description of the parameter is not linked directly to
/// OpenMP or any other vector function description. This structure
/// is extendible to handle other paradigms that describe vector
/// functions and their parameters.
struct VFParameter {
  unsigned ParamPos;         // Parameter Position in Scalar Function.
  VFParamKind ParamKind;     // Kind of Parameter.
  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.

  // Comparison operator.
  bool operator==(const VFParameter &Other) const {
    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
                    Other.Alignment);
  }
};

/// Contains the information about the kind of vectorization
/// available.
///
/// This object is independent of the paradigm used to
/// represent vector functions. In particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
  unsigned VF;     // Vectorization factor.
  bool IsScalable; // True if the function is a scalable function.
  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
  // Comparison operator.
  bool operator==(const VFShape &Other) const {
    return std::tie(VF, IsScalable, Parameters) ==
           std::tie(Other.VF, Other.IsScalable, Other.Parameters);
  }

  /// Update the parameter in position P.ParamPos to P.
  void updateParam(VFParameter P) {
    assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
    Parameters[P.ParamPos] = P;
    assert(hasValidParameterList() && "Invalid parameter list");
  }

  // Retrieve the VFShape that can be used to map a (scalar) function to itself,
  // with VF = 1.
  static VFShape getScalarShape(const CallInst &CI) {
    return VFShape::get(CI, /*EC*/ {1, false}, /*HasGlobalPredicate*/ false);
  }

  // Retrieve the basic vectorization shape of the function, where all
  // parameters are mapped to VFParamKind::Vector with \p EC
  // lanes. Specifies whether the function has a Global Predicate
  // argument via \p HasGlobalPred.
  static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
    SmallVector<VFParameter, 8> Parameters;
    for (unsigned I = 0; I < CI.arg_size(); ++I)
      Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
    if (HasGlobalPred)
      Parameters.push_back(
          VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));

    return {EC.Min, EC.Scalable, Parameters};
  }
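
  // Illustrative sketch (not part of the original header): for a call
  // `%r = call double @sin(double %x)`, the shape of a 2-lane, unpredicated
  // vectorization could be obtained as
  //
  //   VFShape Shape = VFShape::get(CI, /*EC=*/{2, /*Scalable=*/false},
  //                                /*HasGlobalPred=*/false);
  //
  // which maps the single argument to VFParamKind::Vector with VF = 2.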
  /// Sanity check on the Parameters in the VFShape.
  bool hasValidParameterList() const;
};

/// Holds the VFShape for a specific scalar to vector function mapping.
struct VFInfo {
  VFShape Shape;          /// Classification of the vector function.
  std::string ScalarName; /// Scalar Function Name.
  std::string VectorName; /// Vector Function Name associated to this VFInfo.
  VFISAKind ISA;          /// Instruction Set Architecture.

  // Comparison operator.
  bool operator==(const VFInfo &Other) const {
    return std::tie(Shape, ScalarName, VectorName, ISA) ==
           std::tie(Other.Shape, Other.ScalarName, Other.VectorName,
                    Other.ISA);
  }
};

namespace VFABI {
/// LLVM Internal VFABI ISA token for vector functions.
static constexpr char const *_LLVM_ = "_LLVM_";
/// Prefix for internal name redirection for vector function that
/// tells the compiler to scalarize the call using the scalar name
/// of the function. For example, a mangled name like
/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
/// vectorizer to vectorize the scalar call `foo`, and to scalarize
/// it once vectorization is done.
static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";

/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
/// <VFABI_name>{(<redirection>)}
///
/// where <VFABI_name> is the name of the vector function, mangled according
/// to the rules described in the Vector Function ABI of the target vector
/// extension (or <isa> from now on). The <VFABI_name> is in the following
/// format:
///
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
///
/// This method supports demangling rules for the following <isa>:
///
/// * AArch64: https://developer.arm.com/docs/101129/latest
///
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
/// \param MangledName -> input string in the format
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
/// \param M -> Module used to retrieve information about the vector
/// function that is not possible to retrieve from the mangled
/// name. At the moment, this parameter is needed only to retrieve the
/// Vectorization Factor of scalable vector functions from their
/// respective IR declarations.
Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);

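// Illustrative sketch (not part of the original header); the function names
// used here are hypothetical:
//
//   Optional<VFInfo> Info =
//       tryDemangleForVFABI("_ZGV_LLVM_N2v_foo(vector_foo)", M);
//   if (Info.hasValue()) {
//     // Info->ScalarName == "foo", Info->VectorName == "vector_foo",
//     // Info->Shape.VF == 2, Info->ISA == VFISAKind::LLVM.
//   }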

/// This routine mangles the given VectorName according to the LangRef
/// specification for the vector-function-abi-variant attribute and is specific
/// to the TLI mappings. It is the responsibility of the caller to make sure
/// that this is only used if all parameters in the vector function are vector
/// type. The returned string holds the scalar-to-vector mapping:
/// _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
///
/// where:
///
/// <isa> = "_LLVM_"
/// <mask> = "N". Note: TLI does not support masked interfaces.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
/// field of the `VecDesc` struct.
/// <vparams> = "v", one per argument (numArgs in total).
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
                                unsigned numArgs, unsigned VF);

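// Illustrative sketch (not part of the original header), with hypothetical
// names: a 4-lane TLI mapping of `sin` to `vec_sin` with one vector
// parameter mangles as
//
//   mangleTLIVectorName("vec_sin", "sin", /*numArgs=*/1, /*VF=*/4);
//   // == "_ZGV_LLVM_N4v_sin(vec_sin)"
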
/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);

// Name of the attribute where the variant mappings are stored.
static constexpr char const *MappingsAttrName = "vector-function-abi-variant";

/// Populates a set of strings representing the Vector Function ABI variants
/// associated to the CallInst CI. If the CI does not contain the
/// vector-function-abi-variant attribute, we return without populating
/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
/// the presence of the attribute (see InjectTLIMappings).
void getVectorVariantNames(const CallInst &CI,
                           SmallVectorImpl<std::string> &VariantMappings);
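
// Illustrative sketch (not part of the original header): listing the VFABI
// variants attached to a call site.
//
//   SmallVector<std::string, 8> Variants;
//   getVectorVariantNames(CI, Variants);
//   // Each entry is a mangled variant name, e.g. "_ZGV_LLVM_N2v_foo(vec_foo)".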
} // end namespace VFABI

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector functions descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated to the rule of
  /// a vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const Optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape.getValue().VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(Shape.getValue());
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  /// Constructor, requires a CallInst instance.
  VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
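
// Illustrative sketch (not part of the original header): querying the
// database for a 2-lane variant of the callee of CI.
//
//   VFDatabase DB(CI);
//   VFShape Shape = VFShape::get(CI, /*EC=*/{2, false},
//                                /*HasGlobalPred=*/false);
//   if (Function *VecF = DB.getVectorizedFunction(Shape))
//     ; // VecF is a vector variant with the requested shape, if present.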

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// A helper function for converting Scalar types to vector types.
/// If the incoming type is void, we return void. If the VF is 1, we return
/// the scalar type.
inline Type *ToVectorTy(Type *Scalar, unsigned VF, bool isScalable = false) {
  if (Scalar->isVoidTy() || VF == 1)
    return Scalar;
  return VectorType::get(Scalar, {VF, isScalable});
}
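
// Illustrative sketch (not part of the original header):
//   ToVectorTy(Type::getFloatTy(Ctx), 4) yields <4 x float>, while
//   ToVectorTy(Type::getFloatTy(Ctx), 1) returns the scalar float type
//   unchanged, as does any void type.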

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if it does not find one, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
int getSplatIndex(ArrayRef<int> Mask);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);

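// Illustrative sketch (not part of the original header): for
//   %v = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
// getSplatValue(%v) recovers the broadcast scalar (element 0 of %x) when a
// scalar source value exists, and isSplatValue(%v) returns true even when it
// does not.
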
/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
/// <4 x i32> <3, 2, 0, -1> -->
/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                           SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
/// <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                          SmallVectorImpl<int> &ScaledMask);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
/// %1 = load i8, i8*
/// %2 = add i8 %1, 2
/// %3 = load i16, i16*
/// %4 = zext i8 %2 to i32
/// %5 = zext i16 %3 to i32
/// %6 = add i32 %4, %5
/// %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
439 
440 /// Compute the access-group list of access groups that @p Inst1 and @p Inst2
441 /// are both in. If either instruction does not access memory at all, it is
442 /// considered to be in every list.
443 ///
444 /// If the list contains just one access group, it is returned directly. If the
445 /// list is empty, returns nullptr.
447  const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
/// <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
/// <0,0,0,1,1,1,2,2,2,3,3,3>
llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
                                                unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
/// <0, 4, 1, 5, 2, 6, 3, 7>.
llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
/// <0, 2, 4, 6>
llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
                                            unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
/// <0, 1, 2, 3, undef, undef, undef, undef>
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);

/// Given a mask vector of the form <Y x i1>, return true if all of the
/// elements of this predicate mask are false or undef. That is, return true
/// if all lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return true if all of the
/// elements of this predicate mask are true or undef. That is, return true
/// if all lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);

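// Illustrative sketch (not part of the original header): for the constant
// mask <i1 true, i1 undef, i1 false, i1 true>:
//
//   maskIsAllZeroOrUndef(Mask)       -> false (lanes 0 and 3 are true)
//   maskIsAllOneOrUndef(Mask)        -> false (lane 2 is known false)
//   possiblyDemandedEltsInMask(Mask) -> 0b1011; only the known-false lane 2
//                                       is guaranteed inactive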
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
/// for (unsigned i = 0; i < 1024; i+=4) {
///   a = A[i];   // Member of index 0
///   b = A[i+1]; // Member of index 1
///   d = A[i+3]; // Member of index 3
///   ...
/// }
///
/// An interleaved store group of factor 4:
/// for (unsigned i = 0; i < 1024; i+=4) {
///   ...
///   A[i]   = a; // Member of index 0
///   A[i+1] = b; // Member of index 1
///   A[i+2] = c; // Member of index 2
///   A[i+3] = d; // Member of index 3
/// }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  LLVM_ATTRIBUTE_DEPRECATED(uint32_t getAlignment() const,
                            "Use getAlign instead.") {
    return Alignment.value();
  }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index
  ///
  /// \returns nullptr if contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    auto Member = Members.find(Key);
    if (Member == Members.end())
      return nullptr;

    return Member->second;
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilogue is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore cannot be a group of stores,
    // and it can't be a reversed access, because such groups get invalidated.
    assert(!getMember(0)->mayWriteToMemory() &&
           "Group should have been invalidated");
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32             // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32               // Def of %odd
  //      store i32 %odd               // Insert Position
  InstTy *InsertPos;
};

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks. Returns true if any
  /// groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if doesn't have such group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    if (InterleaveGroupMap.count(Instr))
      return InterleaveGroupMap.find(Instr)->second;
    return nullptr;
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

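  // Illustrative sketch (not part of the original header) of the intended
  // analysis flow, assuming PSE, L, DT, LI and LAI are already available:
  //
  //   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
  //   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
  //   if (auto *Group = IAI.getInterleaveGroup(SomeLoadOrStore))
  //     ; // SomeLoadOrStore belongs to a group of Group->getFactor() members.
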
private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

} // llvm namespace

#endif