VectorUtils.h

//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {
class TargetLibraryInfo;

/// Describes the type of Parameters
enum class VFParamKind {
  Vector,            // No semantic information.
  OMP_Linear,        // declare simd linear(i)
  OMP_LinearRef,     // declare simd linear(ref(i))
  OMP_LinearVal,     // declare simd linear(val(i))
  OMP_LinearUVal,    // declare simd linear(uval(i))
  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
  OMP_Uniform,       // declare simd uniform(i)
  GlobalPredicate,   // Global logical predicate that acts on all lanes
                     // of the input and output mask concurrently. For
                     // example, it is implied by the `M` token in the
                     // Vector Function ABI mangled name.
  Unknown
};
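
// Illustrative note (not part of the original header): in a Vector Function
// ABI mangled name such as `_ZGVnN2vl_foo`, the parameter tokens map to these
// kinds: `v` -> VFParamKind::Vector and `l` -> VFParamKind::OMP_Linear, while
// an `M` in the <mask> position (in place of `N`) implies a
// VFParamKind::GlobalPredicate parameter.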

/// Describes the type of Instruction Set Architecture
enum class VFISAKind {
  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
  SVE,          // AArch64 Scalable Vector Extension
  SSE,          // x86 SSE
  AVX,          // x86 AVX
  AVX2,         // x86 AVX2
  AVX512,       // x86 AVX512
  LLVM,         // LLVM internal ISA for functions that are not
                // attached to an existing ABI via name mangling.
  Unknown       // Unknown ISA
};

/// Encapsulates information needed to describe a parameter.
///
/// The description of the parameter is not linked directly to
/// OpenMP or any other vector function description. This structure
/// is extendible to handle other paradigms that describe vector
/// functions and their parameters.
struct VFParameter {
  unsigned ParamPos;         // Parameter Position in Scalar Function.
  VFParamKind ParamKind;     // Kind of Parameter.
  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.

  // Comparison operator.
  bool operator==(const VFParameter &Other) const {
    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
                    Other.Alignment);
  }
};

/// Contains the information about the kind of vectorization
/// available.
///
/// This object is independent of the paradigm used to
/// represent vector functions. In particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
  unsigned VF;     // Vectorization factor.
  bool IsScalable; // True if the function is a scalable function.
  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
  // Comparison operator.
  bool operator==(const VFShape &Other) const {
    return std::tie(VF, IsScalable, Parameters) ==
           std::tie(Other.VF, Other.IsScalable, Other.Parameters);
  }

  /// Update the parameter in position P.ParamPos to P.
  void updateParam(VFParameter P) {
    assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
    Parameters[P.ParamPos] = P;
    assert(hasValidParameterList() && "Invalid parameter list");
  }

  // Retrieve the VFShape that can be used to map a (scalar) function to itself,
  // with VF = 1.
  static VFShape getScalarShape(const CallInst &CI) {
    return VFShape::get(CI, ElementCount::getFixed(1),
                        /*HasGlobalPredicate*/ false);
  }

  // Retrieve the basic vectorization shape of the function, where all
  // parameters are mapped to VFParamKind::Vector with \p EC
  // lanes. Specifies whether the function has a Global Predicate
  // argument via \p HasGlobalPred.
  static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
    SmallVector<VFParameter, 8> Parameters;
    for (unsigned I = 0; I < CI.arg_size(); ++I)
      Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
    if (HasGlobalPred)
      Parameters.push_back(
          VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));

    return {EC.getKnownMinValue(), EC.isScalable(), Parameters};
  }
  /// Sanity check on the Parameters in the VFShape.
  bool hasValidParameterList() const;
};
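
// Example (a minimal sketch, not part of the original header): for a CallInst
// `CI` with two scalar arguments, build the shape used to widen the call to 4
// fixed lanes with no mask parameter.
//
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/false);
//   // Shape.VF == 4, Shape.IsScalable == false, and Shape.Parameters holds
//   // two entries of kind VFParamKind::Vector.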

/// Holds the VFShape for a specific scalar to vector function mapping.
struct VFInfo {
  VFShape Shape;          /// Classification of the vector function.
  std::string ScalarName; /// Scalar Function Name.
  std::string VectorName; /// Vector Function Name associated to this VFInfo.
  VFISAKind ISA;          /// Instruction Set Architecture.

  // Comparison operator.
  bool operator==(const VFInfo &Other) const {
    return std::tie(Shape, ScalarName, VectorName, ISA) ==
           std::tie(Other.Shape, Other.ScalarName, Other.VectorName, Other.ISA);
  }
};

namespace VFABI {
/// LLVM Internal VFABI ISA token for vector functions.
static constexpr char const *_LLVM_ = "_LLVM_";
/// Prefix for internal name redirection for a vector function that
/// tells the compiler to scalarize the call using the scalar name
/// of the function. For example, a mangled name like
/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
/// vectorizer to vectorize the scalar call `foo`, and to scalarize
/// it once vectorization is done.
static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";

/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
///   <VFABI_name>{(<redirection>)}
///
/// where <VFABI_name> is the name of the vector function, mangled according
/// to the rules described in the Vector Function ABI of the target vector
/// extension (or <isa> from now on). The <VFABI_name> is in the following
/// format:
///
///   _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
///
/// This method supports demangling rules for the following <isa>:
///
/// * AArch64: https://developer.arm.com/docs/101129/latest
///
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
/// \param MangledName -> input string in the format
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
/// \param M -> Module used to retrieve information about the vector
/// function that is not possible to retrieve from the mangled
/// name. At the moment, this parameter is needed only to retrieve the
/// Vectorization Factor of scalable vector functions from their
/// respective IR declarations.
Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);

/// This routine mangles the given VectorName according to the LangRef
/// specification for the vector-function-abi-variant attribute and is specific
/// to the TLI mappings. It is the responsibility of the caller to make sure
/// that this is only used if all parameters in the vector function are vector
/// type. The returned string holds a scalar-to-vector mapping:
///    _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
///
/// where:
///
/// <isa> = "_LLVM_"
/// <mask> = "N". Note: TLI does not support masked interfaces.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
///          field of the `VecDesc` struct. If the number of lanes is scalable
///          then 'x' is printed instead.
/// <vparams> = "v", one per scalar argument (numArgs in total).
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
                                unsigned numArgs, ElementCount VF);
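
// Example (illustrative; the names `vec_sin`/`sin` are assumptions, not part
// of this header): following the grammar documented above,
//   mangleTLIVectorName("vec_sin", "sin", /*numArgs=*/1,
//                       ElementCount::getFixed(2))
// yields the string "_ZGV_LLVM_N2v_sin(vec_sin)".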

/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);

// Name of the attribute where the variant mappings are stored.
static constexpr char const *MappingsAttrName = "vector-function-abi-variant";

/// Populates a set of strings representing the Vector Function ABI variants
/// associated to the CallInst CI. If the CI does not contain the
/// vector-function-abi-variant attribute, we return without populating
/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
/// the presence of the attribute (see InjectTLIMappings).
void getVectorVariantNames(const CallInst &CI,
                           SmallVectorImpl<std::string> &VariantMappings);
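
// Example (illustrative; `vec_sin` is an assumed variant name): for a call
// site carrying the attribute
//   "vector-function-abi-variant"="_ZGV_LLVM_N2v_sin(vec_sin)"
// VariantMappings is populated with the single string
// "_ZGV_LLVM_N2v_sin(vec_sin)"; without the attribute it is left empty.
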
} // end namespace VFABI

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector functions descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated to the rule of
  /// a vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const Optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape.getValue().VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(Shape.getValue());
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  /// Constructor, requires a CallInst instance.
  VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
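
// Usage sketch (not part of the original header): look up a 2-lane unmasked
// variant for the callee of a CallInst `CI`, assuming a VFABI mapping was
// previously attached (e.g. by the InjectTLIMappings pass).
//
//   VFDatabase DB(CI);
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(2),
//                                /*HasGlobalPred=*/false);
//   if (Function *VecF = DB.getVectorizedFunction(Shape)) {
//     // `VecF` is the vector variant; a nullptr result means no mapping.
//   }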

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void, we return void. If the EC represents a
/// scalar, we return the scalar type.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
    return Scalar;
  return VectorType::get(Scalar, EC);
}

inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}
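
// Example: with `I32` an i32 type, ToVectorTy(I32, 4) yields <4 x i32>, and
// ToVectorTy(I32, ElementCount::getScalable(4)) yields <vscale x 4 x i32>;
// void types and an element count of one are passed through unchanged.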

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if no mapping is found, it returns
/// Intrinsic::not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);
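
// Example (illustrative): a direct call to the intrinsic `llvm.fabs.f32`
// yields Intrinsic::fabs, and a call to a recognized library function such
// as `sinf` (when TLI is provided) yields Intrinsic::sin; any other call
// yields Intrinsic::not_intrinsic.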

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
int getSplatIndex(ArrayRef<int> Mask);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a constant splat vector or from
/// a sequence of instructions that broadcast a single value into a vector.
Value *getSplatValue(const Value *V);
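
// Example (illustrative IR): for the broadcast pattern
//   %ins = insertelement <4 x float> undef, float %x, i32 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> undef,
//            <4 x i32> zeroinitializer
// getSplatValue(%splat) returns %x; for the constant vector
// <4 x i32> <i32 7, i32 7, i32 7, i32 7> it returns the scalar constant 7.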

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                           SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                          SmallVectorImpl<int> &ScaledMask);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
                                                unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
                                            unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);
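
// Example (a minimal sketch): concatenating two <4 x i32> values produces one
// <8 x i32> whose lanes are the lanes of the first input followed by those of
// the second; per the documentation above, a shorter final input is first
// padded with undefs before the concatenation.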

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// with a bit set for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);
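
// Example (illustrative): for the constant mask
//   <4 x i1> <i1 1, i1 0, i1 undef, i1 1>
// the result is a 4-bit APInt with bits 0, 2 and 3 set: lane 1 is known
// inactive, while the undef lane may be active and so stays demanded.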

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///   for (unsigned i = 0; i < 1024; i+=4) {
///     a = A[i];                           // Member of index 0
///     b = A[i+1];                         // Member of index 1
///     d = A[i+3];                         // Member of index 3
///     ...
///   }
///
/// An interleaved store group of factor 4:
///   for (unsigned i = 0; i < 1024; i+=4) {
///     ...
///     A[i]   = a;                         // Member of index 0
///     A[i+1] = b;                         // Member of index 1
///     A[i+2] = c;                         // Member of index 2
///     A[i+3] = d;                         // Member of index 3
///   }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for either the tombstone or empty special values.
    if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
        DenseMapInfo<int32_t>::getEmptyKey() == Key)
      return false;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if the group contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a'la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilogue is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore cannot be a group of stores,
    // and it can't be a reversed access, because such groups get invalidated.
    assert(!getMember(0)->mayWriteToMemory() &&
           "Group should have been invalidated");
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32             // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32               // Def of %odd
  //      store i32 %odd               // Insert Position
  InstTy *InsertPos;
};

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks. Returns true if any
  /// groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Alignment.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

} // llvm namespace

#endif