LLVM 14.0.0git
VectorUtils.h
//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {
class TargetLibraryInfo;

/// Describes the type of Parameters
enum class VFParamKind {
  Vector,            // No semantic information.
  OMP_Linear,        // declare simd linear(i)
  OMP_LinearRef,     // declare simd linear(ref(i))
  OMP_LinearVal,     // declare simd linear(val(i))
  OMP_LinearUVal,    // declare simd linear(uval(i))
  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
  OMP_Uniform,       // declare simd uniform(i)
  GlobalPredicate,   // Global logical predicate that acts on all lanes
                     // of the input and output mask concurrently. For
                     // example, it is implied by the `M` token in the
                     // Vector Function ABI mangled name.
  Unknown
};

/// Describes the type of Instruction Set Architecture
enum class VFISAKind {
  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
  SVE,          // AArch64 Scalable Vector Extension
  SSE,          // x86 SSE
  AVX,          // x86 AVX
  AVX2,         // x86 AVX2
  AVX512,       // x86 AVX512
  LLVM,         // LLVM internal ISA for functions that are not
                // attached to an existing ABI via name mangling.
  Unknown       // Unknown ISA
};

/// Encapsulates information needed to describe a parameter.
///
/// The description of the parameter is not linked directly to
/// OpenMP or any other vector function description. This structure
/// is extendible to handle other paradigms that describe vector
/// functions and their parameters.
struct VFParameter {
  unsigned ParamPos;         // Parameter Position in Scalar Function.
  VFParamKind ParamKind;     // Kind of Parameter.
  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.

  // Comparison operator.
  bool operator==(const VFParameter &Other) const {
    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
                    Other.Alignment);
  }
};

/// Contains the information about the kind of vectorization
/// available.
///
/// This object is independent of the paradigm used to
/// represent vector functions. In particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
  ElementCount VF;                        // Vectorization factor.
  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
  // Comparison operator.
  bool operator==(const VFShape &Other) const {
    return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
  }

  /// Update the parameter in position P.ParamPos to P.
  void updateParam(VFParameter P) {
    assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
    Parameters[P.ParamPos] = P;
    assert(hasValidParameterList() && "Invalid parameter list");
  }

  // Retrieve the VFShape that can be used to map a (scalar) function to itself,
  // with VF = 1.
  static VFShape getScalarShape(const CallInst &CI) {
    return VFShape::get(CI, ElementCount::getFixed(1),
                        /*HasGlobalPredicate*/ false);
  }

  // Retrieve the basic vectorization shape of the function, where all
  // parameters are mapped to VFParamKind::Vector with \p EC
  // lanes. Specifies whether the function has a Global Predicate
  // argument via \p HasGlobalPred.
  static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
    SmallVector<VFParameter, 8> Parameters;
    for (unsigned I = 0; I < CI.arg_size(); ++I)
      Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
    if (HasGlobalPred)
      Parameters.push_back(
          VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));

    return {EC, Parameters};
  }
  /// Sanity check on the Parameters in the VFShape.
  bool hasValidParameterList() const;
};
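
// A minimal usage sketch (illustrative, not part of the original header):
// build the basic shape for a two-argument call and then mark the second
// operand as uniform. `CI` is assumed to be a CallInst with two arguments.
//
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/false);
//   VFParameter Uniform({/*ParamPos=*/1, VFParamKind::OMP_Uniform});
//   Shape.updateParam(Uniform); // Parameter 1 is now uniform across lanes.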

/// Holds the VFShape for a specific scalar to vector function mapping.
struct VFInfo {
  VFShape Shape;          /// Classification of the vector function.
  std::string ScalarName; /// Scalar Function Name.
  std::string VectorName; /// Vector Function Name associated to this VFInfo.
  VFISAKind ISA;          /// Instruction Set Architecture.
};

namespace VFABI {
/// LLVM Internal VFABI ISA token for vector functions.
static constexpr char const *_LLVM_ = "_LLVM_";
/// Prefix for internal name redirection for a vector function that
/// tells the compiler to scalarize the call using the scalar name
/// of the function. For example, a mangled name like
/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
/// vectorizer to vectorize the scalar call `foo`, and to scalarize
/// it once vectorization is done.
static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";

/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
///   <VFABI_name>{(<redirection>)}
///
/// where <VFABI_name> is the name of the vector function, mangled according
/// to the rules described in the Vector Function ABI of the target vector
/// extension (or <isa> from now on). The <VFABI_name> is in the following
/// format:
///
///   _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
///
/// This method supports demangling rules for the following <isa>:
///
/// * AArch64: https://developer.arm.com/docs/101129/latest
///
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
///   https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
/// \param MangledName -> input string in the format
///   _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
/// \param M -> Module used to retrieve information about the vector
///   function that is not possible to retrieve from the mangled
///   name. At the moment, this parameter is needed only to retrieve the
///   Vectorization Factor of scalable vector functions from their
///   respective IR declarations.
Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M);

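// Illustrative demangling sketch (not part of the original header); `M` is
// assumed to be the Module holding the vector function's declaration:
//
//   Optional<VFInfo> Info =
//       VFABI::tryDemangleForVFABI("_ZGV_LLVM_N2v_sin(vector_sin)", M);
//   // On success: Info->ScalarName == "sin", Info->VectorName ==
//   // "vector_sin", and Info->Shape.VF is the fixed vectorization factor 2.
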
/// This routine mangles the given VectorName according to the LangRef
/// specification for the vector-function-abi-variant attribute and is specific
/// to the TLI mappings. It is the responsibility of the caller to make sure
/// that this is only used if all parameters in the vector function are vector
/// type. The returned string holds a scalar-to-vector mapping:
///
///   _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
///
/// where:
///
/// <isa> = "_LLVM_"
/// <mask> = "N". Note: TLI does not support masked interfaces.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
///          field of the `VecDesc` struct. If the number of lanes is scalable
///          then 'x' is printed instead.
/// <vparams> = "v", one per argument (numArgs in total).
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
                                unsigned numArgs, ElementCount VF);

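// Applying the rules above to a hypothetical one-argument mapping of `sin`
// to `vector_sin` at fixed VF = 2 (names are illustrative):
//
//   std::string S = VFABI::mangleTLIVectorName(
//       "vector_sin", "sin", /*numArgs=*/1, ElementCount::getFixed(2));
//   // S == "_ZGV_LLVM_N2v_sin(vector_sin)"
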
/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);

// Name of the attribute where the variant mappings are stored.
static constexpr char const *MappingsAttrName = "vector-function-abi-variant";

/// Populates a set of strings representing the Vector Function ABI variants
/// associated to the CallInst CI. If the CI does not contain the
/// vector-function-abi-variant attribute, we return without populating
/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
/// the presence of the attribute (see InjectTLIMappings).
void getVectorVariantNames(const CallInst &CI,
                           SmallVectorImpl<std::string> &VariantMappings);
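
// Usage sketch (illustrative): collect the variant strings attached to a
// call site; a missing attribute simply leaves `VariantMappings` empty.
//
//   SmallVector<std::string, 8> VariantMappings;
//   VFABI::getVectorVariantNames(CI, VariantMappings);
//   for (const std::string &S : VariantMappings)
//     ; // e.g. S == "_ZGV_LLVM_N2v_sin(vector_sin)"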
} // end namespace VFABI

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector functions descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated to the rule of
  /// a vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const Optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape.getValue().VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(Shape.getValue());
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  /// Constructor, requires a CallInst instance.
  explicit VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
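
// Putting the database to work (illustrative sketch; `CI` is assumed to be a
// CallInst that carries vector-function-abi-variant mappings):
//
//   VFDatabase DB(CI);
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/false);
//   if (Function *VecF = DB.getVectorizedFunction(Shape))
//     ; // VecF is the 4-lane variant; nullptr means no such mapping exists.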

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void, we return void. If the EC represents a
/// scalar, we return the scalar type.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
    return Scalar;
  return VectorType::get(Scalar, EC);
}

inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}

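// For example (illustrative), given an LLVMContext `Ctx`:
//
//   Type *T0 = ToVectorTy(Type::getFloatTy(Ctx), 4); // <4 x float>
//   Type *T1 = ToVectorTy(Type::getVoidTy(Ctx), 4);  // void, unchanged
//   Type *T2 = ToVectorTy(Type::getInt32Ty(Ctx),
//                         ElementCount::getScalable(2)); // <vscale x 2 x i32>
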
/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// Identifies if the vector form of the intrinsic has a scalar operand that has
/// an overloaded type.
bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
                                            unsigned ScalarOpdIdx);

/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
int getSplatIndex(ArrayRef<int> Mask);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
Value *getSplatValue(const Value *V);

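// Illustrative IR-level sketch of what these helpers can recover (the %names
// are hypothetical):
//
//   %ins   = insertelement <4 x i32> poison, i32 %x, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison,
//                          <4 x i32> zeroinitializer
//
// Here getSplatValue(%splat) returns %x, and findScalarElement(%splat, 2)
// should also resolve to %x by tracing the shuffle back to the insertelement,
// without materializing a new extractelement instruction.
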
/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                           SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                          SmallVectorImpl<int> &ScaledMask);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
                                                unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);

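// Sketch of how such a mask is typically consumed (illustrative; `Builder` is
// an IRBuilder and V0/V1 are <4 x float> values):
//
//   // Interleave V0 and V1 into <0, 4, 1, 5, 2, 6, 3, 7> order.
//   Value *Wide =
//       Builder.CreateShuffleVector(V0, V1, createInterleaveMask(4, 2));
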
/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
                                            unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);

/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are identical. This assumes that the unary
/// shuffle will use elements from operand 0 (operand 1 will be unused).
llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
                                           unsigned NumElts);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);

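// Usage sketch (illustrative): concatenate two <4 x i32> values V0 and V1
// into a single <8 x i32>.
//
//   Value *Parts[] = {V0, V1};
//   Value *Wide = concatenateVectors(Builder, Parts);
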
/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);

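// Worked example (illustrative) for Mask = <i1 1, i1 undef, i1 0, i1 1>:
//
//   maskIsAllZeroOrUndef(Mask)       --> false (lanes 0 and 3 are true)
//   maskIsAllOneOrUndef(Mask)        --> false (lane 2 is false)
//   possiblyDemandedEltsInMask(Mask) --> 0b1011 (lanes 0, 1 and 3 may be on)
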
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///   for (unsigned i = 0; i < 1024; i+=4) {
///     a = A[i];   // Member of index 0
///     b = A[i+1]; // Member of index 1
///     d = A[i+3]; // Member of index 3
///     ...
///   }
///
/// An interleaved store group of factor 4:
///   for (unsigned i = 0; i < 1024; i+=4) {
///     ...
///     A[i]   = a; // Member of index 0
///     A[i+1] = b; // Member of index 1
///     A[i+2] = c; // Member of index 2
///     A[i+3] = d; // Member of index 3
///   }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for either the tombstone or empty special values.
    if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
        DenseMapInfo<int32_t>::getEmptyKey() == Key)
      return false;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index
  ///
  /// \returns nullptr if contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore can't be a reversed access,
    // because such groups get invalidated (TODO).
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32      // Insert Position
  //      %add = add i32 %even  // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32        // Def of %odd
  //      store i32 %odd        // Insert Position
  InstTy *InsertPos;
};

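// A small sketch of the insertion protocol (illustrative; LoadA and LoadB are
// assumed to be strided loads one element apart):
//
//   InterleaveGroup<Instruction> Group(LoadA, /*Stride=*/3, Align(4));
//   (void)Group.insertMember(LoadB, /*Index=*/1, Align(4));
//   Group.getFactor();              // 3
//   Group.getMember(2);             // nullptr: index 2 is a gap
//   Group.requiresScalarEpilogue(); // true, since the last member is missing
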
/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks. Returns true if any
  /// groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if doesn't have such group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

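  // Typical driver flow (illustrative sketch; PSE, L, DT, LI and LAI are
  // assumed to come from the enclosing pass pipeline):
  //
  //   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
  //   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
  //   for (auto *Group : IAI.getInterleaveGroups())
  //     ; // inspect Group->getFactor(), Group->getMember(I), ...
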
  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

} // llvm namespace

#endif