LLVM 17.0.0git
VectorUtils.h
1//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ANALYSIS_VECTORUTILS_H
14#define LLVM_ANALYSIS_VECTORUTILS_H
15
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/Analysis/LoopAccessAnalysis.h"
19#include "llvm/Support/CheckedArithmetic.h"
20
21namespace llvm {
22class TargetLibraryInfo;
23
24/// Describes the type of Parameters
25enum class VFParamKind {
26 Vector, // No semantic information.
27 OMP_Linear, // declare simd linear(i)
28 OMP_LinearRef, // declare simd linear(ref(i))
29 OMP_LinearVal, // declare simd linear(val(i))
30 OMP_LinearUVal, // declare simd linear(uval(i))
31 OMP_LinearPos, // declare simd linear(i:c) uniform(c)
32 OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c)
33 OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c)
34 OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
35 OMP_Uniform, // declare simd uniform(i)
36 GlobalPredicate, // Global logical predicate that acts on all lanes
37 // of the input and output mask concurrently. For
38 // example, it is implied by the `M` token in the
39 // Vector Function ABI mangled name.
40 Unknown // Unknown formal parameter kind.
41};
42
43/// Describes the type of Instruction Set Architecture
44enum class VFISAKind {
45 AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
46 SVE, // AArch64 Scalable Vector Extension
47 SSE, // x86 SSE
48 AVX, // x86 AVX
49 AVX2, // x86 AVX2
50 AVX512, // x86 AVX512
51 LLVM, // LLVM internal ISA for functions that are not
52 // attached to an existing ABI via name mangling.
53 Unknown // Unknown ISA
54};
55
56/// Encapsulates information needed to describe a parameter.
57///
58/// The description of the parameter is not linked directly to
59/// OpenMP or any other vector function description. This structure
60/// is extendible to handle other paradigms that describe vector
61/// functions and their parameters.
62struct VFParameter {
63 unsigned ParamPos; // Parameter Position in Scalar Function.
64 VFParamKind ParamKind; // Kind of Parameter.
65 int LinearStepOrPos = 0; // Step or Position of the Parameter.
66 Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.
67
68 // Comparison operator.
69 bool operator==(const VFParameter &Other) const {
70 return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
71 std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
72 Other.Alignment);
73 }
74};
75
76/// Contains the information about the kind of vectorization
77/// available.
78///
79/// This object is independent of the paradigm used to
80/// represent vector functions. In particular, it is not attached to
81/// any target-specific ABI.
82struct VFShape {
83 ElementCount VF; // Vectorization factor.
84 SmallVector<VFParameter, 8> Parameters; // List of parameter information.
85 // Comparison operator.
86 bool operator==(const VFShape &Other) const {
87 return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
88 }
89
90 /// Update the parameter in position P.ParamPos to P.
91 void updateParam(VFParameter P) {
92 assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
93 Parameters[P.ParamPos] = P;
94 assert(hasValidParameterList() && "Invalid parameter list");
95 }
96
97 // Retrieve the VFShape that can be used to map a (scalar) function to itself,
98 // with VF = 1.
99 static VFShape getScalarShape(const CallInst &CI) {
100 return VFShape::get(CI, ElementCount::getFixed(1),
101 /*HasGlobalPredicate*/ false);
102 }
103
104 // Retrieve the basic vectorization shape of the function, where all
105 // parameters are mapped to VFParamKind::Vector with \p EC
106 // lanes. Specifies whether the function has a Global Predicate
107 // argument via \p HasGlobalPred.
108 static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
109 SmallVector<VFParameter, 8> Parameters;
110 for (unsigned I = 0; I < CI.arg_size(); ++I)
111 Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
112 if (HasGlobalPred)
113 Parameters.push_back(
114 VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));
115
116 return {EC, Parameters};
117 }
118 /// Validation check on the Parameters in the VFShape.
119 bool hasValidParameterList() const;
120};
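
// Example (illustrative sketch): for a scalar call such as
//   %r = call double @foo(double %a, double %b)
// VFShape::get(CI, ElementCount::getFixed(4), /*HasGlobalPred=*/true) yields a
// shape with VF = 4 and Parameters =
//   { {0, VFParamKind::Vector}, {1, VFParamKind::Vector},
//     {2, VFParamKind::GlobalPredicate} }   // mask appended after the scalar args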
121
122/// Holds the VFShape for a specific scalar to vector function mapping.
123struct VFInfo {
124 VFShape Shape; /// Classification of the vector function.
125 std::string ScalarName; /// Scalar Function Name.
126 std::string VectorName; /// Vector Function Name associated to this VFInfo.
127 VFISAKind ISA; /// Instruction Set Architecture.
128
129 /// Returns the index of the first parameter with the kind 'GlobalPredicate',
130 /// if any exist.
131 std::optional<unsigned> getParamIndexForOptionalMask() const {
132 unsigned ParamCount = Shape.Parameters.size();
133 for (unsigned i = 0; i < ParamCount; ++i)
134 if (Shape.Parameters[i].ParamKind == VFParamKind::GlobalPredicate)
135 return i;
136
137 return std::nullopt;
138 }
139
140 /// Returns true if at least one of the operands to the vectorized function
141 /// has the kind 'GlobalPredicate'.
142 bool isMasked() const { return getParamIndexForOptionalMask().has_value(); }
143};
144
145namespace VFABI {
146/// LLVM Internal VFABI ISA token for vector functions.
147static constexpr char const *_LLVM_ = "_LLVM_";
148/// Prefix for internal name redirection for vector function that
149/// tells the compiler to scalarize the call using the scalar name
150/// of the function. For example, a mangled name like
151/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
152/// vectorizer to vectorize the scalar call `foo`, and to scalarize
153/// it once vectorization is done.
154static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";
155
156/// Function to construct a VFInfo out of a mangled name in the
157/// following format:
158///
159/// <VFABI_name>{(<redirection>)}
160///
161/// where <VFABI_name> is the name of the vector function, mangled according
162/// to the rules described in the Vector Function ABI of the target vector
163/// extension (or <isa> from now on). The <VFABI_name> is in the following
164/// format:
165///
166/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
167///
168/// This method supports demangling rules for the following <isa>:
169///
170/// * AArch64: https://developer.arm.com/docs/101129/latest
171///
172/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
173/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
174///
175/// \param MangledName -> input string in the format
176/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
177/// \param M -> Module used to retrieve information about the vector
178/// function that cannot be retrieved from the mangled
179/// name. At the moment, this parameter is needed only to retrieve the
180/// Vectorization Factor of scalable vector functions from their
181/// respective IR declarations.
182std::optional<VFInfo> tryDemangleForVFABI(StringRef MangledName,
183 const Module &M);
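
// Example (illustrative sketch; names are hypothetical): demangling
//   "_ZGV_LLVM_N4vv_foo(vector_foo)"
// produces a VFInfo with ScalarName = "foo", VectorName = "vector_foo",
// ISA = VFISAKind::LLVM, and a VFShape with fixed VF = 4 and two
// VFParamKind::Vector parameters (the "vv" tokens); "N" means no mask.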
184
185/// This routine mangles the given VectorName according to the LangRef
186/// specification for vector-function-abi-variant attribute and is specific to
187/// the TLI mappings. It is the responsibility of the caller to make sure that
188/// this is only used if all parameters in the vector function are vector type.
189/// This returned string holds scalar-to-vector mapping:
190/// _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
191///
192/// where:
193///
194/// <isa> = "_LLVM_"
195/// <mask> = "M" if masked, "N" if no mask.
196/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
197/// field of the `VecDesc` struct. If the number of lanes is scalable
198/// then 'x' is printed instead.
199/// <vparams> = "v", one per scalar argument (numArgs in total).
200/// <scalarname> = the name of the scalar function.
201/// <vectorname> = the name of the vector function.
202std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
203 unsigned numArgs, ElementCount VF,
204 bool Masked = false);
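
// Example (illustrative sketch; names are hypothetical): following the format
// documented above,
//   mangleTLIVectorName("vector_sinf", "sinf", /*numArgs=*/1,
//                       ElementCount::getFixed(4))
// would return "_ZGV_LLVM_N4v_sinf(vector_sinf)"; a scalable VF prints 'x'
// instead of '4', and Masked = true replaces 'N' with 'M'.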
205
206/// Retrieve the `VFParamKind` from a string token.
207VFParamKind getVFParamKindFromString(const StringRef Token);
208
209// Name of the attribute where the variant mappings are stored.
210static constexpr char const *MappingsAttrName = "vector-function-abi-variant";
211
212/// Populates a set of strings representing the Vector Function ABI variants
213/// associated to the CallInst CI. If the CI does not contain the
214/// vector-function-abi-variant attribute, we return without populating
215/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
216/// the presence of the attribute (see InjectTLIMappings).
217void getVectorVariantNames(const CallInst &CI,
218 SmallVectorImpl<std::string> &VariantMappings);
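
// Example (illustrative sketch; names are hypothetical): if CI carries
//   "vector-function-abi-variant" =
//       "_ZGV_LLVM_N2v_foo(foo_vec2),_ZGV_LLVM_N4v_foo(foo_vec4)"
// then VariantMappings is filled with the two comma-separated mangled names;
// if the attribute is absent, VariantMappings is left empty.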
219} // end namespace VFABI
220
221/// The Vector Function Database.
222///
223/// Helper class used to find the vector functions associated to a
224/// scalar CallInst.
225class VFDatabase {
226 /// The Module of the CallInst CI.
227 const Module *M;
228 /// The CallInst instance being queried for scalar to vector mappings.
229 const CallInst &CI;
230 /// List of vector functions descriptors associated to the call
231 /// instruction.
232 const SmallVector<VFInfo, 8> ScalarToVectorMappings;
233
234 /// Retrieve the scalar-to-vector mappings associated to the rule of
235 /// a vector Function ABI.
236 static void getVFABIMappings(const CallInst &CI,
237 SmallVectorImpl<VFInfo> &Mappings) {
238 if (!CI.getCalledFunction())
239 return;
240
241 const StringRef ScalarName = CI.getCalledFunction()->getName();
242
243 SmallVector<std::string, 8> ListOfStrings;
244 // The check for the vector-function-abi-variant attribute is done when
245 // retrieving the vector variant names here.
246 VFABI::getVectorVariantNames(CI, ListOfStrings);
247 if (ListOfStrings.empty())
248 return;
249 for (const auto &MangledName : ListOfStrings) {
250 const std::optional<VFInfo> Shape =
251 VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
252 // A match is found via scalar and vector names, and also by
253 // ensuring that the variant described in the attribute has a
254 // corresponding definition or declaration of the vector
255 // function in the Module M.
256 if (Shape && (Shape->ScalarName == ScalarName)) {
257 assert(CI.getModule()->getFunction(Shape->VectorName) &&
258 "Vector function is missing.");
259 Mappings.push_back(*Shape);
260 }
261 }
262 }
263
264public:
265 /// Retrieve all the VFInfo instances associated to the CallInst CI.
266 static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
267 SmallVector<VFInfo, 8> Ret;
268
269 // Get mappings from the Vector Function ABI variants.
270 getVFABIMappings(CI, Ret);
271
272 // Other non-VFABI variants should be retrieved here.
273
274 return Ret;
275 }
276
277 /// Constructor, requires a CallInst instance.
278 VFDatabase(CallInst &CI)
279 : M(CI.getModule()), CI(CI),
280 ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
281 /// \defgroup VFDatabase query interface.
282 ///
283 /// @{
284 /// Retrieve the Function with VFShape \p Shape.
285 Function *getVectorizedFunction(const VFShape &Shape) const {
286 if (Shape == VFShape::getScalarShape(CI))
287 return CI.getCalledFunction();
288
289 for (const auto &Info : ScalarToVectorMappings)
290 if (Info.Shape == Shape)
291 return M->getFunction(Info.VectorName);
292
293 return nullptr;
294 }
295 /// @}
296};
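
// Example (illustrative sketch, assuming CI calls a function that carries VFABI
// variant mappings for a 4-lane variant):
//   VFDatabase DB(CI);
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/false);
//   Function *VecF = DB.getVectorizedFunction(Shape); // vector variant or nullptr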
297
298template <typename T> class ArrayRef;
299class DemandedBits;
300class GetElementPtrInst;
301template <typename InstTy> class InterleaveGroup;
302class IRBuilderBase;
303class Loop;
304class ScalarEvolution;
305class TargetTransformInfo;
306class Type;
307class Value;
308
309namespace Intrinsic {
310typedef unsigned ID;
311}
312
313/// A helper function for converting Scalar types to vector types. If
314/// the incoming type is void, we return void. If the EC represents a
315/// scalar, we return the scalar type.
316inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
317 if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
318 return Scalar;
319 return VectorType::get(Scalar, EC);
320}
321
322inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
323 return ToVectorTy(Scalar, ElementCount::getFixed(VF));
324}
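
// Example (illustrative sketch), given an LLVMContext Ctx:
//   ToVectorTy(Type::getInt32Ty(Ctx), 4);                            // <4 x i32>
//   ToVectorTy(Type::getFloatTy(Ctx), ElementCount::getScalable(2)); // <vscale x 2 x float>
//   ToVectorTy(Type::getVoidTy(Ctx), 4);                             // void, unchanged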
325
326/// Identify if the intrinsic is trivially vectorizable.
327/// This method returns true if the intrinsic's argument types are all scalars
328/// for the scalar form of the intrinsic and all vectors (or scalars handled by
329/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
330bool isTriviallyVectorizable(Intrinsic::ID ID);
331
332/// Identifies if the vector form of the intrinsic has a scalar operand.
333bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
334 unsigned ScalarOpdIdx);
335
336/// Identifies if the vector form of the intrinsic has an operand that has
337/// an overloaded type.
338bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);
339
340/// Returns intrinsic ID for call.
341/// For the input call instruction it finds the mapped intrinsic and returns
342/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
343Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
344 const TargetLibraryInfo *TLI);
345
346/// Find the operand of the GEP that should be checked for consecutive
347/// stores. This ignores trailing indices that have no effect on the final
348/// pointer.
349unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);
350
351/// If the argument is a GEP, then returns the operand identified by
352/// getGEPInductionOperand. However, if there is some other non-loop-invariant
353/// operand, it returns that instead.
354Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
355
356/// If a value has only one user that is a CastInst, return it.
357Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);
358
359/// Get the stride of a pointer access in a loop. Looks for symbolic
360/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
361Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
362
363/// Given a vector and an element number, see if the scalar value is
364/// already around as a register, for example if it were inserted then extracted
365/// from the vector.
366Value *findScalarElement(Value *V, unsigned EltNo);
367
368/// If all non-negative \p Mask elements are the same value, return that value.
369/// If all elements are negative (undefined) or \p Mask contains different
370/// non-negative values, return -1.
371int getSplatIndex(ArrayRef<int> Mask);
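
// Example (illustrative sketch): getSplatIndex({2, 2, 2, 2}) returns 2;
// getSplatIndex({2, -1, 2, 2}) also returns 2, since negative (undef) lanes are
// ignored; getSplatIndex({0, 1, 2, 3}) returns -1 because the indices differ.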
372
373/// Get splat value if the input is a splat vector or return nullptr.
374/// The value may be extracted from a splat constants vector or from
375/// a sequence of instructions that broadcast a single value into a vector.
376Value *getSplatValue(const Value *V);
377
378/// Return true if each element of the vector value \p V is poisoned or equal to
379/// every other non-poisoned element. If an index element is specified, either
380/// every element of the vector is poisoned or the element at that index is not
381/// poisoned and equal to every other non-poisoned element.
382/// This may be more powerful than the related getSplatValue() because it is
383/// not limited by finding a scalar source value to a splatted vector.
384bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
385
386/// Transform a shuffle mask's output demanded element mask into demanded
387/// element masks for the 2 operands, returns false if the mask isn't valid.
388/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
389/// \p AllowUndefElts permits "-1" indices to be treated as undef.
390bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
391 const APInt &DemandedElts, APInt &DemandedLHS,
392 APInt &DemandedRHS, bool AllowUndefElts = false);
393
394/// Replace each shuffle mask index with the scaled sequential indices for an
395/// equivalent mask of narrowed elements. Mask elements that are less than 0
396/// (sentinel values) are repeated in the output mask.
397///
398/// Example with Scale = 4:
399/// <4 x i32> <3, 2, 0, -1> -->
400/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
401///
402/// This is the reverse process of widening shuffle mask elements, but it always
403/// succeeds because the indexes can always be multiplied (scaled up) to map to
404/// narrower vector elements.
405void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
406 SmallVectorImpl<int> &ScaledMask);
407
408/// Try to transform a shuffle mask by replacing elements with the scaled index
409/// for an equivalent mask of widened elements. If all mask elements that would
410/// map to a wider element of the new mask are the same negative number
411/// (sentinel value), that element of the new mask is the same value. If any
412/// element in a given slice is negative and some other element in that slice is
413/// not the same value, return false (partial matches with sentinel values are
414/// not allowed).
415///
416/// Example with Scale = 4:
417/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
418/// <4 x i32> <3, 2, 0, -1>
419///
420/// This is the reverse process of narrowing shuffle mask elements if it
421/// succeeds. This transform is not always possible because indexes may not
422/// divide evenly (scale down) to map to wider vector elements.
423bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
424 SmallVectorImpl<int> &ScaledMask);
425
426/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
427/// to get the shuffle mask with widest possible elements.
428void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
429 SmallVectorImpl<int> &ScaledMask);
430
431/// Splits and processes shuffle mask depending on the number of input and
432/// output registers. The function does 2 main things: 1) splits the
433/// source/destination vectors into real registers; 2) does the mask analysis to
434/// identify which real registers are permuted. Then the function processes
435/// resulting registers mask using provided action items. If no input register
436/// is defined, \p NoInputAction action is used. If only 1 input register is
437/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to
438/// process > 2 input registers and masks.
439/// \param Mask Original shuffle mask.
440/// \param NumOfSrcRegs Number of source registers.
441/// \param NumOfDestRegs Number of destination registers.
442/// \param NumOfUsedRegs Number of actually used destination registers.
443void processShuffleMasks(
444 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
445 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
446 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
447 function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);
448
449/// Compute a map of integer instructions to their minimum legal type
450/// size.
451///
452/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
453/// type (e.g. i32) whenever arithmetic is performed on them.
454///
455/// For targets with native i8 or i16 operations, usually InstCombine can shrink
456/// the arithmetic type down again. However InstCombine refuses to create
457/// illegal types, so for targets without i8 or i16 registers, the lengthening
458/// and shrinking remains.
459///
460/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
461/// their scalar equivalents do not, so during vectorization it is important to
462/// remove these lengthens and truncates when deciding the profitability of
463/// vectorization.
464///
465/// This function analyzes the given range of instructions and determines the
466/// minimum type size each can be converted to. It attempts to remove or
467/// minimize type size changes across each def-use chain, so for example in the
468/// following code:
469///
470/// %1 = load i8, i8*
471/// %2 = add i8 %1, 2
472/// %3 = load i16, i16*
473/// %4 = zext i8 %2 to i32
474/// %5 = zext i16 %3 to i32
475/// %6 = add i32 %4, %5
476/// %7 = trunc i32 %6 to i16
477///
478/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
479/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
480///
481/// If the optional TargetTransformInfo is provided, this function tries harder
482/// to do less work by only looking at illegal types.
483MapVector<Instruction*, uint64_t>
484computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
485 DemandedBits &DB,
486 const TargetTransformInfo *TTI=nullptr);
487
488/// Compute the union of two access-group lists.
489///
490/// If the list contains just one access group, it is returned directly. If the
491/// list is empty, returns nullptr.
492MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
493
494/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
495/// are both in. If either instruction does not access memory at all, it is
496/// considered to be in every list.
497///
498/// If the list contains just one access group, it is returned directly. If the
499/// list is empty, returns nullptr.
500MDNode *intersectAccessGroups(const Instruction *Inst1,
501 const Instruction *Inst2);
502
503/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
504/// MD_nontemporal, MD_access_group].
505/// For K in Kinds, we get the MDNode for K from each of the
506/// elements of VL, compute their "intersection" (i.e., the most generic
507/// metadata value that covers all of the individual values), and set I's
508/// metadata for K equal to the intersection value.
509///
510/// This function always sets a (possibly null) value for each K in Kinds.
511Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
512
513/// Create a mask that filters the members of an interleave group where there
514/// are gaps.
515///
516/// For example, the mask for \p Group with interleave-factor 3
517/// and \p VF 4, that has only its first member present is:
518///
519/// <1,0,0,1,0,0,1,0,0,1,0,0>
520///
521/// Note: The result is a mask of 0's and 1's, as opposed to the other
522/// create[*]Mask() utilities which create a shuffle mask (mask that
523/// consists of indices).
524Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
525 const InterleaveGroup<Instruction> &Group);
526
527/// Create a mask with replicated elements.
528///
529/// This function creates a shuffle mask for replicating each of the \p VF
530/// elements in a vector \p ReplicationFactor times. It can be used to
531/// transform a mask of \p VF elements into a mask of
532/// \p VF * \p ReplicationFactor elements used by a predicated
533/// interleaved-group of loads/stores whose Interleaved-factor ==
534/// \p ReplicationFactor.
535///
536/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
537///
538/// <0,0,0,1,1,1,2,2,2,3,3,3>
539llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
540 unsigned VF);
541
542/// Create an interleave shuffle mask.
543///
544/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
545/// vectorization factor \p VF into a single wide vector. The mask is of the
546/// form:
547///
548/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
549///
550/// For example, the mask for VF = 4 and NumVecs = 2 is:
551///
552/// <0, 4, 1, 5, 2, 6, 3, 7>.
553llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);
554
555/// Create a stride shuffle mask.
556///
557/// This function creates a shuffle mask whose elements begin at \p Start and
558/// are incremented by \p Stride. The mask can be used to deinterleave an
559/// interleaved vector into separate vectors of vectorization factor \p VF. The
560/// mask is of the form:
561///
562/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
563///
564/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
565///
566/// <0, 2, 4, 6>
567llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
568 unsigned VF);
569
570/// Create a sequential shuffle mask.
571///
572/// This function creates a shuffle mask whose elements are sequential and begin
573/// at \p Start. The mask contains \p NumInts integers and is padded with \p
574/// NumUndefs undef values. The mask is of the form:
575///
576/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
577///
578/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
579///
580/// <0, 1, 2, 3, undef, undef, undef, undef>
581llvm::SmallVector<int, 16>
582createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
583
584/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
585/// mask assuming both operands are identical. This assumes that the unary
586/// shuffle will use elements from operand 0 (operand 1 will be unused).
587llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
588 unsigned NumElts);
589
590/// Concatenate a list of vectors.
591///
592/// This function generates code that concatenates the vectors in \p Vecs into a
593/// single large vector. The number of vectors should be greater than one, and
594/// their element types should be the same. The number of elements in the
595/// vectors should also be the same; however, if the last vector has fewer
596/// elements, it will be padded with undefs.
597Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);
598
599/// Given a mask vector of i1, return true if all of the elements of this
600/// predicate mask are known to be false or undef. That is, return true if all
601/// lanes can be assumed inactive.
602bool maskIsAllZeroOrUndef(Value *Mask);
603
604/// Given a mask vector of i1, return true if all of the elements of this
605/// predicate mask are known to be true or undef. That is, return true if all
606/// lanes can be assumed active.
607bool maskIsAllOneOrUndef(Value *Mask);
608
609/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
610/// for each lane which may be active.
611APInt possiblyDemandedEltsInMask(Value *Mask);
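
// Example (illustrative sketch): for the constant mask
//   <4 x i1> <i1 true, i1 false, i1 undef, i1 true>
// maskIsAllZeroOrUndef and maskIsAllOneOrUndef both return false, while
// possiblyDemandedEltsInMask returns an APInt with bits 0, 2 and 3 set (an
// undef lane is conservatively treated as possibly active).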
612
613/// The group of interleaved loads/stores sharing the same stride and
614/// close to each other.
615///
616/// Each member in this group has an index starting from 0, and the largest
617/// index should be less than the interleave factor, which is equal to the absolute
618/// value of the access's stride.
619///
620/// E.g. An interleaved load group of factor 4:
621/// for (unsigned i = 0; i < 1024; i+=4) {
622/// a = A[i]; // Member of index 0
623/// b = A[i+1]; // Member of index 1
624/// d = A[i+3]; // Member of index 3
625/// ...
626/// }
627///
628/// An interleaved store group of factor 4:
629/// for (unsigned i = 0; i < 1024; i+=4) {
630/// ...
631/// A[i] = a; // Member of index 0
632/// A[i+1] = b; // Member of index 1
633/// A[i+2] = c; // Member of index 2
634/// A[i+3] = d; // Member of index 3
635/// }
636///
637/// Note: the interleaved load group could have gaps (missing members), but
638/// the interleaved store group doesn't allow gaps.
639template <typename InstTy> class InterleaveGroup {
640public:
641 InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
642 : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
643 InsertPos(nullptr) {}
644
645 InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
646 : Alignment(Alignment), InsertPos(Instr) {
647 Factor = std::abs(Stride);
648 assert(Factor > 1 && "Invalid interleave factor");
649
650 Reverse = Stride < 0;
651 Members[0] = Instr;
652 }
653
654 bool isReverse() const { return Reverse; }
655 uint32_t getFactor() const { return Factor; }
656 Align getAlign() const { return Alignment; }
657 uint32_t getNumMembers() const { return Members.size(); }
658
659 /// Try to insert a new member \p Instr with index \p Index and
660 /// alignment \p NewAlign. The index is related to the leader and it could be
661 /// negative if it is the new leader.
662 ///
663 /// \returns false if the instruction doesn't belong to the group.
664 bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
665 // Make sure the key fits in an int32_t.
666 std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
667 if (!MaybeKey)
668 return false;
669 int32_t Key = *MaybeKey;
670
671 // Skip if the key is used for either the tombstone or empty special values.
672 if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
673 DenseMapInfo<int32_t>::getEmptyKey() == Key)
674 return false;
675
676 // Skip if there is already a member with the same index.
677 if (Members.find(Key) != Members.end())
678 return false;
679
680 if (Key > LargestKey) {
681 // The largest index is always less than the interleave factor.
682 if (Index >= static_cast<int32_t>(Factor))
683 return false;
684
685 LargestKey = Key;
686 } else if (Key < SmallestKey) {
687
688 // Make sure the largest index fits in an int32_t.
689 std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
690 if (!MaybeLargestIndex)
691 return false;
692
693 // The largest index is always less than the interleave factor.
694 if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
695 return false;
696
697 SmallestKey = Key;
698 }
699
700 // It's always safe to select the minimum alignment.
701 Alignment = std::min(Alignment, NewAlign);
702 Members[Key] = Instr;
703 return true;
704 }
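
  // Example (illustrative sketch): a group created from a load at index 0 can
  // absorb an earlier member via insertMember(EarlierLoad, /*Index=*/-1, Align);
  // SmallestKey then becomes -1, so getMember(0) returns EarlierLoad and
  // getMember(1) returns the original leader.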
705
706 /// Get the member with the given index \p Index
707 ///
708 /// \returns nullptr if it contains no such member.
709 InstTy *getMember(uint32_t Index) const {
710 int32_t Key = SmallestKey + Index;
711 return Members.lookup(Key);
712 }
713
714 /// Get the index for the given member. Unlike the key in the member
715 /// map, the index starts from 0.
716 uint32_t getIndex(const InstTy *Instr) const {
717 for (auto I : Members) {
718 if (I.second == Instr)
719 return I.first - SmallestKey;
720 }
721
722 llvm_unreachable("InterleaveGroup contains no such member");
723 }
724
725 InstTy *getInsertPos() const { return InsertPos; }
726 void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
727
728 /// Add metadata (e.g. alias info) from the instructions in this group to \p
729 /// NewInst.
730 ///
731 /// FIXME: this function currently does not add noalias metadata a'la
732 /// addNewMedata. To do that we need to compute the intersection of the
733 /// noalias info from all members.
734 void addMetadata(InstTy *NewInst) const;
735
736 /// Returns true if this Group requires a scalar iteration to handle gaps.
737 bool requiresScalarEpilogue() const {
738 // If the last member of the Group exists, then a scalar epilog is not
739 // needed for this group.
740 if (getMember(getFactor() - 1))
741 return false;
742
743 // We have a group with gaps. It therefore can't be a reversed access,
744 // because such groups get invalidated (TODO).
745 assert(!isReverse() && "Group should have been invalidated");
746
747 // This is a group of loads, with gaps, and without a last-member
748 return true;
749 }
750
751private:
752 uint32_t Factor; // Interleave Factor.
753 bool Reverse;
754 Align Alignment;
755 DenseMap<int32_t, InstTy *> Members;
756 int32_t SmallestKey = 0;
757 int32_t LargestKey = 0;
758
759 // To avoid breaking dependences, vectorized instructions of an interleave
760 // group should be inserted at either the first load or the last store in
761 // program order.
762 //
763 // E.g. %even = load i32 // Insert Position
764 // %add = add i32 %even // Use of %even
765 // %odd = load i32
766 //
767 // store i32 %even
768 // %odd = add i32 // Def of %odd
769 // store i32 %odd // Insert Position
770 InstTy *InsertPos;
771};
772
773/// Drive the analysis of interleaved memory accesses in the loop.
774///
775/// Use this class to analyze interleaved accesses only when we can vectorize
776/// a loop. Otherwise it's meaningless to do analysis as the vectorization
777/// on interleaved accesses is unsafe.
778///
779/// The analysis collects interleave groups and records the relationships
780/// between the member and the group in a map.
781class InterleavedAccessInfo {
782public:
783 InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
784 DominatorTree *DT, LoopInfo *LI,
785 const LoopAccessInfo *LAI)
786 : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
787
788 ~InterleavedAccessInfo() { invalidateGroups(); }
789
790 /// Analyze the interleaved accesses and collect them in interleave
791 /// groups. Substitute symbolic strides using \p Strides.
792 /// Consider also predicated loads/stores in the analysis if
793 /// \p EnableMaskedInterleavedGroup is true.
794 void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
795
796 /// Invalidate groups, e.g., in case all blocks in the loop will be predicated
797 /// contrary to original assumption. Although we currently prevent group
798 /// formation for predicated accesses, we may be able to relax this limitation
799 /// in the future once we handle more complicated blocks. Returns true if any
800 /// groups were invalidated.
801 bool invalidateGroups() {
802 if (InterleaveGroups.empty()) {
803 assert(
804 !RequiresScalarEpilogue &&
805 "RequiresScalarEpilog should not be set without interleave groups");
806 return false;
807 }
808
809 InterleaveGroupMap.clear();
810 for (auto *Ptr : InterleaveGroups)
811 delete Ptr;
812 InterleaveGroups.clear();
813 RequiresScalarEpilogue = false;
814 return true;
815 }
816
817 /// Check if \p Instr belongs to any interleave group.
818 bool isInterleaved(Instruction *Instr) const {
819 return InterleaveGroupMap.contains(Instr);
820 }
821
822 /// Get the interleave group that \p Instr belongs to.
823 ///
824 /// \returns nullptr if \p Instr does not belong to any group.
825 InterleaveGroup<Instruction> *
826 getInterleaveGroup(const Instruction *Instr) const {
827 return InterleaveGroupMap.lookup(Instr);
828 }
829
829
830 iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
831 getInterleaveGroups() {
832 return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
833 }
834
835 /// Returns true if an interleaved group that may access memory
836 /// out-of-bounds requires a scalar epilogue iteration for correctness.
837 bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
838
839 /// Invalidate groups that require a scalar epilogue (due to gaps). This can
840 /// happen when optimizing for size forbids a scalar epilogue, and the gap
841 /// cannot be filtered by masking the load/store.
842 void invalidateGroupsRequiringScalarEpilogue();
843
844 /// Returns true if we have any interleave groups.
845 bool hasGroups() const { return !InterleaveGroups.empty(); }
846
847private:
848 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
849 /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
850 /// The interleaved access analysis can also add new predicates (for example
851 /// by versioning strides of pointers).
852 PredicatedScalarEvolution &PSE;
853
854 Loop *TheLoop;
855 DominatorTree *DT;
856 LoopInfo *LI;
857 const LoopAccessInfo *LAI;
858
859 /// True if the loop may contain non-reversed interleaved groups with
860 /// out-of-bounds accesses. We ensure we don't speculatively access memory
861 /// out-of-bounds by executing at least one scalar epilogue iteration.
862 bool RequiresScalarEpilogue = false;
863
864 /// Holds the relationships between the members and the interleave group.
865 DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;
866
867 SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
868
869 /// Holds dependences among the memory accesses in the loop. It maps a source
870 /// access to a set of dependent sink accesses.
871 DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
872
873 /// The descriptor for a strided memory access.
874 struct StrideDescriptor {
875 StrideDescriptor() = default;
876 StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
877 Align Alignment)
878 : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
879
880 // The access's stride. It is negative for a reverse access.
881 int64_t Stride = 0;
882
883 // The scalar expression of this access.
884 const SCEV *Scev = nullptr;
885
886 // The size of the memory object.
887 uint64_t Size = 0;
888
889 // The alignment of this access.
890 Align Alignment;
891 };
892
893 /// A type for holding instructions and their stride descriptors.
894 using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
895
896 /// Create a new interleave group with the given instruction \p Instr,
897 /// stride \p Stride and alignment \p Align.
898 ///
899 /// \returns the newly created interleave group.
900 InterleaveGroup<Instruction> *
901 createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
902 assert(!InterleaveGroupMap.count(Instr) &&
903 "Already in an interleaved access group");
904 InterleaveGroupMap[Instr] =
905 new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
906 InterleaveGroups.insert(InterleaveGroupMap[Instr]);
907 return InterleaveGroupMap[Instr];
908 }
909
910 /// Release the group and remove all the relationships.
911 void releaseGroup(InterleaveGroup<Instruction> *Group) {
912 for (unsigned i = 0; i < Group->getFactor(); i++)
913 if (Instruction *Member = Group->getMember(i))
914 InterleaveGroupMap.erase(Member);
915
916 InterleaveGroups.erase(Group);
917 delete Group;
918 }
919
920 /// Collect all the accesses with a constant stride in program order.
921 void collectConstStrideAccesses(
922 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
923 const ValueToValueMap &Strides);
924
925 /// Returns true if \p Stride is allowed in an interleaved group.
926 static bool isStrided(int Stride);
927
928 /// Returns true if \p BB is a predicated block.
929 bool isPredicated(BasicBlock *BB) const {
930 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
931 }
932
933 /// Returns true if LoopAccessInfo can be used for dependence queries.
934 bool areDependencesValid() const {
935 return LAI && LAI->getDepChecker().getDependences();
936 }
937
938 /// Returns true if memory accesses \p A and \p B can be reordered, if
939 /// necessary, when constructing interleaved groups.
940 ///
941 /// \p A must precede \p B in program order. We return false if reordering is
942 /// not necessary or is prevented because \p A and \p B may be dependent.
943 bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
944 StrideEntry *B) const {
945 // Code motion for interleaved accesses can potentially hoist strided loads
946 // and sink strided stores. The code below checks the legality of the
947 // following two conditions:
948 //
949 // 1. Potentially moving a strided load (B) before any store (A) that
950 // precedes B, or
951 //
952 // 2. Potentially moving a strided store (A) after any load or store (B)
953 // that A precedes.
954 //
955 // It's legal to reorder A and B if we know there isn't a dependence from A
956 // to B. Note that this determination is conservative since some
957 // dependences could potentially be reordered safely.
958
959 // A is potentially the source of a dependence.
960 auto *Src = A->first;
961 auto SrcDes = A->second;
962
963 // B is potentially the sink of a dependence.
964 auto *Sink = B->first;
965 auto SinkDes = B->second;
966
967 // Code motion for interleaved accesses can't violate WAR dependences.
968 // Thus, reordering is legal if the source isn't a write.
969 if (!Src->mayWriteToMemory())
970 return true;
971
972 // At least one of the accesses must be strided.
973 if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
974 return true;
975
976 // If dependence information is not available from LoopAccessInfo,
977 // conservatively assume the instructions can't be reordered.
978 if (!areDependencesValid())
979 return false;
980
981 // If we know there is a dependence from source to sink, assume the
982 // instructions can't be reordered. Otherwise, reordering is legal.
983 return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
984 }
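
  // Example (illustrative sketch): with A = `store i32 %x, ptr %p` (a write)
  // followed by a strided load B, reordering is permitted only when the
  // dependences recorded by LoopAccessInfo do not list A as a source with B as
  // one of its sinks.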
985
986 /// Collect the dependences from LoopAccessInfo.
987 ///
988 /// We process the dependences once during the interleaved access analysis to
989 /// enable constant-time dependence queries.
990 void collectDependences() {
991 if (!areDependencesValid())
992 return;
993 auto *Deps = LAI->getDepChecker().getDependences();
994 for (auto Dep : *Deps)
995 Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
996 }
997};
998
999} // llvm namespace
1000
1001#endif