//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/VFABIDemangler.h"
#include "llvm/Support/CheckedArithmetic.h"
#include "llvm/Support/Compiler.h"

namespace llvm {

class IntrinsicInst;

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated with a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector function descriptors associated with the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated with the rules of
  /// a vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const std::optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, CI.getFunctionType());
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape && (Shape->ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape->VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(*Shape);
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated with the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  static bool hasMaskedVariant(const CallInst &CI,
                               std::optional<ElementCount> VF = std::nullopt) {
    // Check whether we have at least one masked vector version of a scalar
    // function. If no VF is specified then we check for any masked variant,
    // otherwise we look for one that matches the supplied VF.
    auto Mappings = VFDatabase::getMappings(CI);
    for (VFInfo Info : Mappings)
      if (!VF || Info.Shape.VF == *VF)
        if (Info.isMasked())
          return true;

    return false;
  }

  /// Constructor, requires a CallInst instance.
  explicit VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}

  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI.getFunctionType()))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
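
// Illustrative usage (a sketch, not part of this header): querying the
// database for a fixed-width variant of a call `CI` that carries
// "vector-function-abi-variant" attributes. The shape built below is an
// assumption for illustration; real callers derive it from their chosen VF
// and predication scheme.
//
//   VFDatabase DB(CI);
//   VFShape Shape = VFShape::get(CI.getFunctionType(),
//                                ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/false);
//   if (Function *VecF = DB.getVectorizedFunction(Shape))
//     ; // A 4-lane vector variant exists in the module; rewrite the call.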

template <typename T> class ArrayRef;
class DemandedBits;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class TargetTransformInfo;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
///
/// Note: isTriviallyVectorizable implies isTriviallyScalarizable.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identify if the intrinsic is trivially scalarizable.
/// This method returns true following the same predicates as
/// isTriviallyVectorizable.
///
/// Note: There are intrinsics where implementing vectorization for the
/// intrinsic is redundant, but we want to implement scalarization of the
/// vector. To prevent the requirement that an intrinsic also implement
/// vectorization, we provide this separate function.
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
/// \p TTI is used to consider target-specific intrinsics; if no target-specific
/// intrinsics are to be considered, it is appropriate to pass in nullptr.
LLVM_ABI bool
isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx,
                                   const TargetTransformInfo *TTI);

/// Identifies if the vector form of the intrinsic is overloaded on the type of
/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
/// \p TTI is used to consider target-specific intrinsics; if no target-specific
/// intrinsics are to be considered, it is appropriate to pass in nullptr.
LLVM_ABI bool
isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx,
                                       const TargetTransformInfo *TTI);

/// Identifies if the vector form of the intrinsic that returns a struct is
/// overloaded at the struct element index \p RetIdx. \p TTI is used to
/// consider target-specific intrinsics; if no target-specific intrinsics
/// are to be considered, it is appropriate to pass in nullptr.
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(
    Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI);

/// Returns intrinsic ID for call.
/// For the given call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if it does not find one, it returns not_intrinsic.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                                   const TargetLibraryInfo *TLI);

/// Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);

/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);

/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each
/// factor.
LLVM_ABI VectorType *getDeinterleavedVectorType(IntrinsicInst *DI);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
LLVM_ABI Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
LLVM_ABI int getSplatIndex(ArrayRef<int> Mask);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
LLVM_ABI Value *getSplatValue(const Value *V);

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
LLVM_ABI bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
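
// A sketch of how the two splat queries relate (`V` is an assumed Value*):
// isSplatValue can succeed on shuffles whose splatted scalar never exists as
// a register, while getSplatValue only succeeds when it can name that scalar.
//
//   if (isSplatValue(V))           // All non-poison lanes are equal.
//     if (Value *S = getSplatValue(V))
//       ; // S is the scalar broadcast into every lane of V.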

/// Transform a shuffle mask's output demanded element mask into demanded
/// element masks for the 2 operands, returns false if the mask isn't valid.
/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
/// \p AllowUndefElts permits "-1" indices to be treated as undef.
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                                     const APInt &DemandedElts,
                                     APInt &DemandedLHS, APInt &DemandedRHS,
                                     bool AllowUndefElts = false);
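
// Worked example (a sketch with assumed values): with SrcWidth = 4,
// Mask = <0,5,2,7>, and DemandedElts = 0b1010 (output lanes 1 and 3), lanes 1
// and 3 read mask indices 5 and 7, i.e. elements 1 and 3 of the second operand:
//
//   APInt DemandedElts(4, 0b1010);
//   APInt DemandedLHS, DemandedRHS;
//   bool Ok = getShuffleDemandedElts(4, {0, 5, 2, 7}, DemandedElts,
//                                    DemandedLHS, DemandedRHS);
//   // Ok == true, DemandedLHS == 0b0000, DemandedRHS == 0b1010.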

/// Does this shuffle mask represent either one slide shuffle or a pair of
/// two slide shuffles, combined with a select on some constant vector mask?
/// A slide is a shuffle mask which shifts some set of elements up or down
/// the vector, with all other elements being undefined. An identity shuffle
/// will be matched as a slide by 0. The output parameter provides the source
/// (-1 means no source) and slide direction for each slide.
LLVM_ABI bool isMaskedSlidePair(ArrayRef<int> Mask, int NumElts,
                                std::array<std::pair<int, int>, 2> &SrcInfo);

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                    SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                   SmallVectorImpl<int> &ScaledMask);

/// A variant of the previous method which is specialized for Scale=2, and
/// treats -1 as undef and allows widening when a wider element is partially
/// undef in the narrow form of the mask. This transformation discards
/// information about which bytes in the original shuffle were undef.
LLVM_ABI bool widenShuffleMaskElts(ArrayRef<int> M,
                                   SmallVectorImpl<int> &NewMask);

/// Attempt to narrow/widen the \p Mask shuffle mask to the \p NumDstElts target
/// width. Internally this will call narrowShuffleMaskElts/widenShuffleMaskElts.
/// This will assert unless NumDstElts is a multiple of Mask.size (or
/// vice-versa). Returns false on failure, and ScaledMask will be in an
/// undefined state.
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
                                   SmallVectorImpl<int> &ScaledMask);
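
// Usage sketch: rescaling the 4-element mask <3,2,0,-1> to 16 destination
// elements performs the Scale = 4 narrowing shown earlier.
//
//   SmallVector<int> Scaled;
//   if (scaleShuffleMaskElts(16, {3, 2, 0, -1}, Scaled))
//     ; // Scaled == <12,13,14,15, 8,9,10,11, 0,1,2,3, -1,-1,-1,-1>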

/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
/// to get the shuffle mask with widest possible elements.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
                                           SmallVectorImpl<int> &ScaledMask);

/// Splits and processes shuffle mask depending on the number of input and
/// output registers. The function does 2 main things: 1) splits the
/// source/destination vectors into real registers; 2) does the mask analysis
/// to identify which real registers are permuted. Then the function processes
/// the resulting registers mask using the provided action items. If no input
/// register is defined, \p NoInputAction is used. If only 1 input register is
/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to
/// process 2 or more input registers and masks.
/// \param Mask Original shuffle mask.
/// \param NumOfSrcRegs Number of source registers.
/// \param NumOfDestRegs Number of destination registers.
/// \param NumOfUsedRegs Number of actually used destination registers.
LLVM_ABI void processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
        ManyInputsAction);
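
// A minimal wiring sketch (the lambda bodies are placeholders and the exact
// emission strategy is up to the caller; real users emit one shuffle per
// destination register):
//
//   processShuffleMasks(
//       Mask, /*NumOfSrcRegs=*/2, /*NumOfDestRegs=*/2, /*NumOfUsedRegs=*/2,
//       []() { /* destination register has no input; leave it undef */ },
//       [](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DstReg) {
//         /* permute source register SrcReg into DstReg using RegMask */
//       },
//       [](ArrayRef<int> RegMask, unsigned Src1, unsigned Src2, bool NewReg) {
//         /* combine two source registers using RegMask */
//       });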

/// Compute the demanded elements mask of horizontal binary operations. A
/// horizontal operation combines two adjacent elements in a vector operand.
/// This function returns a mask for the elements that correspond to the first
/// operand of this horizontal combination. For example, for two vectors
/// [X1, X2, X3, X4] and [Y1, Y2, Y3, Y4], the resulting mask can include the
/// elements X1, X3, Y1, and Y3. To get the other operand's mask, simply shift
/// the result of this function to the left by 1.
///
/// \param VectorBitWidth the total bit width of the vector
/// \param DemandedElts the demanded elements mask for the operation
/// \param DemandedLHS the demanded elements mask for the left operand
/// \param DemandedRHS the demanded elements mask for the right operand
LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
                                                  const APInt &DemandedElts,
                                                  APInt &DemandedLHS,
                                                  APInt &DemandedRHS);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
LLVM_ABI MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);
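
// Typical usage (sketch; `Blocks`, `DB`, and `TTI` are assumed to come from
// the enclosing pass):
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(Blocks, DB, &TTI);
//   for (const auto &[I, MinBW] : MinBWs)
//     ; // I can be computed in MinBW bits without changing the result.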

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
LLVM_ABI MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
LLVM_ABI MDNode *intersectAccessGroups(const Instruction *Inst1,
                                       const Instruction *Inst2);

/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
/// vectorization. It can be preserved after vectorization if the kind is one of
/// [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,
/// MD_access_group, MD_mmra].
LLVM_ABI void getMetadataToPropagate(
    Instruction *Inst,
    SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group, MD_mmra].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
LLVM_ABI Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

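// Usage sketch (assumed context: `NewLoad` is the vectorized replacement for
// the scalar loads collected in `VL`; the load names are hypothetical):
//
//   SmallVector<Value *> VL = {Load0, Load1, Load2, Load3};
//   propagateMetadata(NewLoad, VL); // NewLoad receives the intersected MD.
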
/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
LLVM_ABI Constant *
createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                     const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
LLVM_ABI llvm::SmallVector<int, 16>
createReplicatedMask(unsigned ReplicationFactor, unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
LLVM_ABI llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF,
                                                         unsigned NumVecs);

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
LLVM_ABI llvm::SmallVector<int, 16>
createStrideMask(unsigned Start, unsigned Stride, unsigned VF);
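
// For instance (a sketch; `B` is an assumed IRBuilder and `WideVec` an
// assumed interleaved <8 x i32> value), extracting the even lanes:
//
//   Value *Evens = B.CreateShuffleVector(
//       WideVec, createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4));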

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
LLVM_ABI llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);

/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are identical. This assumes that the unary
/// shuffle will use elements from operand 0 (operand 1 will be unused).
LLVM_ABI llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
                                                    unsigned NumElts);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
LLVM_ABI Value *concatenateVectors(IRBuilderBase &Builder,
                                   ArrayRef<Value *> Vecs);
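
// Sketch (assuming an IRBuilder `B` and two <4 x float> values `Lo`, `Hi`):
//
//   Value *Wide = concatenateVectors(B, {Lo, Hi}); // yields an <8 x float>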

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if any of the elements of this
/// predicate mask are known to be true or undef. That is, return true if at
/// least one lane can be assumed active.
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask);

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];                           // Member of index 0
///          b = A[i+1];                         // Member of index 1
///          d = A[i+3];                         // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;                         // Member of index 0
///          A[i+1] = b;                         // Member of index 1
///          A[i+2] = c;                         // Member of index 2
///          A[i+3] = d;                         // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for either the tombstone or empty special values.
    if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
        DenseMapInfo<int32_t>::getEmptyKey() == Key)
      return false;

    // Skip if there is already a member with the same index.
    if (Members.contains(Key))
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if it contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Return an iterator range over the non-null members of this group, in
  /// index order.
  auto members() const {
    return make_filter_range(
        map_range(seq<uint32_t>(0, Factor),
                  [this](uint32_t I) { return getMember(I); }),
        [](InstTy *I) { return I != nullptr; });
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore can't be a reversed access,
    // because such groups get invalidated (TODO).
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

  /// Return true if this group is full, i.e. it has no gaps.
  bool isFull() const { return getNumMembers() == getFactor(); }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32              // Insert Position
  //      %add = add i32 %even          // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32                // Def of %odd
  //      store i32 %odd                // Insert Position
  InstTy *InsertPos;
};
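
// Illustrative only: walking an existing group (`Group` is an assumed
// InterleaveGroup<Instruction>):
//
//   for (Instruction *Member : Group.members())
//     ; // Visited in index order; gaps are skipped.
//   if (Group.requiresScalarEpilogue())
//     ; // A scalar iteration must remain to cover the trailing gap.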

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in the loop will be
  /// predicated, contrary to the original assumption. Although we currently
  /// prevent group formation for predicated accesses, we may be able to relax
  /// this limitation in the future once we handle more complicated blocks.
  /// Returns true if any groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.contains(Instr);
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if it doesn't have such a group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  LLVM_ABI void invalidateGroupsRequiringScalarEpilogue();

  /// Returns true if we have any interleave groups.
  bool hasGroups() const { return !InterleaveGroups.empty(); }

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Alignment.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    auto [It, Inserted] = InterleaveGroupMap.try_emplace(Instr);
    assert(Inserted && "Already in an interleaved access group");
    It->second = new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(It->second);
    return It->second;
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    InterleaveGroups.erase(Group);
    releaseGroupWithoutRemovingFromSet(Group);
  }

  /// Do everything necessary to release the group, apart from removing it from
  /// the InterleaveGroups set.
  void releaseGroupWithoutRemovingFromSet(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const DenseMap<Value *, const SCEV *> &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  LLVM_ABI static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    const auto &DepChecker = LAI->getDepChecker();
    auto *Deps = DepChecker.getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(DepChecker)].insert(
          Dep.getDestination(DepChecker));
  }
};
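
// End-to-end sketch (PSE, TheLoop, DT, LI, and LAI are assumed to come from
// the enclosing vectorizer's context):
//
//   InterleavedAccessInfo IAI(PSE, TheLoop, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/true);
//   for (auto *Group : IAI.getInterleaveGroups())
//     ; // Cost-model and vectorize each group as a unit.
//   if (IAI.requiresScalarEpilogue())
//     ; // Keep a scalar epilogue iteration for groups with gaps.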

} // llvm namespace

#endif