VectorUtils.h
//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/VFABIDemangler.h"
#include "llvm/Support/CheckedArithmetic.h"
#include "llvm/Support/Compiler.h"

namespace llvm {
class TargetLibraryInfo;
class IntrinsicInst;

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector function descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated to the rule of
  /// a vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const std::optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, CI.getFunctionType());
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape && (Shape->ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape->VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(*Shape);
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  static bool hasMaskedVariant(const CallInst &CI,
                               std::optional<ElementCount> VF = std::nullopt) {
    // Check whether we have at least one masked vector version of a scalar
    // function. If no VF is specified then we check for any masked variant,
    // otherwise we look for one that matches the supplied VF.
    auto Mappings = VFDatabase::getMappings(CI);
    for (VFInfo Info : Mappings)
      if (!VF || Info.Shape.VF == *VF)
        if (Info.isMasked())
          return true;

    return false;
  }

  /// Constructor, requires a CallInst instance.
  VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}

  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI.getFunctionType()))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
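
// Usage sketch (illustrative, not part of the upstream header): enumerate the
// vector variants that the VF ABI attributes make available for a call site.
// Printing via errs() assumes llvm/Support/raw_ostream.h is available.
//
//   void printVectorVariants(CallInst &CI) {
//     VFDatabase DB(CI);
//     for (const VFInfo &Info : VFDatabase::getMappings(CI))
//       if (Function *VecF = DB.getVectorizedFunction(Info.Shape))
//         errs() << "found variant: " << VecF->getName() << "\n";
//   }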

template <typename T> class ArrayRef;
class DemandedBits;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class TargetTransformInfo;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
///
/// Note: isTriviallyVectorizable implies isTriviallyScalarizable.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identify if the intrinsic is trivially scalarizable.
/// This method returns true following the same predicates of
/// isTriviallyVectorizable.
///
/// Note: There are intrinsics where implementing vectorization for the
/// intrinsic is redundant, but we want to implement scalarization of the
/// vector. To avoid requiring that an intrinsic also implement vectorization,
/// we provide this separate function.
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID,
                                      const TargetTransformInfo *TTI);

/// Identifies if the vector form of the intrinsic has a scalar operand.
/// \p TTI is used to consider target-specific intrinsics; if target-specific
/// intrinsics should not be considered, it is appropriate to pass nullptr.
LLVM_ABI bool
isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx,
                                   const TargetTransformInfo *TTI);

/// Identifies if the vector form of the intrinsic is overloaded on the type of
/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
/// \p TTI is used to consider target-specific intrinsics; if target-specific
/// intrinsics should not be considered, it is appropriate to pass nullptr.
LLVM_ABI bool
isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx,
                                       const TargetTransformInfo *TTI);

/// Identifies if the vector form of the intrinsic that returns a struct is
/// overloaded at the struct element index \p RetIdx. \p TTI is used to
/// consider target-specific intrinsics; if target-specific intrinsics should
/// not be considered, it is appropriate to pass nullptr.
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(
    Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI);

/// Returns the intrinsic ID for the call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                                   const TargetLibraryInfo *TLI);

/// Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);

/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);

/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each
/// factor.
LLVM_ABI VectorType *getDeinterleavedVectorType(IntrinsicInst *DI);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then
/// extracted from the vector.
LLVM_ABI Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
LLVM_ABI int getSplatIndex(ArrayRef<int> Mask);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
LLVM_ABI Value *getSplatValue(const Value *V);

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
LLVM_ABI bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
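
// Usage sketch (illustrative, not part of the upstream header): distinguish a
// splat with a recoverable scalar source from one that is only structurally a
// splat. V is assumed to be an IR value of vector type.
//
//   bool isBroadcastLike(const Value *V) {
//     if (getSplatValue(V))
//       return true;          // a scalar source exists (constant or insert+shuffle)
//     return isSplatValue(V); // splat-shaped, even without an extractable scalar
//   }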

/// Transform a shuffle mask's output demanded element mask into demanded
/// element masks for the 2 operands, returns false if the mask isn't valid.
/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
/// \p AllowUndefElts permits "-1" indices to be treated as undef.
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                                     const APInt &DemandedElts,
                                     APInt &DemandedLHS, APInt &DemandedRHS,
                                     bool AllowUndefElts = false);
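
// Usage sketch (illustrative, not part of the upstream header): if only lane 0
// of a <4 x i32> shuffle result is demanded, derive which source lanes matter.
//
//   int Mask[] = {6, 1, 2, 3};             // lane 0 comes from RHS element 2
//   APInt DemandedElts = APInt::getOneBitSet(4, 0);
//   APInt DemandedLHS, DemandedRHS;
//   bool Valid = getShuffleDemandedElts(/*SrcWidth=*/4, Mask, DemandedElts,
//                                       DemandedLHS, DemandedRHS);
//   // Valid == true, DemandedLHS is empty, DemandedRHS has only bit 2 set.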

/// Does this shuffle mask represent either one slide shuffle or a pair of
/// two slide shuffles, combined with a select on some constant vector mask?
/// A slide is a shuffle mask which shifts some set of elements up or down
/// the vector, with all other elements being undefined. An identity shuffle
/// will be matched as a slide by 0. The output parameter provides the source
/// (-1 means no source), and slide direction for each slide.
LLVM_ABI bool isMaskedSlidePair(ArrayRef<int> Mask, int NumElts,
                                std::array<std::pair<int, int>, 2> &SrcInfo);

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                    SmallVectorImpl<int> &ScaledMask);
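
// Usage sketch (illustrative, not part of the upstream header): re-express an
// i32-element mask in i8 granularity, mirroring the example above.
//
//   SmallVector<int, 16> Narrow;
//   int Mask[] = {3, 2, 0, -1};
//   narrowShuffleMaskElts(/*Scale=*/4, Mask, Narrow);
//   // Narrow == {12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1}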

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                   SmallVectorImpl<int> &ScaledMask);
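
// Usage sketch (illustrative, not part of the upstream header): widening only
// succeeds when every group of Scale narrow indices forms one wide element.
//
//   SmallVector<int, 8> Wide;
//   int Ok[]  = {4, 5, 0, 1};   // widens to {2, 0} with Scale = 2
//   int Bad[] = {4, 6, 0, 1};   // {4, 6} is not one contiguous wide element
//   bool A = widenShuffleMaskElts(/*Scale=*/2, Ok, Wide);   // A == true
//   bool B = widenShuffleMaskElts(/*Scale=*/2, Bad, Wide);  // B == false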

/// A variant of the previous method which is specialized for Scale=2, and
/// treats -1 as undef and allows widening when a wider element is partially
/// undef in the narrow form of the mask. This transformation discards
/// information about which bytes in the original shuffle were undef.
LLVM_ABI bool widenShuffleMaskElts(ArrayRef<int> Mask,
                                   SmallVectorImpl<int> &NewMask);

/// Attempt to narrow/widen the \p Mask shuffle mask to the \p NumDstElts target
/// width. Internally this will call narrowShuffleMaskElts/widenShuffleMaskElts.
/// This will assert unless NumDstElts is a multiple of Mask.size (or
/// vice-versa). Returns false on failure, and ScaledMask will be in an
/// undefined state.
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
                                   SmallVectorImpl<int> &ScaledMask);

/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
/// to get the shuffle mask with widest possible elements.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
                                           SmallVectorImpl<int> &ScaledMask);

/// Splits and processes shuffle mask depending on the number of input and
/// output registers. The function does 2 main things: 1) splits the
/// source/destination vectors into real registers; 2) does the mask analysis
/// to identify which real registers are permuted. Then the function processes
/// the resulting per-register masks using the provided action callbacks. If no
/// input register is defined, \p NoInputAction is used. If only 1 input
/// register is used, \p SingleInputAction is used, otherwise \p ManyInputsAction
/// is used to process 2 or more input registers and masks.
/// \param Mask Original shuffle mask.
/// \param NumOfSrcRegs Number of source registers.
/// \param NumOfDestRegs Number of destination registers.
/// \param NumOfUsedRegs Number of actually used destination registers.
LLVM_ABI void processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
        ManyInputsAction);
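
// Usage sketch (illustrative, not part of the upstream header): count how often
// each kind of action fires for a mask split across 2 source and 2 destination
// registers. The lambda parameters are left unnamed where their exact meaning
// is not needed for the count; Mask is assumed to come from the caller.
//
//   unsigned Empty = 0, Single = 0, Many = 0;
//   processShuffleMasks(
//       Mask, /*NumOfSrcRegs=*/2, /*NumOfDestRegs=*/2, /*NumOfUsedRegs=*/2,
//       [&]() { ++Empty; },
//       [&](ArrayRef<int>, unsigned, unsigned) { ++Single; },
//       [&](ArrayRef<int>, unsigned, unsigned, bool) { ++Many; });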

/// Compute the demanded elements mask of horizontal binary operations. A
/// horizontal operation combines two adjacent elements in a vector operand.
/// This function returns a mask for the elements that correspond to the first
/// operand of this horizontal combination. For example, for two vectors
/// [X1, X2, X3, X4] and [Y1, Y2, Y3, Y4], the resulting mask can include the
/// elements X1, X3, Y1, and Y3. To get the other operands, simply shift the
/// result of this function to the left by 1.
///
/// \param VectorBitWidth the total bit width of the vector
/// \param DemandedElts the demanded elements mask for the operation
/// \param DemandedLHS the demanded elements mask for the left operand
/// \param DemandedRHS the demanded elements mask for the right operand
LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
                                                  const APInt &DemandedElts,
                                                  APInt &DemandedLHS,
                                                  APInt &DemandedRHS);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
LLVM_ABI MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);
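
// Usage sketch (illustrative, not part of the upstream header): query how
// narrow each integer instruction of a block range could legally be. DB is
// assumed to come from a DemandedBits analysis, and errs() assumes
// llvm/Support/raw_ostream.h is available.
//
//   void reportMinWidths(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB) {
//     MapVector<Instruction *, uint64_t> MinBWs =
//         computeMinimumValueSizes(Blocks, DB);
//     for (const auto &[I, Bits] : MinBWs)
//       errs() << *I << " fits in i" << Bits << "\n";
//   }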

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
LLVM_ABI MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
LLVM_ABI MDNode *intersectAccessGroups(const Instruction *Inst1,
                                       const Instruction *Inst2);

/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
/// vectorization. It can be preserved after vectorization if the kind is one of
/// [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,
/// MD_access_group, MD_mmra].
LLVM_ABI void getMetadataToPropagate(
    Instruction *Inst,
    SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group, MD_mmra].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
LLVM_ABI Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
LLVM_ABI Constant *
createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                     const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
LLVM_ABI llvm::SmallVector<int, 16>
createReplicatedMask(unsigned ReplicationFactor, unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
LLVM_ABI llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF,
                                                         unsigned NumVecs);
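
// Usage sketch (illustrative, not part of the upstream header): interleave two
// <VF x ...> values into one wide vector with a single shufflevector. B, V0 and
// V1 are assumed to come from the caller and to have matching vector types.
//
//   Value *interleaveTwo(IRBuilderBase &B, Value *V0, Value *V1, unsigned VF) {
//     return B.CreateShuffleVector(V0, V1, createInterleaveMask(VF, 2));
//   }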

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
LLVM_ABI llvm::SmallVector<int, 16>
createStrideMask(unsigned Start, unsigned Stride, unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
LLVM_ABI llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
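
// Usage sketch (illustrative, not part of the upstream header): split an
// interleaved <2*VF x ...> value back into its even and odd lanes. B and Wide
// are assumed to come from the caller.
//
//   std::pair<Value *, Value *> deinterleaveTwo(IRBuilderBase &B, Value *Wide,
//                                               unsigned VF) {
//     Value *Evens = B.CreateShuffleVector(Wide, createStrideMask(0, 2, VF));
//     Value *Odds = B.CreateShuffleVector(Wide, createStrideMask(1, 2, VF));
//     return {Evens, Odds};
//   }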

/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are identical. This assumes that the unary
/// shuffle will use elements from operand 0 (operand 1 will be unused).
LLVM_ABI llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
                                                    unsigned NumElts);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
LLVM_ABI Value *concatenateVectors(IRBuilderBase &Builder,
                                   ArrayRef<Value *> Vecs);
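
// Usage sketch (illustrative, not part of the upstream header): glue four
// equally typed vector values into one wide value. B and the inputs are
// assumed to come from the caller.
//
//   Value *concatFour(IRBuilderBase &B, Value *P0, Value *P1, Value *P2,
//                     Value *P3) {
//     Value *Parts[] = {P0, P1, P2, P3};
//     return concatenateVectors(B, Parts);
//   }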

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if any of the elements of this
/// predicate mask are known to be true or undef. That is, return true if at
/// least one lane can be assumed active.
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask);
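
// Usage sketch (illustrative, not part of the upstream header): classify the
// predicate of a masked memory operation. MaskOp is assumed to be the i1
// vector mask operand of such an operation.
//
//   enum class MaskKind { AllInactive, AllActive, Mixed };
//   MaskKind classifyMask(Value *MaskOp) {
//     if (maskIsAllZeroOrUndef(MaskOp))
//       return MaskKind::AllInactive; // the masked operation is a no-op
//     if (maskIsAllOneOrUndef(MaskOp))
//       return MaskKind::AllActive;   // behaves like an unmasked operation
//     return MaskKind::Mixed;
//   }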
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];     // Member of index 0
///          b = A[i+1];   // Member of index 1
///          d = A[i+3];   // Member of index 3
///          ...
///        }
///
/// An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;   // Member of index 0
///          A[i+1] = b;   // Member of index 1
///          A[i+2] = c;   // Member of index 2
///          A[i+3] = d;   // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is relative to the leader and it could
  /// be negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for either the tombstone or empty special values.
    if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
        DenseMapInfo<int32_t>::getEmptyKey() == Key)
      return false;

    // Skip if there is already a member with the same index.
    if (Members.contains(Key))
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index
  ///
  /// \returns nullptr if the group contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a'la
  /// addNewMedata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore can't be a reversed access,
    // because such groups get invalidated (TODO).
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

  /// Return true if this group is full, i.e. it has no gaps.
  bool isFull() const { return getNumMembers() == getFactor(); }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32        // Insert Position
  //      %add = add i32 %even    // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32          // Def of %odd
  //      store i32 %odd          // Insert Position
  InstTy *InsertPos;
};
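
// Usage sketch (illustrative, not part of the upstream header): walk the
// members of a group in index order, skipping gaps. Group is assumed to come
// from InterleavedAccessInfo::getInterleaveGroup(), and errs() assumes
// llvm/Support/raw_ostream.h is available.
//
//   void visitMembers(const InterleaveGroup<Instruction> &Group) {
//     for (unsigned I = 0; I < Group.getFactor(); ++I)
//       if (Instruction *Member = Group.getMember(I))
//         errs() << "member " << I << ": " << *Member << "\n";
//   }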

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to the original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks. Returns true if any
  /// groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.contains(Instr);
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to such a group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  LLVM_ABI void invalidateGroupsRequiringScalarEpilogue();

  /// Returns true if we have any interleave groups.
  bool hasGroups() const { return !InterleaveGroups.empty(); }

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    auto [It, Inserted] = InterleaveGroupMap.try_emplace(Instr);
    assert(Inserted && "Already in an interleaved access group");
    It->second = new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(It->second);
    return It->second;
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    InterleaveGroups.erase(Group);
    releaseGroupWithoutRemovingFromSet(Group);
  }

  /// Do everything necessary to release the group, apart from removing it from
  /// the InterleaveGroups set.
  void releaseGroupWithoutRemovingFromSet(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const DenseMap<Value *, const SCEV *> &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  LLVM_ABI static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    const auto &DepChecker = LAI->getDepChecker();
    auto *Deps = DepChecker.getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(DepChecker)].insert(
          Dep.getDestination(DepChecker));
  }
};
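
// Usage sketch (illustrative, not part of the upstream header): run the
// interleaving analysis and report each discovered group. All analysis objects
// are assumed to be computed by the caller for the loop being vectorized, and
// errs() assumes llvm/Support/raw_ostream.h is available.
//
//   void findGroups(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT,
//                   LoopInfo *LI, const LoopAccessInfo *LAI) {
//     InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
//     IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//     for (const InterleaveGroup<Instruction> *Group : IAI.getInterleaveGroups())
//       errs() << "factor " << Group->getFactor() << ", full: "
//              << (Group->isFull() ? "yes" : "no") << "\n";
//   }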

} // llvm namespace

#endif