1//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ANALYSIS_VECTORUTILS_H
14#define LLVM_ANALYSIS_VECTORUTILS_H
15
16#include "llvm/ADT/MapVector.h"
17#include "llvm/Analysis/LoopAccessAnalysis.h"
18#include "llvm/IR/Module.h"
19#include "llvm/IR/VFABIDemangler.h"
20#include "llvm/Support/CheckedArithmetic.h"
21
22namespace llvm {
23class TargetLibraryInfo;
24
25/// The Vector Function Database.
26///
27/// Helper class used to find the vector functions associated to a
28/// scalar CallInst.
29class VFDatabase {
30 /// The Module of the CallInst CI.
31 const Module *M;
32 /// The CallInst instance being queried for scalar to vector mappings.
33 const CallInst &CI;
34 /// List of vector function descriptors associated with the call
35 /// instruction.
36 const SmallVector<VFInfo, 8> ScalarToVectorMappings;
37
38 /// Retrieve the scalar-to-vector mappings associated to the rule of
39 /// a vector Function ABI.
40 static void getVFABIMappings(const CallInst &CI,
41 SmallVectorImpl<VFInfo> &Mappings) {
42 if (!CI.getCalledFunction())
43 return;
44
45 const StringRef ScalarName = CI.getCalledFunction()->getName();
46
47 SmallVector<std::string, 8> ListOfStrings;
48 // The check for the vector-function-abi-variant attribute is done when
49 // retrieving the vector variant names here.
50 VFABI::getVectorVariantNames(CI, ListOfStrings);
51 if (ListOfStrings.empty())
52 return;
53 for (const auto &MangledName : ListOfStrings) {
54 const std::optional<VFInfo> Shape =
55 VFABI::tryDemangleForVFABI(MangledName, CI.getFunctionType());
56 // A match is found via scalar and vector names, and also by
57 // ensuring that the variant described in the attribute has a
58 // corresponding definition or declaration of the vector
59 // function in the Module M.
60 if (Shape && (Shape->ScalarName == ScalarName)) {
61 assert(CI.getModule()->getFunction(Shape->VectorName) &&
62 "Vector function is missing.");
63 Mappings.push_back(*Shape);
64 }
65 }
66 }
67
68public:
69 /// Retrieve all the VFInfo instances associated to the CallInst CI.
70 static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
71 SmallVector<VFInfo, 8> Ret;
72
73 // Get mappings from the Vector Function ABI variants.
74 getVFABIMappings(CI, Ret);
75
76 // Other non-VFABI variants should be retrieved here.
77
78 return Ret;
79 }
80
81 static bool hasMaskedVariant(const CallInst &CI,
82 std::optional<ElementCount> VF = std::nullopt) {
83 // Check whether we have at least one masked vector version of a scalar
84 // function. If no VF is specified then we check for any masked variant,
85 // otherwise we look for one that matches the supplied VF.
86 auto Mappings = VFDatabase::getMappings(CI);
87 for (VFInfo Info : Mappings)
88 if (!VF || Info.Shape.VF == *VF)
89 if (Info.isMasked())
90 return true;
91
92 return false;
93 }
94
95 /// Constructor, requires a CallInst instance.
96 VFDatabase(CallInst &CI)
97 : M(CI.getModule()), CI(CI),
98 ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
99 /// \defgroup VFDatabase query interface.
100 ///
101 /// @{
102 /// Retrieve the Function with VFShape \p Shape.
103 Function *getVectorizedFunction(const VFShape &Shape) const {
104 if (Shape == VFShape::getScalarShape(CI.getFunctionType()))
105 return CI.getCalledFunction();
106
107 for (const auto &Info : ScalarToVectorMappings)
108 if (Info.Shape == Shape)
109 return M->getFunction(Info.VectorName);
110
111 return nullptr;
112 }
113 /// @}
114};
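
// Illustrative usage sketch (not part of the original header): query the
// database for a non-masked vector variant of a call at a fixed VF of 4. The
// helper name `findVectorVariantForVF4` and the chosen VF are hypothetical.
//
//   Function *findVectorVariantForVF4(CallInst &CI) {
//     VFDatabase DB(CI);
//     for (const VFInfo &Info : VFDatabase::getMappings(CI))
//       if (Info.Shape.VF == ElementCount::getFixed(4) && !Info.isMasked())
//         return DB.getVectorizedFunction(Info.Shape);
//     return nullptr;
//   }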
115
116template <typename T> class ArrayRef;
117class DemandedBits;
118template <typename InstTy> class InterleaveGroup;
119class IRBuilderBase;
120class Loop;
121class ScalarEvolution;
122class TargetTransformInfo;
123class Type;
124class Value;
125
126namespace Intrinsic {
127typedef unsigned ID;
128}
129
130/// A helper function for converting Scalar types to vector types. If
131/// the incoming type is void, we return void. If the EC represents a
132/// scalar, we return the scalar type.
133inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
134 if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
135 return Scalar;
136 return VectorType::get(Scalar, EC);
137}
138
139inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
140 return ToVectorTy(Scalar, ElementCount::getFixed(VF));
141}
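
// Quick sketch of the helpers above (Ctx is an assumed LLVMContext):
//   Type *I32 = Type::getInt32Ty(Ctx);
//   Type *V4I32 = ToVectorTy(I32, 4);                        // <4 x i32>
//   Type *Same = ToVectorTy(I32, ElementCount::getFixed(1));  // scalar i32
//   Type *Void = ToVectorTy(Type::getVoidTy(Ctx), 4);         // stays void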
142
143/// Identify if the intrinsic is trivially vectorizable.
144/// This method returns true if the intrinsic's argument types are all scalars
145/// for the scalar form of the intrinsic and all vectors (or scalars handled by
146/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
147bool isTriviallyVectorizable(Intrinsic::ID ID);
148
149/// Identifies if the vector form of the intrinsic has a scalar operand.
150bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
151 unsigned ScalarOpdIdx);
152
153/// Identifies if the vector form of the intrinsic is overloaded on the type of
154/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
155bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx);
156
157/// Returns intrinsic ID for call.
158/// For the input call instruction it finds the mapping intrinsic and returns
159/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
160Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
161 const TargetLibraryInfo *TLI);
162
163/// Given a vector and an element number, see if the scalar value is
164/// already around as a register, for example if it were inserted then extracted
165/// from the vector.
166Value *findScalarElement(Value *V, unsigned EltNo);
167
168/// If all non-negative \p Mask elements are the same value, return that value.
169/// If all elements are negative (undefined) or \p Mask contains different
170/// non-negative values, return -1.
171int getSplatIndex(ArrayRef<int> Mask);
172
173/// Get splat value if the input is a splat vector or return nullptr.
174/// The value may be extracted from a splat constants vector or from
175/// a sequence of instructions that broadcast a single value into a vector.
176Value *getSplatValue(const Value *V);
177
178/// Return true if each element of the vector value \p V is poisoned or equal to
179/// every other non-poisoned element. If an index element is specified, either
180/// every element of the vector is poisoned or the element at that index is not
181/// poisoned and equal to every other non-poisoned element.
182/// This may be more powerful than the related getSplatValue() because it is
183/// not limited by finding a scalar source value to a splatted vector.
184bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
185
186/// Transform a shuffle mask's output demanded element mask into demanded
187/// element masks for the 2 operands, returns false if the mask isn't valid.
188/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
189/// \p AllowUndefElts permits "-1" indices to be treated as undef.
190bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
191 const APInt &DemandedElts, APInt &DemandedLHS,
192 APInt &DemandedRHS, bool AllowUndefElts = false);
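
// Worked example (sketch): with SrcWidth = 4 and Mask = <0, 4, 1, 5>, output
// lane 0 reads LHS[0] and lane 1 reads RHS[0]. Demanding only output lanes 0
// and 1 therefore demands element 0 of each operand:
//   APInt DemandedLHS, DemandedRHS;
//   bool Valid = getShuffleDemandedElts(/*SrcWidth=*/4, {0, 4, 1, 5},
//                                       APInt(4, 0b0011), DemandedLHS,
//                                       DemandedRHS);
//   // Valid == true, DemandedLHS == 0b0001, DemandedRHS == 0b0001.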
193
194/// Replace each shuffle mask index with the scaled sequential indices for an
195/// equivalent mask of narrowed elements. Mask elements that are less than 0
196/// (sentinel values) are repeated in the output mask.
197///
198/// Example with Scale = 4:
199/// <4 x i32> <3, 2, 0, -1> -->
200/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
201///
202/// This is the reverse process of widening shuffle mask elements, but it always
203/// succeeds because the indexes can always be multiplied (scaled up) to map to
204/// narrower vector elements.
205void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
206 SmallVectorImpl<int> &ScaledMask);
207
208/// Try to transform a shuffle mask by replacing elements with the scaled index
209/// for an equivalent mask of widened elements. If all mask elements that would
210/// map to a wider element of the new mask are the same negative number
211/// (sentinel value), that element of the new mask is the same value. If any
212/// element in a given slice is negative and some other element in that slice is
213/// not the same value, return false (partial matches with sentinel values are
214/// not allowed).
215///
216/// Example with Scale = 4:
217/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
218/// <4 x i32> <3, 2, 0, -1>
219///
220/// This is the reverse process of narrowing shuffle mask elements if it
221/// succeeds. This transform is not always possible because indexes may not
222/// divide evenly (scale down) to map to wider vector elements.
223bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
224 SmallVectorImpl<int> &ScaledMask);
225
226/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
227/// to get the shuffle mask with widest possible elements.
228void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
229 SmallVectorImpl<int> &ScaledMask);
230
231/// Splits and processes shuffle mask depending on the number of input and
232/// output registers. The function does 2 main things: 1) splits the
233/// source/destination vectors into real registers; 2) does the mask analysis to
234/// identify which real registers are permuted. Then the function processes
235/// resulting registers mask using provided action items. If no input register
236/// is defined, \p NoInputAction action is used. If only 1 input register is
237/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to
238/// process 2 or more input registers and masks.
239/// \param Mask Original shuffle mask.
240/// \param NumOfSrcRegs Number of source registers.
241/// \param NumOfDestRegs Number of destination registers.
242/// \param NumOfUsedRegs Number of actually used destination registers.
243void processShuffleMasks(
244 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
245 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
246 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
247 function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);
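
// Illustrative sketch (the lambda bodies are placeholders; the register
// indices passed to each action follow the parameter documentation above):
//   processShuffleMasks(
//       Mask, /*NumOfSrcRegs=*/2, /*NumOfDestRegs=*/2, /*NumOfUsedRegs=*/2,
//       []() { /* this destination register needs no input */ },
//       [](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DstReg) {
//         /* emit a single-input permute for this destination register */
//       },
//       [](ArrayRef<int> RegMask, unsigned Reg1, unsigned Reg2) {
//         /* emit a two-input shuffle combining the two registers */
//       });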
248
249/// Compute a map of integer instructions to their minimum legal type
250/// size.
251///
252/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
253/// type (e.g. i32) whenever arithmetic is performed on them.
254///
255/// For targets with native i8 or i16 operations, usually InstCombine can shrink
256/// the arithmetic type down again. However InstCombine refuses to create
257/// illegal types, so for targets without i8 or i16 registers, the lengthening
258/// and shrinking remains.
259///
260/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
261/// their scalar equivalents do not, so during vectorization it is important to
262/// remove these lengthens and truncates when deciding the profitability of
263/// vectorization.
264///
265/// This function analyzes the given range of instructions and determines the
266/// minimum type size each can be converted to. It attempts to remove or
267/// minimize type size changes across each def-use chain, so for example in the
268/// following code:
269///
270/// %1 = load i8, i8*
271/// %2 = add i8 %1, 2
272/// %3 = load i16, i16*
273/// %4 = zext i8 %2 to i32
274/// %5 = zext i16 %3 to i32
275/// %6 = add i32 %4, %5
276/// %7 = trunc i32 %6 to i16
277///
278/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
279/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
280///
281/// If the optional TargetTransformInfo is provided, this function tries harder
282/// to do less work by only looking at illegal types.
283MapVector<Instruction*, uint64_t>
284computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
285 DemandedBits &DB,
286 const TargetTransformInfo *TTI=nullptr);
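
// Illustrative sketch (DB, TTI and TheLoop are assumed to be available from
// the enclosing pass):
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(TheLoop->getBlocks(), DB, &TTI);
//   for (const auto &Entry : MinBWs)
//     if (Entry.second < Entry.first->getType()->getScalarSizeInBits())
//       ; // Entry.first can be computed in Entry.second bits when vectorizing.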
287
288/// Compute the union of two access-group lists.
289///
290/// If the list contains just one access group, it is returned directly. If the
291/// list is empty, returns nullptr.
292MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
293
294/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
295/// are both in. If either instruction does not access memory at all, it is
296/// considered to be in every list.
297///
298/// If the list contains just one access group, it is returned directly. If the
299/// list is empty, returns nullptr.
300MDNode *intersectAccessGroups(const Instruction *Inst1,
301 const Instruction *Inst2);
302
303/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
304/// MD_nontemporal, MD_access_group].
305/// For K in Kinds, we get the MDNode for K from each of the
306/// elements of VL, compute their "intersection" (i.e., the most generic
307/// metadata value that covers all of the individual values), and set I's
308/// metadata for K equal to the intersection value.
309///
310/// This function always sets a (possibly null) value for each K in Kinds.
311Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
312
313/// Create a mask that filters the members of an interleave group where there
314/// are gaps.
315///
316/// For example, the mask for \p Group with interleave-factor 3
317/// and \p VF 4, that has only its first member present is:
318///
319/// <1,0,0,1,0,0,1,0,0,1,0,0>
320///
321/// Note: The result is a mask of 0's and 1's, as opposed to the other
322/// create[*]Mask() utilities which create a shuffle mask (mask that
323/// consists of indices).
324Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
325 const InterleaveGroup<Instruction> &Group);
326
327/// Create a mask with replicated elements.
328///
329/// This function creates a shuffle mask for replicating each of the \p VF
330/// elements in a vector \p ReplicationFactor times. It can be used to
331/// transform a mask of \p VF elements into a mask of
332/// \p VF * \p ReplicationFactor elements used by a predicated
333/// interleaved-group of loads/stores whose Interleaved-factor ==
334/// \p ReplicationFactor.
335///
336/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
337///
338/// <0,0,0,1,1,1,2,2,2,3,3,3>
339llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
340 unsigned VF);
341
342/// Create an interleave shuffle mask.
343///
344/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
345/// vectorization factor \p VF into a single wide vector. The mask is of the
346/// form:
347///
348/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
349///
350/// For example, the mask for VF = 4 and NumVecs = 2 is:
351///
352/// <0, 4, 1, 5, 2, 6, 3, 7>.
353llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);
354
355/// Create a stride shuffle mask.
356///
357/// This function creates a shuffle mask whose elements begin at \p Start and
358/// are incremented by \p Stride. The mask can be used to deinterleave an
359/// interleaved vector into separate vectors of vectorization factor \p VF. The
360/// mask is of the form:
361///
362/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
363///
364/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
365///
366/// <0, 2, 4, 6>
367llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
368 unsigned VF);
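
// Illustrative sketch (Builder and WideVec assumed): deinterleave the even and
// odd lanes of an 8-element vector loaded from an interleaved factor-2 group.
//   Value *Even = Builder.CreateShuffleVector(
//       WideVec, createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4));
//   Value *Odd = Builder.CreateShuffleVector(
//       WideVec, createStrideMask(/*Start=*/1, /*Stride=*/2, /*VF=*/4));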
369
370/// Create a sequential shuffle mask.
371///
372/// This function creates shuffle mask whose elements are sequential and begin
373/// at \p Start. The mask contains \p NumInts integers and is padded with \p
374/// NumUndefs undef values. The mask is of the form:
375///
376/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
377///
378/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
379///
380/// <0, 1, 2, 3, undef, undef, undef, undef>
381llvm::SmallVector<int, 16>
382createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
383
384/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
385/// mask assuming both operands are identical. This assumes that the unary
386/// shuffle will use elements from operand 0 (operand 1 will be unused).
387llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
388 unsigned NumElts);
389
390/// Concatenate a list of vectors.
391///
392/// This function generates code that concatenates the vectors in \p Vecs into a
393/// single large vector. The number of vectors should be greater than one, and
394/// their element types should be the same. The number of elements in the
395/// vectors should also be the same; however, if the last vector has fewer
396/// elements, it will be padded with undefs.
397Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);
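
// Illustrative sketch (Builder, VecA and VecB assumed to be <4 x float>
// values): produce a single <8 x float>; a shorter trailing vector would be
// padded with undefs by the callee.
//   Value *Wide = concatenateVectors(Builder, {VecA, VecB});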
398
399/// Given a mask vector of i1, return true if all of the elements of this
400/// predicate mask are known to be false or undef. That is, return true if all
401/// lanes can be assumed inactive.
402bool maskIsAllZeroOrUndef(Value *Mask);
403
404/// Given a mask vector of i1, return true if all of the elements of this
405/// predicate mask are known to be true or undef. That is, return true if all
406/// lanes can be assumed active.
407bool maskIsAllOneOrUndef(Value *Mask);
408
409/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
410/// for each lane which may be active.
411APInt possiblyDemandedEltsInMask(Value *Mask);
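
// For a hypothetical constant mask M = <i1 1, i1 undef, i1 0, i1 1> (sketch):
//   maskIsAllZeroOrUndef(M)       --> false (lane 0 is known true)
//   maskIsAllOneOrUndef(M)        --> false (lane 2 is known false)
//   possiblyDemandedEltsInMask(M) --> 0b1011 (lanes 0, 1 and 3 may be active)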
412
413/// The group of interleaved loads/stores sharing the same stride and
414/// close to each other.
415///
416/// Each member in this group has an index starting from 0, and the largest
417/// index should be less than the interleave factor, which is equal to the absolute
418/// value of the access's stride.
419///
420/// E.g. An interleaved load group of factor 4:
421/// for (unsigned i = 0; i < 1024; i+=4) {
422/// a = A[i]; // Member of index 0
423/// b = A[i+1]; // Member of index 1
424/// d = A[i+3]; // Member of index 3
425/// ...
426/// }
427///
428/// An interleaved store group of factor 4:
429/// for (unsigned i = 0; i < 1024; i+=4) {
430/// ...
431/// A[i] = a; // Member of index 0
432/// A[i+1] = b; // Member of index 1
433/// A[i+2] = c; // Member of index 2
434/// A[i+3] = d; // Member of index 3
435/// }
436///
437/// Note: the interleaved load group could have gaps (missing members), but
438/// the interleaved store group doesn't allow gaps.
439template <typename InstTy> class InterleaveGroup {
440public:
441 InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
442 : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
443 InsertPos(nullptr) {}
444
445 InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
446 : Alignment(Alignment), InsertPos(Instr) {
447 Factor = std::abs(Stride);
448 assert(Factor > 1 && "Invalid interleave factor");
449
450 Reverse = Stride < 0;
451 Members[0] = Instr;
452 }
453
454 bool isReverse() const { return Reverse; }
455 uint32_t getFactor() const { return Factor; }
456 Align getAlign() const { return Alignment; }
457 uint32_t getNumMembers() const { return Members.size(); }
458
459 /// Try to insert a new member \p Instr with index \p Index and
460 /// alignment \p NewAlign. The index is related to the leader and it could be
461 /// negative if it is the new leader.
462 ///
463 /// \returns false if the instruction doesn't belong to the group.
464 bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
465 // Make sure the key fits in an int32_t.
466 std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
467 if (!MaybeKey)
468 return false;
469 int32_t Key = *MaybeKey;
470
471 // Skip if the key is used for either the tombstone or empty special values.
472 if (Key == DenseMapInfo<int32_t>::getTombstoneKey() ||
473 Key == DenseMapInfo<int32_t>::getEmptyKey())
474 return false;
475
476 // Skip if there is already a member with the same index.
477 if (Members.contains(Key))
478 return false;
479
480 if (Key > LargestKey) {
481 // The largest index is always less than the interleave factor.
482 if (Index >= static_cast<int32_t>(Factor))
483 return false;
484
485 LargestKey = Key;
486 } else if (Key < SmallestKey) {
487
488 // Make sure the largest index fits in an int32_t.
489 std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
490 if (!MaybeLargestIndex)
491 return false;
492
493 // The largest index is always less than the interleave factor.
494 if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
495 return false;
496
497 SmallestKey = Key;
498 }
499
500 // It's always safe to select the minimum alignment.
501 Alignment = std::min(Alignment, NewAlign);
502 Members[Key] = Instr;
503 return true;
504 }
505
506 /// Get the member with the given index \p Index
507 ///
508 /// \returns nullptr if the group contains no such member.
509 InstTy *getMember(uint32_t Index) const {
510 int32_t Key = SmallestKey + Index;
511 return Members.lookup(Key);
512 }
513
514 /// Get the index for the given member. Unlike the key in the member
515 /// map, the index starts from 0.
516 uint32_t getIndex(const InstTy *Instr) const {
517 for (auto I : Members) {
518 if (I.second == Instr)
519 return I.first - SmallestKey;
520 }
521
522 llvm_unreachable("InterleaveGroup contains no such member");
523 }
524
525 InstTy *getInsertPos() const { return InsertPos; }
526 void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
527
528 /// Add metadata (e.g. alias info) from the instructions in this group to \p
529 /// NewInst.
530 ///
531 /// FIXME: this function currently does not add noalias metadata a'la
532 /// addNewMetadata. To do that we need to compute the intersection of the
533 /// noalias info from all members.
534 void addMetadata(InstTy *NewInst) const;
535
536 /// Returns true if this Group requires a scalar iteration to handle gaps.
537 bool requiresScalarEpilogue() const {
538 // If the last member of the Group exists, then a scalar epilog is not
539 // needed for this group.
540 if (getMember(getFactor() - 1))
541 return false;
542
543 // We have a group with gaps. It therefore can't be a reversed access,
544 // because such groups get invalidated (TODO).
545 assert(!isReverse() && "Group should have been invalidated");
546
547 // This is a group of loads, with gaps, and without a last-member
548 return true;
549 }
550
551private:
552 uint32_t Factor; // Interleave Factor.
553 bool Reverse;
554 Align Alignment;
555 DenseMap<int32_t, InstTy *> Members;
556 int32_t SmallestKey = 0;
557 int32_t LargestKey = 0;
558
559 // To avoid breaking dependences, vectorized instructions of an interleave
560 // group should be inserted at either the first load or the last store in
561 // program order.
562 //
563 // E.g. %even = load i32 // Insert Position
564 // %add = add i32 %even // Use of %even
565 // %odd = load i32
566 //
567 // store i32 %even
568 // %odd = add i32 // Def of %odd
569 // store i32 %odd // Insert Position
570 InstTy *InsertPos;
571};
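
// Illustrative sketch (Load0..Load2 are hypothetical members of a factor-3
// load group, A their common alignment): build a gap-free group by hand.
//   InterleaveGroup<Instruction> Group(Load0, /*Stride=*/3, A);
//   Group.insertMember(Load1, /*Index=*/1, A); // A[i+1]
//   Group.insertMember(Load2, /*Index=*/2, A); // A[i+2]
//   assert(Group.getFactor() == 3 && Group.getMember(2) == Load2);
//   // No gaps, so no scalar epilogue is needed for this group:
//   assert(!Group.requiresScalarEpilogue());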
572
573/// Drive the analysis of interleaved memory accesses in the loop.
574///
575/// Use this class to analyze interleaved accesses only when we can vectorize
576/// a loop. Otherwise it's meaningless to do analysis as the vectorization
577/// on interleaved accesses is unsafe.
578///
579/// The analysis collects interleave groups and records the relationships
580/// between the member and the group in a map.
581class InterleavedAccessInfo {
582public:
583 InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
584 DominatorTree *DT, LoopInfo *LI,
585 const LoopAccessInfo *LAI)
586 : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
587
588 ~InterleavedAccessInfo() { invalidateGroups(); }
589
590 /// Analyze the interleaved accesses and collect them in interleave
591 /// groups. Substitute symbolic strides using \p Strides.
592 /// Consider also predicated loads/stores in the analysis if
593 /// \p EnableMaskedInterleavedGroup is true.
594 void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
595
596 /// Invalidate groups, e.g., in case all blocks in loop will be predicated
597 /// contrary to original assumption. Although we currently prevent group
598 /// formation for predicated accesses, we may be able to relax this limitation
599 /// in the future once we handle more complicated blocks. Returns true if any
600 /// groups were invalidated.
601 bool invalidateGroups() {
602 if (InterleaveGroups.empty()) {
603 assert(
604 !RequiresScalarEpilogue &&
605 "RequiresScalarEpilog should not be set without interleave groups");
606 return false;
607 }
608
609 InterleaveGroupMap.clear();
610 for (auto *Ptr : InterleaveGroups)
611 delete Ptr;
612 InterleaveGroups.clear();
613 RequiresScalarEpilogue = false;
614 return true;
615 }
616
617 /// Check if \p Instr belongs to any interleave group.
618 bool isInterleaved(Instruction *Instr) const {
619 return InterleaveGroupMap.contains(Instr);
620 }
621
622 /// Get the interleave group that \p Instr belongs to.
623 ///
624 /// \returns nullptr if \p Instr does not belong to any group.
625 InterleaveGroup<Instruction> *
626 getInterleaveGroup(const Instruction *Instr) const {
627 return InterleaveGroupMap.lookup(Instr);
628 }
629
630 iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
631 getInterleaveGroups() {
632 return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
633 }
634
635 /// Returns true if an interleaved group that may access memory
636 /// out-of-bounds requires a scalar epilogue iteration for correctness.
637 bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
638
639 /// Invalidate groups that require a scalar epilogue (due to gaps). This can
640 /// happen when optimizing for size forbids a scalar epilogue, and the gap
641 /// cannot be filtered by masking the load/store.
642 void invalidateGroupsRequiringScalarEpilogue();
643
644 /// Returns true if we have any interleave groups.
645 bool hasGroups() const { return !InterleaveGroups.empty(); }
646
647private:
648 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
649 /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
650 /// The interleaved access analysis can also add new predicates (for example
651 /// by versioning strides of pointers).
652 PredicatedScalarEvolution &PSE;
653
654 Loop *TheLoop;
655 DominatorTree *DT;
656 LoopInfo *LI;
657 const LoopAccessInfo *LAI;
658
659 /// True if the loop may contain non-reversed interleaved groups with
660 /// out-of-bounds accesses. We ensure we don't speculatively access memory
661 /// out-of-bounds by executing at least one scalar epilogue iteration.
662 bool RequiresScalarEpilogue = false;
663
664 /// Holds the relationships between the members and the interleave group.
665 DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;
666
667 SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
668
669 /// Holds dependences among the memory accesses in the loop. It maps a source
670 /// access to a set of dependent sink accesses.
671 DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
672
673 /// The descriptor for a strided memory access.
674 struct StrideDescriptor {
675 StrideDescriptor() = default;
676 StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
677 Align Alignment)
678 : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
679
680 // The access's stride. It is negative for a reverse access.
681 int64_t Stride = 0;
682
683 // The scalar expression of this access.
684 const SCEV *Scev = nullptr;
685
686 // The size of the memory object.
687 uint64_t Size = 0;
688
689 // The alignment of this access.
690 Align Alignment;
691 };
692
693 /// A type for holding instructions and their stride descriptors.
694 using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
695
696 /// Create a new interleave group with the given instruction \p Instr,
697 /// stride \p Stride and alignment \p Align.
698 ///
699 /// \returns the newly created interleave group.
700 InterleaveGroup<Instruction> *
701 createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
702 assert(!InterleaveGroupMap.count(Instr) &&
703 "Already in an interleaved access group");
704 InterleaveGroupMap[Instr] =
705 new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
706 InterleaveGroups.insert(InterleaveGroupMap[Instr]);
707 return InterleaveGroupMap[Instr];
708 }
709
710 /// Release the group and remove all the relationships.
711 void releaseGroup(InterleaveGroup<Instruction> *Group) {
712 for (unsigned i = 0; i < Group->getFactor(); i++)
713 if (Instruction *Member = Group->getMember(i))
714 InterleaveGroupMap.erase(Member);
715
716 InterleaveGroups.erase(Group);
717 delete Group;
718 }
719
720 /// Collect all the accesses with a constant stride in program order.
721 void collectConstStrideAccesses(
722 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
723 const DenseMap<Value *, const SCEV *> &Strides);
724
725 /// Returns true if \p Stride is allowed in an interleaved group.
726 static bool isStrided(int Stride);
727
728 /// Returns true if \p BB is a predicated block.
729 bool isPredicated(BasicBlock *BB) const {
730 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
731 }
732
733 /// Returns true if LoopAccessInfo can be used for dependence queries.
734 bool areDependencesValid() const {
735 return LAI && LAI->getDepChecker().getDependences();
736 }
737
738 /// Returns true if memory accesses \p A and \p B can be reordered, if
739 /// necessary, when constructing interleaved groups.
740 ///
741 /// \p A must precede \p B in program order. We return false if reordering is
742 /// not necessary or is prevented because \p A and \p B may be dependent.
743 bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
744 StrideEntry *B) const {
745 // Code motion for interleaved accesses can potentially hoist strided loads
746 // and sink strided stores. The code below checks the legality of the
747 // following two conditions:
748 //
749 // 1. Potentially moving a strided load (B) before any store (A) that
750 // precedes B, or
751 //
752 // 2. Potentially moving a strided store (A) after any load or store (B)
753 // that A precedes.
754 //
755 // It's legal to reorder A and B if we know there isn't a dependence from A
756 // to B. Note that this determination is conservative since some
757 // dependences could potentially be reordered safely.
758
759 // A is potentially the source of a dependence.
760 auto *Src = A->first;
761 auto SrcDes = A->second;
762
763 // B is potentially the sink of a dependence.
764 auto *Sink = B->first;
765 auto SinkDes = B->second;
766
767 // Code motion for interleaved accesses can't violate WAR dependences.
768 // Thus, reordering is legal if the source isn't a write.
769 if (!Src->mayWriteToMemory())
770 return true;
771
772 // At least one of the accesses must be strided.
773 if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
774 return true;
775
776 // If dependence information is not available from LoopAccessInfo,
777 // conservatively assume the instructions can't be reordered.
778 if (!areDependencesValid())
779 return false;
780
781 // If we know there is a dependence from source to sink, assume the
782 // instructions can't be reordered. Otherwise, reordering is legal.
783 return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
784 }
785
786 /// Collect the dependences from LoopAccessInfo.
787 ///
788 /// We process the dependences once during the interleaved access analysis to
789 /// enable constant-time dependence queries.
790 void collectDependences() {
791 if (!areDependencesValid())
792 return;
793 auto *Deps = LAI->getDepChecker().getDependences();
794 for (auto Dep : *Deps)
795 Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
796 }
797};
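
// Illustrative sketch of driving the analysis (PSE, L, DT, LI and LAI are
// assumed to come from the enclosing vectorizer):
//   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   if (IAI.hasGroups() && IAI.requiresScalarEpilogue()) {
//     // Either keep a scalar epilogue iteration or drop the gapped groups.
//     IAI.invalidateGroupsRequiringScalarEpilogue();
//   }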
798
799} // llvm namespace
800
801#endif