Line data Source code
1 : //===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines some vectorizer utilities.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #ifndef LLVM_ANALYSIS_VECTORUTILS_H
15 : #define LLVM_ANALYSIS_VECTORUTILS_H
16 :
17 : #include "llvm/ADT/MapVector.h"
18 : #include "llvm/Analysis/LoopAccessAnalysis.h"
19 : #include "llvm/Analysis/TargetLibraryInfo.h"
20 : #include "llvm/IR/IRBuilder.h"
21 :
22 : namespace llvm {
23 :
24 : template <typename T> class ArrayRef;
25 : class DemandedBits;
26 : class GetElementPtrInst;
27 : class Loop;
28 : class ScalarEvolution;
29 : class TargetTransformInfo;
30 : class Type;
31 : class Value;
32 :
33 : namespace Intrinsic {
34 : enum ID : unsigned;
35 : }
36 :
37 : /// Identify if the intrinsic is trivially vectorizable.
38 : /// This method returns true if the intrinsic's argument types are all
39 : /// scalars for the scalar form of the intrinsic and all vectors for
40 : /// the vector form of the intrinsic.
41 : bool isTriviallyVectorizable(Intrinsic::ID ID);
42 :
43 : /// Identifies if the intrinsic has a scalar operand. It checks for the
44 : /// ctlz, cttz and powi intrinsics, whose operand at \p ScalarOpdIdx is scalar.
45 : bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
46 :
47 : /// Returns the intrinsic ID for a call.
48 : /// For the input call instruction it finds the corresponding intrinsic and
49 : /// returns its intrinsic ID; if no mapping is found, it returns not_intrinsic.
50 : Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
51 : const TargetLibraryInfo *TLI);
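// A minimal usage sketch, assuming a CallInst *CI and a TargetLibraryInfo *TLI
// are at hand (the names here are illustrative only):
//
//   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
//   if (ID != Intrinsic::not_intrinsic && isTriviallyVectorizable(ID))
//     ...; // The call can be widened by calling the intrinsic on vector operands.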
52 :
53 : /// Find the operand of the GEP that should be checked for consecutive
54 : /// stores. This ignores trailing indices that have no effect on the final
55 : /// pointer.
56 : unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);
57 :
58 : /// If the argument is a GEP, then returns the operand identified by
59 : /// getGEPInductionOperand. However, if there is some other non-loop-invariant
60 : /// operand, it returns that instead.
61 : Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
62 :
63 : /// If a value has only one user that is a CastInst, return it.
64 : Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);
65 :
66 : /// Get the stride of a pointer access in a loop. Looks for symbolic
67 : /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
68 : Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
69 :
70 : /// Given a vector and an element number, see if the scalar value is
71 : /// already around as a register, for example if it were inserted then extracted
72 : /// from the vector.
73 : Value *findScalarElement(Value *V, unsigned EltNo);
74 :
75 : /// Get splat value if the input is a splat vector or return nullptr.
76 : /// The value may be extracted from a splat constant vector or from
77 : /// a sequence of instructions that broadcast a single value into a vector.
78 : const Value *getSplatValue(const Value *V);
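// For example, given IR that broadcasts a scalar %x into a vector:
//
//   %ins = insertelement <4 x i32> undef, i32 %x, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//
// getSplatValue(%splat) returns %x; for values not recognized as a splat it
// returns nullptr.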
79 :
80 : /// Compute a map of integer instructions to their minimum legal type
81 : /// size.
82 : ///
83 : /// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
84 : /// type (e.g. i32) whenever arithmetic is performed on them.
85 : ///
86 : /// For targets with native i8 or i16 operations, usually InstCombine can shrink
87 : /// the arithmetic type down again. However InstCombine refuses to create
88 : /// illegal types, so for targets without i8 or i16 registers, the lengthening
89 : /// and shrinking remains.
90 : ///
91 : /// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
92 : /// their scalar equivalents do not, so during vectorization it is important to
93 : /// remove these extensions and truncations when deciding the profitability of
94 : /// vectorization.
95 : ///
96 : /// This function analyzes the given range of instructions and determines the
97 : /// minimum type size each can be converted to. It attempts to remove or
98 : /// minimize type size changes across each def-use chain, so for example in the
99 : /// following code:
100 : ///
101 : /// %1 = load i8, i8*
102 : /// %2 = add i8 %1, 2
103 : /// %3 = load i16, i16*
104 : /// %4 = zext i8 %2 to i32
105 : /// %5 = zext i16 %3 to i32
106 : /// %6 = add i32 %4, %5
107 : /// %7 = trunc i32 %6 to i16
108 : ///
109 : /// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
110 : /// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
111 : ///
112 : /// If the optional TargetTransformInfo is provided, this function tries harder
113 : /// to do less work by only looking at illegal types.
114 : MapVector<Instruction*, uint64_t>
115 : computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
116 : DemandedBits &DB,
117 : const TargetTransformInfo *TTI=nullptr);
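// A minimal usage sketch, assuming the loop's blocks are collected in Blocks, a
// DemandedBits analysis DB is available, and TTI is a TargetTransformInfo (the
// names here are illustrative only):
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(Blocks, DB, &TTI);
//   // MinBWs[I] is the smallest bit width instruction I can be computed in.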
118 :
119 : /// Propagate metadata from the instructions in \p VL to \p I. Let Kinds =
120 : /// [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal]. For each
121 : /// K in Kinds, we get the MDNode for K from each element of VL, compute their
122 : /// "intersection" (i.e., the most generic metadata value that covers all of
123 : /// the individual values), and set I's metadata for K to that intersection.
124 : ///
125 : /// This function always sets a (possibly null) value for each K in Kinds.
126 : Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
127 :
128 : /// Create a mask with replicated elements.
129 : ///
130 : /// This function creates a shuffle mask for replicating each of the \p VF
131 : /// elements in a vector \p ReplicationFactor times. It can be used to
132 : /// transform a mask of \p VF elements into a mask of
133 : /// \p VF * \p ReplicationFactor elements used by a predicated
134 : /// interleaved group of loads/stores whose interleave factor ==
135 : /// \p ReplicationFactor.
136 : ///
137 : /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
138 : ///
139 : /// <0,0,0,1,1,1,2,2,2,3,3,3>
140 : Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
141 : unsigned VF);
142 :
143 : /// Create an interleave shuffle mask.
144 : ///
145 : /// This function creates a shuffle mask for interleaving \p NumVecs vectors of
146 : /// vectorization factor \p VF into a single wide vector. The mask is of the
147 : /// form:
148 : ///
149 : /// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
150 : ///
151 : /// For example, the mask for VF = 4 and NumVecs = 2 is:
152 : ///
153 : /// <0, 4, 1, 5, 2, 6, 3, 7>.
154 : Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
155 : unsigned NumVecs);
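// A minimal usage sketch, assuming an existing IRBuilder<> Builder and two
// <4 x i32> values V0 and V1 (the names here are illustrative only):
//
//   Constant *Mask = createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2);
//   Value *Wide = Builder.CreateShuffleVector(V0, V1, Mask);
//
// Wide now holds <V0[0], V1[0], V0[1], V1[1], V0[2], V1[2], V0[3], V1[3]>.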
156 :
157 : /// Create a stride shuffle mask.
158 : ///
159 : /// This function creates a shuffle mask whose elements begin at \p Start and
160 : /// are incremented by \p Stride. The mask can be used to deinterleave an
161 : /// interleaved vector into separate vectors of vectorization factor \p VF. The
162 : /// mask is of the form:
163 : ///
164 : /// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
165 : ///
166 : /// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
167 : ///
168 : /// <0, 2, 4, 6>
169 : Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
170 : unsigned Stride, unsigned VF);
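// A minimal deinterleaving sketch, assuming an IRBuilder<> Builder and an
// <8 x i32> value Wide holding two interleaved 4-element sequences (the names
// here are illustrative only):
//
//   Constant *EvenMask = createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4);
//   Constant *OddMask = createStrideMask(Builder, /*Start=*/1, /*Stride=*/2, /*VF=*/4);
//   Value *Undef = UndefValue::get(Wide->getType());
//   Value *Evens = Builder.CreateShuffleVector(Wide, Undef, EvenMask);
//   Value *Odds = Builder.CreateShuffleVector(Wide, Undef, OddMask);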
171 :
172 : /// Create a sequential shuffle mask.
173 : ///
174 : /// This function creates a shuffle mask whose elements are sequential and begin
175 : /// at \p Start. The mask contains \p NumInts integers and is padded with \p
176 : /// NumUndefs undef values. The mask is of the form:
177 : ///
178 : /// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
179 : ///
180 : /// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
181 : ///
182 : /// <0, 1, 2, 3, undef, undef, undef, undef>
183 : Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
184 : unsigned NumInts, unsigned NumUndefs);
185 :
186 : /// Concatenate a list of vectors.
187 : ///
188 : /// This function generates code that concatenates the vectors in \p Vecs into a
189 : /// single large vector. The number of vectors should be greater than one, and
190 : /// their element types should be the same. The number of elements in the
191 : /// vectors should also be the same; however, if the last vector has fewer
192 : /// elements, it will be padded with undefs.
193 : Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
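// A minimal usage sketch, assuming an IRBuilder<> Builder and two <4 x i32>
// values V0 and V1 (the names here are illustrative only):
//
//   Value *Wide = concatenateVectors(Builder, {V0, V1});
//
// Wide is an <8 x i32> value whose low lanes come from V0 and high lanes from V1.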
194 :
195 : /// A group of interleaved loads/stores sharing the same stride and
196 : /// close to each other.
197 : ///
198 : /// Each member in this group has an index starting from 0, and the largest
199 : /// index should be less than the interleave factor, which is equal to the
200 : /// absolute value of the access's stride.
201 : ///
202 : /// E.g. An interleaved load group of factor 4:
203 : /// for (unsigned i = 0; i < 1024; i+=4) {
204 : /// a = A[i]; // Member of index 0
205 : /// b = A[i+1]; // Member of index 1
206 : /// d = A[i+3]; // Member of index 3
207 : /// ...
208 : /// }
209 : ///
210 : /// An interleaved store group of factor 4:
211 : /// for (unsigned i = 0; i < 1024; i+=4) {
212 : /// ...
213 : /// A[i] = a; // Member of index 0
214 : /// A[i+1] = b; // Member of index 1
215 : /// A[i+2] = c; // Member of index 2
216 : /// A[i+3] = d; // Member of index 3
217 : /// }
218 : ///
219 : /// Note: the interleaved load group could have gaps (missing members), but
220 : /// the interleaved store group doesn't allow gaps.
221 99 : class InterleaveGroup {
222 : public:
223 99 : InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
224 99 : : Align(Align), InsertPos(Instr) {
225 : assert(Align && "The alignment should be non-zero");
226 :
227 99 : Factor = std::abs(Stride);
228 : assert(Factor > 1 && "Invalid interleave factor");
229 :
230 99 : Reverse = Stride < 0;
231 99 : Members[0] = Instr;
232 99 : }
233 :
234 0 : bool isReverse() const { return Reverse; }
235 0 : unsigned getFactor() const { return Factor; }
236 0 : unsigned getAlignment() const { return Align; }
237 : unsigned getNumMembers() const { return Members.size(); }
238 :
239 : /// Try to insert a new member \p Instr with index \p Index and
240 : /// alignment \p NewAlign. The index is relative to the leader and may be
241 : /// negative if the new member becomes the new leader.
242 : ///
243 : /// \returns false if the instruction doesn't belong to the group.
244 68 : bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
245 : assert(NewAlign && "The new member's alignment should be non-zero");
246 :
247 68 : int Key = Index + SmallestKey;
248 :
249 : // Skip if there is already a member with the same index.
250 68 : if (Members.find(Key) != Members.end())
251 : return false;
252 :
253 62 : if (Key > LargestKey) {
254 : // The largest index is always less than the interleave factor.
255 2 : if (Index >= static_cast<int>(Factor))
256 : return false;
257 :
258 2 : LargestKey = Key;
259 60 : } else if (Key < SmallestKey) {
260 : // The key range (LargestKey - SmallestKey) must be less than the interleave factor.
261 60 : if (LargestKey - Key >= static_cast<int>(Factor))
262 : return false;
263 :
264 59 : SmallestKey = Key;
265 : }
266 :
267 : // It's always safe to select the minimum alignment.
268 61 : Align = std::min(Align, NewAlign);
269 61 : Members[Key] = Instr;
270 61 : return true;
271 : }
272 :
273 : /// Get the member with the given index \p Index
274 : ///
275 : /// \returns nullptr if the group contains no such member.
276 729 : Instruction *getMember(unsigned Index) const {
277 729 : int Key = SmallestKey + Index;
278 729 : auto Member = Members.find(Key);
279 729 : if (Member == Members.end())
280 : return nullptr;
281 :
282 475 : return Member->second;
283 : }
284 :
285 : /// Get the index for the given member. Unlike the key in the member
286 : /// map, the index starts from 0.
287 130 : unsigned getIndex(Instruction *Instr) const {
288 160 : for (auto I : Members)
289 160 : if (I.second == Instr)
290 130 : return I.first - SmallestKey;
291 :
292 0 : llvm_unreachable("InterleaveGroup contains no such member");
293 : }
294 :
295 0 : Instruction *getInsertPos() const { return InsertPos; }
296 36 : void setInsertPos(Instruction *Inst) { InsertPos = Inst; }
297 :
298 : /// Add metadata (e.g. alias info) from the instructions in this group to \p
299 : /// NewInst.
300 : ///
301 : /// FIXME: this function currently does not add noalias metadata à la
302 : /// addNewMetadata. To do that we need to compute the intersection of the
303 : /// noalias info from all members.
304 93 : void addMetadata(Instruction *NewInst) const {
305 : SmallVector<Value *, 4> VL;
306 93 : std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
307 93 : [](std::pair<int, Instruction *> p) { return p.second; });
308 93 : propagateMetadata(NewInst, VL);
309 93 : }
310 :
311 : private:
312 : unsigned Factor; // Interleave Factor.
313 : bool Reverse;
314 : unsigned Align;
315 : DenseMap<int, Instruction *> Members;
316 : int SmallestKey = 0;
317 : int LargestKey = 0;
318 :
319 : // To avoid breaking dependences, vectorized instructions of an interleave
320 : // group should be inserted at either the first load or the last store in
321 : // program order.
322 : //
323 : // E.g. %even = load i32 // Insert Position
324 : // %add = add i32 %even // Use of %even
325 : // %odd = load i32
326 : //
327 : // store i32 %even
328 : // %odd = add i32 // Def of %odd
329 : // store i32 %odd // Insert Position
330 : Instruction *InsertPos;
331 : };
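// The members of a group are typically walked by index, skipping gaps, much as
// InterleavedAccessInfo::releaseGroup() below does (a sketch, assuming a valid
// InterleaveGroup *Group):
//
//   for (unsigned I = 0; I < Group->getFactor(); ++I)
//     if (Instruction *Member = Group->getMember(I))
//       ...; // Process this member; a null result means the slot is a gap.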
332 :
333 : /// Drive the analysis of interleaved memory accesses in the loop.
334 : ///
335 : /// Use this class to analyze interleaved accesses only when we can vectorize
336 : /// a loop. Otherwise it's meaningless to do the analysis, as vectorization
337 : /// of interleaved accesses is unsafe.
338 : ///
339 : /// The analysis collects interleave groups and records the relationships
340 : /// between the members and their groups in a map.
341 : class InterleavedAccessInfo {
342 : public:
343 : InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
344 : DominatorTree *DT, LoopInfo *LI,
345 : const LoopAccessInfo *LAI)
346 989 : : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
347 :
348 989 : ~InterleavedAccessInfo() { reset(); }
349 :
350 : /// Analyze the interleaved accesses and collect them in interleave
351 : /// groups, substituting symbolic strides where they are known.
352 : /// Consider also predicated loads/stores in the analysis if
353 : /// \p EnableMaskedInterleavedGroup is true.
354 : void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
355 :
356 : /// Invalidate groups, e.g., in case all blocks in the loop will be predicated
357 : /// contrary to the original assumption. Although we currently prevent group
358 : /// formation for predicated accesses, we may be able to relax this limitation
359 : /// in the future once we handle more complicated blocks.
360 995 : void reset() {
361 : SmallPtrSet<InterleaveGroup *, 4> DelSet;
362 : // Avoid releasing a pointer twice.
363 1121 : for (auto &I : InterleaveGroupMap)
364 126 : DelSet.insert(I.second);
365 1063 : for (auto *Ptr : DelSet)
366 136 : delete Ptr;
367 995 : InterleaveGroupMap.clear();
368 995 : RequiresScalarEpilogue = false;
369 995 : }
370 :
371 :
372 : /// Check if \p Instr belongs to any interleave group.
373 : bool isInterleaved(Instruction *Instr) const {
374 801 : return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
375 : }
376 :
377 : /// Get the interleave group that \p Instr belongs to.
378 : ///
379 : /// \returns nullptr if \p Instr does not belong to any group.
380 : InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
381 22236 : auto Group = InterleaveGroupMap.find(Instr);
382 22236 : if (Group == InterleaveGroupMap.end())
383 : return nullptr;
384 797 : return Group->second;
385 : }
386 :
387 : /// Returns true if an interleaved group that may access memory
388 : /// out-of-bounds requires a scalar epilogue iteration for correctness.
389 0 : bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
390 :
391 : private:
392 : /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
393 : /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
394 : /// The interleaved access analysis can also add new predicates (for example
395 : /// by versioning strides of pointers).
396 : PredicatedScalarEvolution &PSE;
397 :
398 : Loop *TheLoop;
399 : DominatorTree *DT;
400 : LoopInfo *LI;
401 : const LoopAccessInfo *LAI;
402 :
403 : /// True if the loop may contain non-reversed interleaved groups with
404 : /// out-of-bounds accesses. We ensure we don't speculatively access memory
405 : /// out-of-bounds by executing at least one scalar epilogue iteration.
406 : bool RequiresScalarEpilogue = false;
407 :
408 : /// Holds the relationships between the members and the interleave group.
409 : DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
410 :
411 : /// Holds dependences among the memory accesses in the loop. It maps a source
412 : /// access to a set of dependent sink accesses.
413 : DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
414 :
415 : /// The descriptor for a strided memory access.
416 : struct StrideDescriptor {
417 : StrideDescriptor() = default;
418 : StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
419 : unsigned Align)
420 : : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
421 :
422 : // The access's stride. It is negative for a reverse access.
423 : int64_t Stride = 0;
424 :
425 : // The scalar expression of this access.
426 : const SCEV *Scev = nullptr;
427 :
428 : // The size of the memory object.
429 : uint64_t Size = 0;
430 :
431 : // The alignment of this access.
432 : unsigned Align = 0;
433 : };
434 :
435 : /// A type for holding instructions and their stride descriptors.
436 : using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
437 :
438 : /// Create a new interleave group with the given instruction \p Instr,
439 : /// stride \p Stride and alignment \p Align.
440 : ///
441 : /// \returns the newly created interleave group.
442 99 : InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
443 : unsigned Align) {
444 : assert(!isInterleaved(Instr) && "Already in an interleaved access group");
445 99 : InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
446 99 : return InterleaveGroupMap[Instr];
447 : }
448 :
449 : /// Release the group and remove all the relationships.
450 31 : void releaseGroup(InterleaveGroup *Group) {
451 107 : for (unsigned i = 0; i < Group->getFactor(); i++)
452 76 : if (Instruction *Member = Group->getMember(i))
453 34 : InterleaveGroupMap.erase(Member);
454 :
455 31 : delete Group;
456 31 : }
457 :
458 : /// Collect all the accesses with a constant stride in program order.
459 : void collectConstStrideAccesses(
460 : MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
461 : const ValueToValueMap &Strides);
462 :
463 : /// Returns true if \p Stride is allowed in an interleaved group.
464 : static bool isStrided(int Stride);
465 :
466 : /// Returns true if \p BB is a predicated block.
467 0 : bool isPredicated(BasicBlock *BB) const {
468 337 : return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
469 : }
470 :
471 : /// Returns true if LoopAccessInfo can be used for dependence queries.
472 0 : bool areDependencesValid() const {
473 0 : return LAI && LAI->getDepChecker().getDependences();
474 : }
475 :
476 : /// Returns true if memory accesses \p A and \p B can be reordered, if
477 : /// necessary, when constructing interleaved groups.
478 : ///
479 : /// \p A must precede \p B in program order. We return false only when
480 : /// reordering is prevented because \p A and \p B may be dependent.
481 1176 : bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
482 : StrideEntry *B) const {
483 : // Code motion for interleaved accesses can potentially hoist strided loads
484 : // and sink strided stores. The code below checks the legality of the
485 : // following two conditions:
486 : //
487 : // 1. Potentially moving a strided load (B) before any store (A) that
488 : // precedes B, or
489 : //
490 : // 2. Potentially moving a strided store (A) after any load or store (B)
491 : // that A precedes.
492 : //
493 : // It's legal to reorder A and B if we know there isn't a dependence from A
494 : // to B. Note that this determination is conservative since some
495 : // dependences could potentially be reordered safely.
496 :
497 : // A is potentially the source of a dependence.
498 1176 : auto *Src = A->first;
499 1176 : auto SrcDes = A->second;
500 :
501 : // B is potentially the sink of a dependence.
502 1176 : auto *Sink = B->first;
503 1176 : auto SinkDes = B->second;
504 :
505 : // Code motion for interleaved accesses can't violate WAR dependences.
506 : // Thus, reordering is legal if the source isn't a write.
507 1176 : if (!Src->mayWriteToMemory())
508 : return true;
509 :
510 : // At least one of the accesses must be strided.
511 140 : if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
512 : return true;
513 :
514 : // If dependence information is not available from LoopAccessInfo,
515 : // conservatively assume the instructions can't be reordered.
516 74 : if (!areDependencesValid())
517 : return false;
518 :
519 : // If we know there is a dependence from source to sink, assume the
520 : // instructions can't be reordered. Otherwise, reordering is legal.
521 86 : return Dependences.find(Src) == Dependences.end() ||
522 91 : !Dependences.lookup(Src).count(Sink);
523 : }
524 :
525 : /// Collect the dependences from LoopAccessInfo.
526 : ///
527 : /// We process the dependences once during the interleaved access analysis to
528 : /// enable constant-time dependence queries.
529 457 : void collectDependences() {
530 457 : if (!areDependencesValid())
531 : return;
532 : auto *Deps = LAI->getDepChecker().getDependences();
533 483 : for (auto Dep : *Deps)
534 52 : Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
535 : }
536 : };
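// A minimal usage sketch, assuming the analyses named in the constructor are
// available for a loop L (the names here are illustrative only):
//
//   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   if (InterleaveGroup *Group = IAI.getInterleaveGroup(SomeAccess))
//     ...; // SomeAccess belongs to a group of factor Group->getFactor().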
537 :
538 : } // llvm namespace
539 :
540 : #endif