LLVM 9.0.0svn
VectorUtils.h
//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"

namespace llvm {

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
enum ID : unsigned;
}

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
/// the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the intrinsic has a scalar operand. It checks for
/// ctlz, cttz and powi special intrinsics whose argument is scalar.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

/// Returns the intrinsic ID for a call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

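// A minimal usage sketch (illustrative only; CI and TLI stand for a CallInst
// and the TargetLibraryInfo available in the surrounding analysis):
//
//   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
//   if (ID != Intrinsic::not_intrinsic && isTriviallyVectorizable(ID)) {
//     // E.g. the exponent operand (index 1) of powi must stay scalar even in
//     // the vectorized call.
//     bool OpStaysScalar = hasVectorInstrinsicScalarOpd(ID, 1);
//   }
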
/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

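// Illustrative sketch (Ptr is assumed to be the pointer operand of a memory
// access inside the loop Lp, with ScalarEvolution *SE available):
//
//   if (Value *Stride = getStrideFromPointer(Ptr, SE, Lp)) {
//     // The access looks like A[i * Stride]; the vectorizer can version the
//     // symbolic stride to 1 under a runtime check.
//   }
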
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);

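// Illustrative sketch (Vec and Idx stand for the operands of an extractelement
// the vectorizer is about to emit):
//
//   // Reuse a scalar that already exists instead of extracting it again.
//   if (Value *Scalar = findScalarElement(Vec, Idx))
//     return Scalar;
//
//   // Recognize broadcasts (e.g. insertelement + shufflevector sequences).
//   if (const Value *Splat = getSplatValue(Vec)) {
//     // Every lane of Vec holds Splat.
//   }
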
/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However, InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these extensions and truncations when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);

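// Illustrative sketch (Blocks is assumed to hold the loop's basic blocks; DB
// and TTI come from the pass's DemandedBits and TargetTransformInfo analyses):
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(Blocks, DB, &TTI);
//   for (auto &KV : MinBWs) {
//     // KV.first can legally be computed in KV.second bits once vectorized.
//   }
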
/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

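// Illustrative sketch: after replacing a group of scalar loads with one wide
// load, carry their common metadata over (WideLoad and the Load* names are
// placeholders):
//
//   SmallVector<Value *, 4> VL = {Load0, Load1, Load2, Load3};
//   propagateMetadata(WideLoad, VL);
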
/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
                               unsigned VF);

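// Illustrative sketch: widen a per-lane predicate BlockMask (VF = 4 lanes) so
// it covers each member of an interleave group of factor 3 (Builder and
// BlockMask are assumed to exist):
//
//   Constant *RepMask =
//       createReplicatedMask(Builder, /*ReplicationFactor=*/3, /*VF=*/4);
//   Value *GroupMask = Builder.CreateShuffleVector(
//       BlockMask, UndefValue::get(BlockMask->getType()), RepMask);
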
/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                               unsigned NumVecs);

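// Illustrative sketch: interleave two <4 x i32> vectors V0 and V1 into one
// <8 x i32> value (Builder, V0 and V1 are assumed to exist):
//
//   Constant *IMask = createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2);
//   Value *Interleaved = Builder.CreateShuffleVector(V0, V1, IMask);
//   // Lanes: V0[0], V1[0], V0[1], V1[1], V0[2], V1[2], V0[3], V1[3]
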
/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
                           unsigned Stride, unsigned VF);

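// Illustrative sketch: extract member 0 (the even lanes) of a factor-2 group
// from a wide vector Wide (Builder and Wide are assumed to exist):
//
//   Constant *SMask =
//       createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4);
//   Value *Member0 = Builder.CreateShuffleVector(
//       Wide, UndefValue::get(Wide->getType()), SMask);
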
/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                               unsigned NumInts, unsigned NumUndefs);

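// Illustrative sketch: pad a <4 x i32> vector Part out to 8 lanes with undef
// values, e.g. when the last part of a concatenation is shorter (Builder and
// Part are assumed to exist):
//
//   Constant *PadMask = createSequentialMask(Builder, /*Start=*/0,
//                                            /*NumInts=*/4, /*NumUndefs=*/4);
//   Value *Padded = Builder.CreateShuffleVector(
//       Part, UndefValue::get(Part->getType()), PadMask);
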
/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);

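// Illustrative sketch: splice per-part vectors back into one wide value
// (Builder and the Part* values are assumed to exist and to share an element
// type):
//
//   SmallVector<Value *, 4> Parts = {Part0, Part1, Part2, Part3};
//   Value *Wide = concatenateVectors(Builder, Parts);
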
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///   for (unsigned i = 0; i < 1024; i+=4) {
///     a = A[i];     // Member of index 0
///     b = A[i+1];   // Member of index 1
///     d = A[i+3];   // Member of index 3
///     ...
///   }
///
/// An interleaved store group of factor 4:
///   for (unsigned i = 0; i < 1024; i+=4) {
///     ...
///     A[i]   = a;   // Member of index 0
///     A[i+1] = b;   // Member of index 1
///     A[i+2] = c;   // Member of index 2
///     A[i+3] = d;   // Member of index 3
///   }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align)
      : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int Stride, unsigned Align)
      : Align(Align), InsertPos(Instr) {
    assert(Align && "The alignment should be non-zero");

    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  unsigned getFactor() const { return Factor; }
  unsigned getAlignment() const { return Align; }
  unsigned getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) {
    assert(NewAlign && "The new member's alignment should be non-zero");

    int Key = Index + SmallestKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {
      // After re-basing on the new leader, the largest index must still be
      // less than the interleave factor.
      if (LargestKey - Key >= static_cast<int>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Align = std::min(Align, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if the group contains no such member.
  InstTy *getMember(unsigned Index) const {
    int Key = SmallestKey + Index;
    auto Member = Members.find(Key);
    if (Member == Members.end())
      return nullptr;

    return Member->second;
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  unsigned getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore cannot be a group of stores,
    // and it can't be a reversed access, because such groups get invalidated.
    assert(!getMember(0)->mayWriteToMemory() &&
           "Group should have been invalidated");
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  unsigned Factor; // Interleave Factor.
  bool Reverse;
  unsigned Align;
  DenseMap<int, InstTy *> Members;
  int SmallestKey = 0;
  int LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32             // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32               // Def of %odd
  //      store i32 %odd               // Insert Position
  InstTy *InsertPos;
};

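// Illustrative sketch of how a group is typically built up (the member
// instructions, stride and alignments are assumed to come from the analysis):
//
//   auto *Group = new InterleaveGroup<Instruction>(LoadA0, /*Stride=*/4,
//                                                  /*Align=*/4);
//   Group->insertMember(LoadA1, /*Index=*/1, /*NewAlign=*/4);
//   Group->insertMember(LoadA2, /*Index=*/2, /*NewAlign=*/4); // gap at index 3
//   bool NeedsTail = Group->requiresScalarEpilogue(); // true: last member missing
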
/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { reset(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in the loop will be
  /// predicated contrary to the original assumption. Although we currently
  /// prevent group formation for predicated accesses, we may be able to relax
  /// this limitation in the future once we handle more complicated blocks.
  void reset() {
    SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
    InterleaveGroupMap.clear();
    RequiresScalarEpilogue = false;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    if (InterleaveGroupMap.count(Instr))
      return InterleaveGroupMap.find(Instr)->second;
    return nullptr;
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     unsigned Align)
        : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    unsigned Align = 0;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Align);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

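// Illustrative sketch of driving the analysis from a vectorizer (PSE, L, DT,
// LI and LAI are assumed to be the usual per-loop analysis results):
//
//   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   if (IAI.requiresScalarEpilogue()) {
//     // Keep a scalar tail iteration to cover the gaps.
//   }
//   for (auto *Group : IAI.getInterleaveGroups()) {
//     // Cost modeling and code generation per group.
//   }
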
} // llvm namespace

#endif