LLVM  9.0.0svn
VectorUtils.h
//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
enum ID : unsigned;
}

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
/// the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the intrinsic has a scalar operand. It checks for
/// ctlz, cttz and powi special intrinsics whose argument is scalar.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
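
// Illustrative note (a sketch, not from the original header): llvm.powi keeps
// its i32 exponent scalar in the vector form,
//
//   declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32)
//
// so hasVectorInstrinsicScalarOpd(Intrinsic::powi, 1) returns true, while it
// returns false for operand index 0.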

/// Returns intrinsic ID for call.
/// For the given call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if no mapping is found, returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constant vector or from
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these extensions and truncations when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction *, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                         const TargetTransformInfo *TTI = nullptr);
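
// Illustrative call (a sketch, assuming a Loop *L and the enclosing function's
// DemandedBits and TargetTransformInfo analyses are at hand):
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(L->getBlocks(), DB, &TTI);
//   // MinBWs[I] is the narrowest width instruction I may be computed in.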

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for K equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);
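
// Illustrative use (a simplified sketch; GroupPtr and Alignment are assumed to
// describe the group's base pointer and alignment): the 0/1 mask can feed a
// masked load so the gap lanes are never accessed:
//
//   Constant *GapMask = createBitMaskForGaps(Builder, VF, *Group);
//   CallInst *WideLoad =
//       Builder.CreateMaskedLoad(GroupPtr, Alignment, GapMask);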

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
                               unsigned VF);
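
// Illustrative use (a sketch, assuming BlockMask is a <VF x i1> predicate):
// widening the per-iteration predicate to cover a whole group of factor 3:
//
//   Value *GroupMask = Builder.CreateShuffleVector(
//       BlockMask, UndefValue::get(BlockMask->getType()),
//       createReplicatedMask(Builder, /*ReplicationFactor=*/3, VF));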

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                               unsigned NumVecs);
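
// Illustrative use (a sketch, assuming V0 and V1 are <4 x i32> values):
// interleaving two vectors into one <8 x i32>:
//
//   Value *Wide = concatenateVectors(Builder, {V0, V1});
//   Value *Interleaved = Builder.CreateShuffleVector(
//       Wide, UndefValue::get(Wide->getType()),
//       createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2));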

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
                           unsigned Stride, unsigned VF);
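
// Illustrative use (a sketch, assuming Wide is a 2-way interleaved <8 x i32>
// value): extracting the even lanes:
//
//   Value *Even = Builder.CreateShuffleVector(
//       Wide, UndefValue::get(Wide->getType()),
//       createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4));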

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and begin
/// at \p Start. The mask contains \p NumInts integers and is padded with \p
/// NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                               unsigned NumInts, unsigned NumUndefs);
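
// Illustrative use (a sketch, assuming V is a <4 x i32> value): padding V out
// to <8 x i32> with undefs, much as concatenateVectors below does for a short
// trailing vector:
//
//   Value *Padded = Builder.CreateShuffleVector(
//       V, UndefValue::get(V->getType()),
//       createSequentialMask(Builder, /*Start=*/0, /*NumInts=*/4,
//                            /*NumUndefs=*/4));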

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
/// single large vector. The number of vectors should be greater than one, and
/// their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];     // Member of index 0
///          b = A[i+1];   // Member of index 1
///          d = A[i+3];   // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;   // Member of index 0
///          A[i+1] = b;   // Member of index 1
///          A[i+2] = c;   // Member of index 2
///          A[i+3] = d;   // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
      : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
      : Align(Align), InsertPos(Instr) {
    assert(Align && "The alignment should be non-zero");

    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  uint32_t getAlignment() const { return Align; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
    assert(NewAlign && "The new member's alignment should be non-zero");

    // Make sure the key fits in an int32_t.
    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Align = std::min(Align, NewAlign);
    Members[Key] = Instr;
    return true;
  }
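
  // Worked example (illustrative, not from the original source): for a group
  // created from leader L with Stride = 4, keys start at 0. A subsequent
  // insertMember(N, /*Index=*/-1, Align) succeeds and makes N the new leader:
  // SmallestKey becomes -1, getIndex(L) now returns 1, and getMember(0)
  // returns N.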

  /// Get the member with the given index \p Index.
  ///
  /// \returns nullptr if the group contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    auto Member = Members.find(Key);
    if (Member == Members.end())
      return nullptr;

    return Member->second;
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a la
  /// addNewMetadata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore cannot be a group of stores,
    // and it can't be a reversed access, because such groups get invalidated.
    assert(!getMember(0)->mayWriteToMemory() &&
           "Group should have been invalidated");
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last member.
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  uint32_t Align;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32             // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32               // Def of %odd
  //      store i32 %odd               // Insert Position
  InstTy *InsertPos;
};

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { reset(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks.
  void reset() {
    SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
    InterleaveGroupMap.clear();
    RequiresScalarEpilogue = false;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    if (InterleaveGroupMap.count(Instr))
      return InterleaveGroupMap.find(Instr)->second;
    return nullptr;
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();
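
  // Typical driver sequence (a sketch; PSE, DT, LI and LAI stand for the
  // per-loop analyses, as used by the loop vectorizer):
  //
  //   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
  //   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
  //   if (const auto *Group = IAI.getInterleaveGroup(&MemInstr))
  //     ... // e.g. Group->getFactor(), Group->getMember(0), ...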

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     unsigned Align)
        : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    unsigned Align = 0;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Align.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Align);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};

} // llvm namespace

#endif