VectorUtils.cpp
//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

#define DEBUG_TYPE "vectorutils"

using namespace llvm;
using namespace llvm::PatternMatch;

/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));

/// Return true if all of the intrinsic's arguments and return type are scalars
/// for the scalar form of the intrinsic and vectors for the vector form of the
/// intrinsic.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::bswap: // Begin integer bit-manipulation.
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::sqrt: // Begin floating-point.
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::fabs:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::pow:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::powi:
  case Intrinsic::canonicalize:
    return true;
  default:
    return false;
  }
}

/// Identifies if the intrinsic has a scalar operand. It checks for the
/// ctlz, cttz and powi special intrinsics whose argument is scalar.
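/// For example, the i1 'is_zero_undef' flag of llvm.ctlz/llvm.cttz and the
/// i32 exponent of llvm.powi remain scalar even in the vector form of those
/// intrinsics.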
bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
                                        unsigned ScalarOpdIdx) {
  switch (ID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  default:
    return false;
  }
}

/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its ID; if no intrinsic is found, it returns not_intrinsic.
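/// For example, a direct call to @llvm.sqrt.f64 yields Intrinsic::sqrt; a call
/// to the libm function sqrt may likewise be mapped to Intrinsic::sqrt if
/// getIntrinsicForCallSite can match it through the TargetLibraryInfo.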
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
                                                const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI);
  if (ID == Intrinsic::not_intrinsic)
    return Intrinsic::not_intrinsic;

  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::sideeffect)
    return ID;
  return Intrinsic::not_intrinsic;
}

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
  const DataLayout &DL = Gep->getModule()->getDataLayout();
  unsigned LastOperand = Gep->getNumOperands() - 1;
  unsigned GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());

  // Walk backwards and try to peel off zeros.
  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
    // Find the type we're currently indexing into.
    gep_type_iterator GEPTI = gep_type_begin(Gep);
    std::advance(GEPTI, LastOperand - 2);

    // If it's a type with the same allocation size as the result of the GEP we
    // can peel off the zero index.
    if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
      break;
    --LastOperand;
  }

  return LastOperand;
}

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP)
    return Ptr;

  unsigned InductionOperand = getGEPInductionOperand(GEP);

  // Check that all of the gep indices are uniform except for our induction
  // operand.
  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
    if (i != InductionOperand &&
        !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
      return Ptr;
  return GEP->getOperand(InductionOperand);
}

/// If a value has only one user that is a CastInst, return it.
Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
  Value *UniqueCast = nullptr;
  for (User *U : Ptr->users()) {
    CastInst *CI = dyn_cast<CastInst>(U);
    if (CI && CI->getType() == Ty) {
      if (!UniqueCast)
        UniqueCast = CI;
      else
        return nullptr;
    }
  }
  return UniqueCast;
}

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy || PtrTy->isAggregateType())
    return nullptr;

  // Try to remove a gep instruction to make the pointer (actually index at
  // this point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing
  // the pointer, otherwise we are analyzing the index.
  Value *OrigPtr = Ptr;

  // The size of the pointer access.
  int64_t PtrAccessSize = 1;

  Ptr = stripGetElementPtr(Ptr, SE, Lp);
  const SCEV *V = SE->getSCEV(Ptr);

  if (Ptr != OrigPtr)
    // Strip off casts.
    while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
      V = C->getOperand();

  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
  if (!S)
    return nullptr;

  V = S->getStepRecurrence(*SE);
  if (!V)
    return nullptr;

  // Strip off the size of access multiplication if we are still analyzing the
  // pointer.
  if (OrigPtr == Ptr) {
    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
      if (M->getOperand(0)->getSCEVType() != scConstant)
        return nullptr;

      const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();

      // Huge step value - give up.
      if (APStepVal.getBitWidth() > 64)
        return nullptr;

      int64_t StepVal = APStepVal.getSExtValue();
      if (PtrAccessSize != StepVal)
        return nullptr;
      V = M->getOperand(1);
    }
  }

  // Strip off casts.
  Type *StripedOffRecurrenceCast = nullptr;
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
    StripedOffRecurrenceCast = C->getType();
    V = C->getOperand();
  }

  // Look for the loop invariant symbolic value.
  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
  if (!U)
    return nullptr;

  Value *Stride = U->getValue();
  if (!Lp->isLoopInvariant(Stride))
    return nullptr;

  // If we have stripped off the recurrence cast we have to make sure that we
  // return the value that is used in this loop so that we can replace it
  // later.
  if (StripedOffRecurrenceCast)
    Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);

  return Stride;
}

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then
/// extracted from the vector.
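/// For example, if V is "insertelement <4 x i32> %vec, i32 %x, i32 2" and
/// EltNo is 2, this returns %x.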
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  unsigned Width = VTy->getNumElements();
  if (EltNo >= Width)  // Out of range access.
    return UndefValue::get(VTy->getElementType());

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert to a variable element, we don't know what it is.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

    // If this is an insert to the element we are looking for, return the
    // inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);

    // Otherwise, the insertelement doesn't modify the value, recurse on its
    // vector input.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
    unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return UndefValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add operation with a constant zero.
  // TODO: Use getBinOpIdentity() to generalize this.
  Value *Val; Constant *C;
  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
    if (Constant *Elt = C->getAggregateElement(EltNo))
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // Otherwise, we don't know.
  return nullptr;
}

/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcast a single value into a vector.
///
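/// For example, the following sequence broadcasts %x into a <4 x i32> vector,
/// and getSplatValue returns %x for it:
///   %ins   = insertelement <4 x i32> undef, i32 %x, i32 0
///   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer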
const Value *llvm::getSplatValue(const Value *V) {

  if (auto *C = dyn_cast<Constant>(V))
    if (isa<VectorType>(V->getType()))
      return C->getSplatValue();

  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
  if (!ShuffleInst)
    return nullptr;
  // All-zero (or undef) shuffle mask elements.
  for (int MaskElt : ShuffleInst->getShuffleMask())
    if (MaskElt != 0 && MaskElt != -1)
      return nullptr;
  // The first shuffle source is 'insertelement' with index 0.
  auto *InsertEltInst =
      dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
    return nullptr;

  return InsertEltInst->getOperand(1);
}

MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {

  // DemandedBits will give us every value's live-out bits. But we want
  // to ensure no extra casts would need to be inserted, so every DAG
  // of connected values must have the same minimum bitwidth.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64-bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Don't make work for ourselves. If we know the loaded type is legal,
        // don't add it to the worklist.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;

        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    if (Visited.count(Val))
      continue;
    Visited.insert(Val);

    // Non-instructions terminate a chain successfully.
    if (!isa<Instruction>(Val))
      continue;
    Instruction *I = cast<Instruction>(Val);

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
    // transform anything that relies on them.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    if (isa<PHINode>(I))
      continue;

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Now we've discovered all values, walk them to see if there are
  // any users we didn't see. If there are, we can't optimize that
  // chain.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
    uint64_t LeaderDemandedBits = 0;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      LeaderDemandedBits |= DBits[*MI];

    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
                     llvm::countLeadingZeros(LeaderDemandedBits);
    // Round up to a power of 2
    if (!isPowerOf2_64((uint64_t)MinBW))
      MinBW = NextPowerOf2(MinBW);

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    // If we are required to shrink a PHI, abandon this entire equivalence
    // class.
    bool Abort = false;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
        Abort = true;
        break;
      }
    if (Abort)
      continue;

    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
      if (!isa<Instruction>(*MI))
        continue;
      Type *Ty = (*MI)->getType();
      if (Roots.count(*MI))
        Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
      if (MinBW < Ty->getScalarSizeInBits())
        MinBWs[cast<Instruction>(*MI)] = MinBW;
    }
  }

  return MinBWs;
}

/// Add all access groups in @p AccGroups to @p List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
  // Interpret an access group as a list containing itself.
  if (AccGroups->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
    List.insert(AccGroups);
    return;
  }

  for (auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
    assert(isValidAsAccessGroup(Item) && "List item must be an access group");
    List.insert(Item);
  }
}

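// For example, uniting a single access group !0 with the access-group list
// !{!1, !2} produces the list !{!0, !1, !2}.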
MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
  if (!AccGroups1)
    return AccGroups2;
  if (!AccGroups2)
    return AccGroups1;
  if (AccGroups1 == AccGroups2)
    return AccGroups1;

  SmallSetVector<Metadata *, 4> Union;
  addToAccessGroupList(Union, AccGroups1);
  addToAccessGroupList(Union, AccGroups2);

  if (Union.size() == 0)
    return nullptr;
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());

  LLVMContext &Ctx = AccGroups1->getContext();
  return MDNode::get(Ctx, Union.getArrayRef());
}

MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
                                    const Instruction *Inst2) {
  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();

  if (!MayAccessMem1 && !MayAccessMem2)
    return nullptr;
  if (!MayAccessMem1)
    return Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MayAccessMem2)
    return Inst1->getMetadata(LLVMContext::MD_access_group);

  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MD1 || !MD2)
    return nullptr;
  if (MD1 == MD2)
    return MD1;

  // Use set for scalable 'contains' check.
  SmallPtrSet<Metadata *, 4> AccGroupSet2;
  addToAccessGroupList(AccGroupSet2, MD2);

  SmallVector<Metadata *, 4> Intersection;
  if (MD1->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
    if (AccGroupSet2.count(MD1))
      Intersection.push_back(MD1);
  } else {
    for (const MDOperand &Node : MD1->operands()) {
      auto *Item = cast<MDNode>(Node.get());
      assert(isValidAsAccessGroup(Item) && "List item must be an access group");
      if (AccGroupSet2.count(Item))
        Intersection.push_back(Item);
    }
  }

  if (Intersection.size() == 0)
    return nullptr;
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());

  LLVMContext &Ctx = Inst1->getContext();
  return MDNode::get(Ctx, Intersection);
}

/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
  Instruction *I0 = cast<Instruction>(VL[0]);
  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
  I0->getAllMetadataOtherThanDebugLoc(Metadata);

  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                    LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                    LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
                    LLVMContext::MD_access_group}) {
    MDNode *MD = I0->getMetadata(Kind);

    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      const Instruction *IJ = cast<Instruction>(VL[J]);
      MDNode *IMD = IJ->getMetadata(Kind);
      switch (Kind) {
      case LLVMContext::MD_tbaa:
        MD = MDNode::getMostGenericTBAA(MD, IMD);
        break;
      case LLVMContext::MD_alias_scope:
        MD = MDNode::getMostGenericAliasScope(MD, IMD);
        break;
      case LLVMContext::MD_fpmath:
        MD = MDNode::getMostGenericFPMath(MD, IMD);
        break;
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
        MD = MDNode::intersect(MD, IMD);
        break;
      case LLVMContext::MD_access_group:
        MD = intersectAccessGroups(Inst, IJ);
        break;
      default:
        llvm_unreachable("unhandled metadata");
      }
    }

    Inst->setMetadata(Kind, MD);
  }

  return Inst;
}

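// For example, for an interleave group with factor 4 whose members are at
// indices 0 and 2, and VF = 2, the returned mask is
// <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>.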
Constant *
llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
                           const InterleaveGroup<Instruction> &Group) {
  // All 1's means mask is not needed.
  if (Group.getNumMembers() == Group.getFactor())
    return nullptr;

  // TODO: support reversed access.
  assert(!Group.isReverse() && "Reversed group not supported.");

  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
    }

  return ConstantVector::get(Mask);
}

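// For example, with ReplicationFactor = 3 and VF = 2 the resulting mask is
// <0, 0, 0, 1, 1, 1>.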
Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
                                     unsigned ReplicationFactor, unsigned VF) {
  SmallVector<Constant *, 16> MaskVec;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
      MaskVec.push_back(Builder.getInt32(i));

  return ConstantVector::get(MaskVec);
}

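// For example, with VF = 4 and NumVecs = 2 the resulting mask is
// <0, 4, 1, 5, 2, 6, 3, 7>.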
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                                     unsigned NumVecs) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(Builder.getInt32(j * VF + i));

  return ConstantVector::get(Mask);
}

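// For example, with Start = 0, Stride = 2 and VF = 4 the resulting mask is
// <0, 2, 4, 6>.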
Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
                                 unsigned Stride, unsigned VF) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Builder.getInt32(Start + i * Stride));

  return ConstantVector::get(Mask);
}

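// For example, with Start = 0, NumInts = 4 and NumUndefs = 4 the resulting
// mask is <0, 1, 2, 3, undef, undef, undef, undef>.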
Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                                     unsigned NumInts, unsigned NumUndefs) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Builder.getInt32(Start + i));

  Constant *Undef = UndefValue::get(Builder.getInt32Ty());
  for (unsigned i = 0; i < NumUndefs; i++)
    Mask.push_back(Undef);

  return ConstantVector::get(Mask);
}

/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
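/// For example, concatenating <2 x i32> %a and <2 x i32> %b produces a single
/// shufflevector of %a and %b with the mask <0, 1, 2, 3>.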
static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
                                    Value *V2) {
  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");

  unsigned NumElts1 = VecTy1->getNumElements();
  unsigned NumElts2 = VecTy2->getNumElements();
  assert(NumElts1 >= NumElts2 &&
         "Unexpected: the first vector has fewer elements");

  if (NumElts1 > NumElts2) {
    // Extend with UNDEFs.
    Constant *ExtMask =
        createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
    V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
  }

  Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
  return Builder.CreateShuffleVector(V1, V2, Mask);
}

Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");

  SmallVector<Value *, 8> ResList;
  ResList.append(Vecs.begin(), Vecs.end());
  do {
    SmallVector<Value *, 8> TmpList;
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");

      TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
    }

    // Push the last vector if the total number of vectors is odd.
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);

    ResList = TmpList;
    NumVecs = ResList.size();
  } while (NumVecs > 1);

  return ResList[0];
}

bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}

void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const ValueToValueMap &Strides) {
  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();

  // Since it's desired that the load/store instructions be maintained in
  // "program order" for the interleaved access analysis, we have to visit the
  // blocks in the loop in reverse postorder (i.e., in a topological order).
  // Such an ordering will ensure that any load/store that may be executed
  // before a second load/store will precede the second load/store in
  // AccessStrideInfo.
  LoopBlocksDFS DFS(TheLoop);
  DFS.perform(LI);
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    for (auto &I : *BB) {
      auto *LI = dyn_cast<LoadInst>(&I);
      auto *SI = dyn_cast<StoreInst>(&I);
      if (!LI && !SI)
        continue;

      Value *Ptr = getLoadStorePointerOperand(&I);
      // We don't check wrapping here because we don't know yet if Ptr will be
      // part of a full group or a group with gaps. Checking wrapping for all
      // pointers (even those that end up in groups with no gaps) will be overly
      // conservative. For full groups, wrapping should be ok since if we would
      // wrap around the address space we would do a memory access at nullptr
      // even without the transformation. The wrapping checks are therefore
      // deferred until after we've formed the interleaved groups.
      int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
                                    /*Assume=*/true, /*ShouldCheckWrap=*/false);

      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
      PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
      uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());

      // An alignment of 0 means target ABI alignment.
      unsigned Align = getLoadStoreAlignment(&I);
      if (!Align)
        Align = DL.getABITypeAlignment(PtrTy->getElementType());

      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
    }
}

// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence:  a = A[i];  // (1)
//                                A[i] = b;  // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence:  A[i] = a;      // (1)
//                                A[i] = b;      // (2)
//                                A[i + 1] = c;  // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const ValueToValueMap &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup<Instruction> *Group = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
      Group = getInterleaveGroup(B);
      if (!Group) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
      }
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      else
        LoadGroups.insert(Group);
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a;  // (1)
      //                    | A[i-1] = b;  // (2) |
      //                      A[i-3] = c;  // (3)
      //  A[i]   = d;  // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        // If a dependence exists and A is already in a group, we know that A
        // must be a store since A precedes B and WAR dependences are allowed.
        // Thus, A would be sunk below B. We release A's group to prevent this
        // illegal code motion. A will then be free to form another group with
        // instructions that precede it.
        if (isInterleaved(A)) {
          InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }

        // If a dependence exists and A is not already in a group (or it was
        // and we just released it), B might be hoisted above A (if B is a
        // load) or another store might be sunk below A (if B is a store). In
        // either case, we can't add additional instructions to B's group. B
        // will only form a group with instructions that it precedes.
        break;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive to
      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
      // here, canVectorizeMemory() should have returned false - except for the
      // case we asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory object of A and B don't belong to the same
      // address space.
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave-group must have the same
      // predicate, and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (Group->insertMember(A, IndexA, DesA.Align)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = Group;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          Group->setInsertPos(A);
      }
    } // Iteration over A accesses.
  }   // Iteration over B accesses.

  // Remove interleaved store groups with gaps.
  for (auto *Group : StoreGroups)
    if (Group->getNumMembers() != Group->getFactor()) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
    }
  // Remove interleaved groups with gaps (currently only loads) whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // FORNOW we use Assume=false;
  // TODO: Change to Assume=true but making sure we don't exceed the threshold
  // of runtime SCEV assumptions checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer doesn't
  // wrap then we can deduce that all pointers in the group don't wrap.
  // This means that we can forcefully peel the loop in order to only have to
  // check the first pointer for no-wrap. When we'll change to use Assume=true
  // we'll only need at most one runtime check per interleaved group.
  for (auto *Group : LoadGroups) {
    // Case 1: A full group. Can skip the checks; For full groups, if the wide
    // load would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and group member Factor - 1; If the latter doesn't exist we rely on
    // peeling (if it is a non-reversed access -- see Case 3).
    Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                      /*ShouldCheckWrap=*/true)) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved group due to "
                    "first group member potentially pointer-wrapping.\n");
      releaseGroup(Group);
      continue;
    }
    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
    if (LastMember) {
      Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
      if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
                        /*ShouldCheckWrap=*/true)) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "last group member potentially pointer-wrapping.\n");
        releaseGroup(Group);
      }
    } else {
      // Case 3: A non-reversed interleaved load group with gaps: We need
      // to execute at least one scalar epilogue iteration. This will ensure
      // we don't speculatively access memory out-of-bounds. We only need
      // to look for a member at index factor - 1, since every group must have
      // a member at index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
}

void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
  // If no group had triggered the requirement to create an epilogue loop,
  // there is nothing to do.
  if (!requiresScalarEpilogue())
    return;

  // Avoid releasing a Group twice.
  SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
  for (auto &I : InterleaveGroupMap) {
    InterleaveGroup<Instruction> *Group = I.second;
    if (Group->requiresScalarEpilogue())
      DelSet.insert(Group);
  }
  for (auto *Ptr : DelSet) {
    LLVM_DEBUG(
        dbgs()
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroup(Ptr);
  }

  RequiresScalarEpilogue = false;
}

template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
  llvm_unreachable("addMetadata can only be used for Instruction");
}

namespace llvm {
template <>
void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
  SmallVector<Value *, 4> VL;
  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                 [](std::pair<int, Instruction *> p) { return p.second; });
  propagateMetadata(NewInst, VL);
}
}