LLVM  9.0.0svn
VectorUtils.cpp
1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/VectorUtils.h"
14 #include "llvm/ADT/EquivalenceClasses.h"
15 #include "llvm/Analysis/DemandedBits.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/LoopIterator.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/GetElementPtrTypeIterator.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/IR/Value.h"
27 
28 #define DEBUG_TYPE "vectorutils"
29 
30 using namespace llvm;
31 using namespace llvm::PatternMatch;
32 
33 /// Maximum factor for an interleaved memory access.
34 static cl::opt<unsigned> MaxInterleaveGroupFactor(
35  "max-interleave-group-factor", cl::Hidden,
36  cl::desc("Maximum factor for an interleaved access group (default = 8)"),
37  cl::init(8));
38 
39 /// Return true if all of the intrinsic's arguments and return type are scalars
40 /// for the scalar form of the intrinsic and vectors for the vector form of the
41 /// intrinsic.
42 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
43  switch (ID) {
44  case Intrinsic::bswap: // Begin integer bit-manipulation.
45  case Intrinsic::bitreverse:
46  case Intrinsic::ctpop:
47  case Intrinsic::ctlz:
48  case Intrinsic::cttz:
49  case Intrinsic::fshl:
50  case Intrinsic::fshr:
51  case Intrinsic::sadd_sat:
52  case Intrinsic::ssub_sat:
53  case Intrinsic::uadd_sat:
54  case Intrinsic::usub_sat:
55  case Intrinsic::smul_fix:
56  case Intrinsic::umul_fix:
57  case Intrinsic::sqrt: // Begin floating-point.
58  case Intrinsic::sin:
59  case Intrinsic::cos:
60  case Intrinsic::exp:
61  case Intrinsic::exp2:
62  case Intrinsic::log:
63  case Intrinsic::log10:
64  case Intrinsic::log2:
65  case Intrinsic::fabs:
66  case Intrinsic::minnum:
67  case Intrinsic::maxnum:
68  case Intrinsic::minimum:
69  case Intrinsic::maximum:
70  case Intrinsic::copysign:
71  case Intrinsic::floor:
72  case Intrinsic::ceil:
73  case Intrinsic::trunc:
74  case Intrinsic::rint:
75  case Intrinsic::nearbyint:
76  case Intrinsic::round:
77  case Intrinsic::pow:
78  case Intrinsic::fma:
79  case Intrinsic::fmuladd:
80  case Intrinsic::powi:
81  case Intrinsic::canonicalize:
82  return true;
83  default:
84  return false;
85  }
86 }
87 
88 /// Identifies if the intrinsic has a scalar operand. It checks for
89 /// ctlz, cttz and powi special intrinsics whose argument is scalar.
90 bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
91  unsigned ScalarOpdIdx) {
92  switch (ID) {
93  case Intrinsic::ctlz:
94  case Intrinsic::cttz:
95  case Intrinsic::powi:
96  return (ScalarOpdIdx == 1);
97  case Intrinsic::smul_fix:
98  case Intrinsic::umul_fix:
99  return (ScalarOpdIdx == 2);
100  default:
101  return false;
102  }
103 }
104 
105 /// Returns intrinsic ID for call.
106 /// For the input call instruction it finds the mapping intrinsic and returns
107 /// its ID. If no mapping intrinsic is found, it returns not_intrinsic.
108 Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
109  const TargetLibraryInfo *TLI) {
110  Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI);
111  if (ID == Intrinsic::not_intrinsic)
112  return Intrinsic::not_intrinsic;
113 
114  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
115  ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
116  ID == Intrinsic::sideeffect)
117  return ID;
118  return Intrinsic::not_intrinsic;
119 }
120 
121 /// Find the operand of the GEP that should be checked for consecutive
122 /// stores. This ignores trailing indices that have no effect on the final
123 /// pointer.
124 unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
125  const DataLayout &DL = Gep->getModule()->getDataLayout();
126  unsigned LastOperand = Gep->getNumOperands() - 1;
127  unsigned GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
128 
129  // Walk backwards and try to peel off zeros.
130  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
131  // Find the type we're currently indexing into.
132  gep_type_iterator GEPTI = gep_type_begin(Gep);
133  std::advance(GEPTI, LastOperand - 2);
134 
135  // If it's a type with the same allocation size as the result of the GEP we
136  // can peel off the zero index.
137  if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
138  break;
139  --LastOperand;
140  }
141 
142  return LastOperand;
143 }
144 
145 /// If the argument is a GEP, then returns the operand identified by
146 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
147 /// operand, it returns that instead.
148 Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
149  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
150  if (!GEP)
151  return Ptr;
152 
153  unsigned InductionOperand = getGEPInductionOperand(GEP);
154 
155  // Check that all of the gep indices are uniform except for our induction
156  // operand.
157  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
158  if (i != InductionOperand &&
159  !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
160  return Ptr;
161  return GEP->getOperand(InductionOperand);
162 }
163 
164 /// If a value has only one user that is a CastInst, return it.
165 Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
166  Value *UniqueCast = nullptr;
167  for (User *U : Ptr->users()) {
168  CastInst *CI = dyn_cast<CastInst>(U);
169  if (CI && CI->getType() == Ty) {
170  if (!UniqueCast)
171  UniqueCast = CI;
172  else
173  return nullptr;
174  }
175  }
176  return UniqueCast;
177 }
178 
179 /// Get the stride of a pointer access in a loop. Looks for symbolic
180 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
181 Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
182  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
183  if (!PtrTy || PtrTy->isAggregateType())
184  return nullptr;
185 
186  // Try to remove a gep instruction to make the pointer (actually the index
187  // at this point) easier to analyze. If OrigPtr is equal to Ptr we are
188  // analyzing the pointer, otherwise we are analyzing the index.
189  Value *OrigPtr = Ptr;
190 
191  // The size of the pointer access.
192  int64_t PtrAccessSize = 1;
193 
194  Ptr = stripGetElementPtr(Ptr, SE, Lp);
195  const SCEV *V = SE->getSCEV(Ptr);
196 
197  if (Ptr != OrigPtr)
198  // Strip off casts.
199  while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
200  V = C->getOperand();
201 
202  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
203  if (!S)
204  return nullptr;
205 
206  V = S->getStepRecurrence(*SE);
207  if (!V)
208  return nullptr;
209 
210  // Strip off the size of access multiplication if we are still analyzing the
211  // pointer.
212  if (OrigPtr == Ptr) {
213  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
214  if (M->getOperand(0)->getSCEVType() != scConstant)
215  return nullptr;
216 
217  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
218 
219  // Huge step value - give up.
220  if (APStepVal.getBitWidth() > 64)
221  return nullptr;
222 
223  int64_t StepVal = APStepVal.getSExtValue();
224  if (PtrAccessSize != StepVal)
225  return nullptr;
226  V = M->getOperand(1);
227  }
228  }
229 
230  // Strip off casts.
231  Type *StripedOffRecurrenceCast = nullptr;
232  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
233  StripedOffRecurrenceCast = C->getType();
234  V = C->getOperand();
235  }
236 
237  // Look for the loop invariant symbolic value.
238  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
239  if (!U)
240  return nullptr;
241 
242  Value *Stride = U->getValue();
243  if (!Lp->isLoopInvariant(Stride))
244  return nullptr;
245 
246  // If we have stripped off the recurrence cast we have to make sure that we
247  // return the value that is used in this loop so that we can replace it later.
248  if (StripedOffRecurrenceCast)
249  Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
250 
251  return Stride;
252 }
253 
254 /// Given a vector and an element number, see if the scalar value is
255 /// already around as a register, for example if it were inserted then extracted
256 /// from the vector.
257 Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
258  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
259  VectorType *VTy = cast<VectorType>(V->getType());
260  unsigned Width = VTy->getNumElements();
261  if (EltNo >= Width) // Out of range access.
262  return UndefValue::get(VTy->getElementType());
263 
264  if (Constant *C = dyn_cast<Constant>(V))
265  return C->getAggregateElement(EltNo);
266 
267  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
268  // If this is an insert to a variable element, we don't know what it is.
269  if (!isa<ConstantInt>(III->getOperand(2)))
270  return nullptr;
271  unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
272 
273  // If this is an insert to the element we are looking for, return the
274  // inserted value.
275  if (EltNo == IIElt)
276  return III->getOperand(1);
277 
278  // Otherwise, the insertelement doesn't modify the value, recurse on its
279  // vector input.
280  return findScalarElement(III->getOperand(0), EltNo);
281  }
282 
283  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
284  unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
285  int InEl = SVI->getMaskValue(EltNo);
286  if (InEl < 0)
287  return UndefValue::get(VTy->getElementType());
288  if (InEl < (int)LHSWidth)
289  return findScalarElement(SVI->getOperand(0), InEl);
290  return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
291  }
292 
293  // Extract a value from a vector add operation with a constant zero.
294  // TODO: Use getBinOpIdentity() to generalize this.
295  Value *Val; Constant *C;
296  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
297  if (Constant *Elt = C->getAggregateElement(EltNo))
298  if (Elt->isNullValue())
299  return findScalarElement(Val, EltNo);
300 
301  // Otherwise, we don't know.
302  return nullptr;
303 }
304 
305 /// Get splat value if the input is a splat vector or return nullptr.
306 /// This function is not fully general. It checks only 2 cases:
307 /// the input value is (1) a splat constant vector or (2) a sequence
308 /// of instructions that broadcasts a scalar at element 0.
309 const Value *llvm::getSplatValue(const Value *V) {
310  if (isa<VectorType>(V->getType()))
311  if (auto *C = dyn_cast<Constant>(V))
312  return C->getSplatValue();
313 
314  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
315  Value *Splat;
316  if (match(V, m_ShuffleVector(m_InsertElement(m_Value(), m_Value(Splat),
317  m_ZeroInt()),
318  m_Value(), m_ZeroInt())))
319  return Splat;
320 
321  return nullptr;
322 }
323 
324 // This setting is based on its counterpart in value tracking, but it could be
325 // adjusted if needed.
326 const unsigned MaxDepth = 6;
327 
328 bool llvm::isSplatValue(const Value *V, unsigned Depth) {
329  assert(Depth <= MaxDepth && "Limit Search Depth");
330 
331  if (isa<VectorType>(V->getType())) {
332  if (isa<UndefValue>(V))
333  return true;
334  // FIXME: Constant splat analysis does not allow undef elements.
335  if (auto *C = dyn_cast<Constant>(V))
336  return C->getSplatValue() != nullptr;
337  }
338 
339  // FIXME: Constant splat analysis does not allow undef elements.
340  Constant *Mask;
341  if (match(V, m_ShuffleVector(m_Value(), m_Value(), m_Constant(Mask))))
342  return Mask->getSplatValue() != nullptr;
343 
344  // The remaining tests are all recursive, so bail out if we hit the limit.
345  if (Depth++ == MaxDepth)
346  return false;
347 
348  // If both operands of a binop are splats, the result is a splat.
349  Value *X, *Y, *Z;
350  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
351  return isSplatValue(X, Depth) && isSplatValue(Y, Depth);
352 
353  // If all operands of a select are splats, the result is a splat.
354  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
355  return isSplatValue(X, Depth) && isSplatValue(Y, Depth) &&
356  isSplatValue(Z, Depth);
357 
358  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
359 
360  return false;
361 }
362 
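/// Compute a map of integer instructions to their minimum legal type size.
/// For example, a chain of i32 arithmetic that is consumed only through a
/// trunc to i8, and whose demanded bits all fit in 8 bits, is mapped to a
/// minimum width of 8.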
363 MapVector<Instruction *, uint64_t>
364 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
365  const TargetTransformInfo *TTI) {
366 
367  // DemandedBits will give us every value's live-out bits. But we want
368  // to ensure no extra casts would need to be inserted, so every DAG
369  // of connected values must have the same minimum bitwidth.
370  EquivalenceClasses<Value *> ECs;
371  SmallVector<Value *, 16> Worklist;
372  SmallPtrSet<Value *, 4> Roots;
373  SmallPtrSet<Value *, 16> Visited;
374  DenseMap<Value *, uint64_t> DBits;
375  SmallPtrSet<Instruction *, 4> InstructionSet;
376  MapVector<Instruction *, uint64_t> MinBWs;
377 
378  // Determine the roots. We work bottom-up, from truncs or icmps.
379  bool SeenExtFromIllegalType = false;
380  for (auto *BB : Blocks)
381  for (auto &I : *BB) {
382  InstructionSet.insert(&I);
383 
384  if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
385  !TTI->isTypeLegal(I.getOperand(0)->getType()))
386  SeenExtFromIllegalType = true;
387 
388  // Only deal with non-vector integers up to 64-bits wide.
389  if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
390  !I.getType()->isVectorTy() &&
391  I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
392  // Don't make work for ourselves. If we know the loaded type is legal,
393  // don't add it to the worklist.
394  if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
395  continue;
396 
397  Worklist.push_back(&I);
398  Roots.insert(&I);
399  }
400  }
401  // Early exit.
402  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
403  return MinBWs;
404 
405  // Now proceed breadth-first, unioning values together.
406  while (!Worklist.empty()) {
407  Value *Val = Worklist.pop_back_val();
408  Value *Leader = ECs.getOrInsertLeaderValue(Val);
409 
410  if (Visited.count(Val))
411  continue;
412  Visited.insert(Val);
413 
414  // Non-instructions terminate a chain successfully.
415  if (!isa<Instruction>(Val))
416  continue;
417  Instruction *I = cast<Instruction>(Val);
418 
419  // If we encounter a type that is larger than 64 bits, we can't represent
420  // it so bail out.
421  if (DB.getDemandedBits(I).getBitWidth() > 64)
422  return MapVector<Instruction *, uint64_t>();
423 
424  uint64_t V = DB.getDemandedBits(I).getZExtValue();
425  DBits[Leader] |= V;
426  DBits[I] = V;
427 
428  // Casts, loads and instructions outside of our range terminate a chain
429  // successfully.
430  if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
431  !InstructionSet.count(I))
432  continue;
433 
434  // Unsafe casts terminate a chain unsuccessfully. We can't do anything
435  // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
436  // transform anything that relies on them.
437  if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
438  !I->getType()->isIntegerTy()) {
439  DBits[Leader] |= ~0ULL;
440  continue;
441  }
442 
443  // We don't modify the types of PHIs. Reductions will already have been
444  // truncated if possible, and inductions' sizes will have been chosen by
445  // indvars.
446  if (isa<PHINode>(I))
447  continue;
448 
449  if (DBits[Leader] == ~0ULL)
450  // All bits demanded, no point continuing.
451  continue;
452 
453  for (Value *O : cast<User>(I)->operands()) {
454  ECs.unionSets(Leader, O);
455  Worklist.push_back(O);
456  }
457  }
458 
459  // Now we've discovered all values, walk them to see if there are
460  // any users we didn't see. If there are, we can't optimize that
461  // chain.
462  for (auto &I : DBits)
463  for (auto *U : I.first->users())
464  if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
465  DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
466 
467  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
468  uint64_t LeaderDemandedBits = 0;
469  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
470  LeaderDemandedBits |= DBits[*MI];
471 
472  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
473  llvm::countLeadingZeros(LeaderDemandedBits);
474  // Round up to a power of 2
475  if (!isPowerOf2_64((uint64_t)MinBW))
476  MinBW = NextPowerOf2(MinBW);
477 
478  // We don't modify the types of PHIs. Reductions will already have been
479  // truncated if possible, and inductions' sizes will have been chosen by
480  // indvars.
481  // If we are required to shrink a PHI, abandon this entire equivalence class.
482  bool Abort = false;
483  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
484  if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
485  Abort = true;
486  break;
487  }
488  if (Abort)
489  continue;
490 
491  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
492  if (!isa<Instruction>(*MI))
493  continue;
494  Type *Ty = (*MI)->getType();
495  if (Roots.count(*MI))
496  Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
497  if (MinBW < Ty->getScalarSizeInBits())
498  MinBWs[cast<Instruction>(*MI)] = MinBW;
499  }
500  }
501 
502  return MinBWs;
503 }
504 
505 /// Add all access groups in @p AccGroups to @p List.
506 template <typename ListT>
507 static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
508  // Interpret an access group as a list containing itself.
509  if (AccGroups->getNumOperands() == 0) {
510  assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
511  List.insert(AccGroups);
512  return;
513  }
514 
515  for (auto &AccGroupListOp : AccGroups->operands()) {
516  auto *Item = cast<MDNode>(AccGroupListOp.get());
517  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
518  List.insert(Item);
519  }
520 }
521 
522 MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
523  if (!AccGroups1)
524  return AccGroups2;
525  if (!AccGroups2)
526  return AccGroups1;
527  if (AccGroups1 == AccGroups2)
528  return AccGroups1;
529 
530  SmallSetVector<Metadata *, 4> Union;
531  addToAccessGroupList(Union, AccGroups1);
532  addToAccessGroupList(Union, AccGroups2);
533 
534  if (Union.size() == 0)
535  return nullptr;
536  if (Union.size() == 1)
537  return cast<MDNode>(Union.front());
538 
539  LLVMContext &Ctx = AccGroups1->getContext();
540  return MDNode::get(Ctx, Union.getArrayRef());
541 }
542 
543 MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
544  const Instruction *Inst2) {
545  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
546  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
547 
548  if (!MayAccessMem1 && !MayAccessMem2)
549  return nullptr;
550  if (!MayAccessMem1)
551  return Inst2->getMetadata(LLVMContext::MD_access_group);
552  if (!MayAccessMem2)
553  return Inst1->getMetadata(LLVMContext::MD_access_group);
554 
555  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
556  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
557  if (!MD1 || !MD2)
558  return nullptr;
559  if (MD1 == MD2)
560  return MD1;
561 
562  // Use set for scalable 'contains' check.
563  SmallPtrSet<Metadata *, 4> AccGroupSet2;
564  addToAccessGroupList(AccGroupSet2, MD2);
565 
566  SmallVector<Metadata *, 4> Intersection;
567  if (MD1->getNumOperands() == 0) {
568  assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
569  if (AccGroupSet2.count(MD1))
570  Intersection.push_back(MD1);
571  } else {
572  for (const MDOperand &Node : MD1->operands()) {
573  auto *Item = cast<MDNode>(Node.get());
574  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
575  if (AccGroupSet2.count(Item))
576  Intersection.push_back(Item);
577  }
578  }
579 
580  if (Intersection.size() == 0)
581  return nullptr;
582  if (Intersection.size() == 1)
583  return cast<MDNode>(Intersection.front());
584 
585  LLVMContext &Ctx = Inst1->getContext();
586  return MDNode::get(Ctx, Intersection);
587 }
588 
589 /// \returns \p I after propagating metadata from \p VL.
590 Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
591  Instruction *I0 = cast<Instruction>(VL[0]);
592  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
593  I0->getAllMetadataOtherThanDebugLoc(Metadata);
594 
595  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
596  LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
597  LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
598  LLVMContext::MD_access_group}) {
599  MDNode *MD = I0->getMetadata(Kind);
600 
601  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
602  const Instruction *IJ = cast<Instruction>(VL[J]);
603  MDNode *IMD = IJ->getMetadata(Kind);
604  switch (Kind) {
605  case LLVMContext::MD_tbaa:
606  MD = MDNode::getMostGenericTBAA(MD, IMD);
607  break;
608  case LLVMContext::MD_alias_scope:
609  MD = MDNode::getMostGenericAliasScope(MD, IMD);
610  break;
611  case LLVMContext::MD_fpmath:
612  MD = MDNode::getMostGenericFPMath(MD, IMD);
613  break;
614  case LLVMContext::MD_noalias:
615  case LLVMContext::MD_nontemporal:
616  case LLVMContext::MD_invariant_load:
617  MD = MDNode::intersect(MD, IMD);
618  break;
619  case LLVMContext::MD_access_group:
620  MD = intersectAccessGroups(Inst, IJ);
621  break;
622  default:
623  llvm_unreachable("unhandled metadata");
624  }
625  }
626 
627  Inst->setMetadata(Kind, MD);
628  }
629 
630  return Inst;
631 }
632 
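/// For example, for an interleave group with factor 4 whose members are
/// present at indices 0 and 2, and VF = 2, the resulting mask is
/// <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>.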
633 Constant *
634 llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
635  const InterleaveGroup<Instruction> &Group) {
636  // All 1's means mask is not needed.
637  if (Group.getNumMembers() == Group.getFactor())
638  return nullptr;
639 
640  // TODO: support reversed access.
641  assert(!Group.isReverse() && "Reversed group not supported.");
642 
643  SmallVector<Constant *, 16> Mask;
644  for (unsigned i = 0; i < VF; i++)
645  for (unsigned j = 0; j < Group.getFactor(); ++j) {
646  unsigned HasMember = Group.getMember(j) ? 1 : 0;
647  Mask.push_back(Builder.getInt1(HasMember));
648  }
649 
650  return ConstantVector::get(Mask);
651 }
652 
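/// For example, for ReplicationFactor = 3 and VF = 2 the resulting mask is
/// <0, 0, 0, 1, 1, 1>.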
653 Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
654  unsigned ReplicationFactor, unsigned VF) {
655  SmallVector<Constant *, 16> MaskVec;
656  for (unsigned i = 0; i < VF; i++)
657  for (unsigned j = 0; j < ReplicationFactor; j++)
658  MaskVec.push_back(Builder.getInt32(i));
659 
660  return ConstantVector::get(MaskVec);
661 }
662 
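/// For example, for VF = 4 and NumVecs = 2 the resulting mask is
/// <0, 4, 1, 5, 2, 6, 3, 7>.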
663 Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
664  unsigned NumVecs) {
665  SmallVector<Constant *, 16> Mask;
666  for (unsigned i = 0; i < VF; i++)
667  for (unsigned j = 0; j < NumVecs; j++)
668  Mask.push_back(Builder.getInt32(j * VF + i));
669 
670  return ConstantVector::get(Mask);
671 }
672 
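/// For example, for Start = 0, Stride = 2 and VF = 4 the resulting mask is
/// <0, 2, 4, 6>.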
673 Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
674  unsigned Stride, unsigned VF) {
675  SmallVector<Constant *, 16> Mask;
676  for (unsigned i = 0; i < VF; i++)
677  Mask.push_back(Builder.getInt32(Start + i * Stride));
678 
679  return ConstantVector::get(Mask);
680 }
681 
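/// For example, for Start = 0, NumInts = 4 and NumUndefs = 4 the resulting
/// mask is <0, 1, 2, 3, undef, undef, undef, undef>.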
682 Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
683  unsigned NumInts, unsigned NumUndefs) {
684  SmallVector<Constant *, 16> Mask;
685  for (unsigned i = 0; i < NumInts; i++)
686  Mask.push_back(Builder.getInt32(Start + i));
687 
688  Constant *Undef = UndefValue::get(Builder.getInt32Ty());
689  for (unsigned i = 0; i < NumUndefs; i++)
690  Mask.push_back(Undef);
691 
692  return ConstantVector::get(Mask);
693 }
694 
695 /// A helper function for concatenating vectors. This function concatenates two
696 /// vectors having the same element type. If the second vector has fewer
697 /// elements than the first, it is padded with undefs.
698 static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
699  Value *V2) {
700  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
701  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
702  assert(VecTy1 && VecTy2 &&
703  VecTy1->getScalarType() == VecTy2->getScalarType() &&
704  "Expect two vectors with the same element type");
705 
706  unsigned NumElts1 = VecTy1->getNumElements();
707  unsigned NumElts2 = VecTy2->getNumElements();
708  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
709 
710  if (NumElts1 > NumElts2) {
711  // Extend with UNDEFs.
712  Constant *ExtMask =
713  createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
714  V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
715  }
716 
717  Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
718  return Builder.CreateShuffleVector(V1, V2, Mask);
719 }
720 
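/// Concatenate the vectors in \p Vecs into a single wide vector. For example,
/// four <2 x i32> inputs are combined pairwise into one <8 x i32> result.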
721 Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
722  unsigned NumVecs = Vecs.size();
723  assert(NumVecs > 1 && "Should be at least two vectors");
724 
725  SmallVector<Value *, 8> ResList;
726  ResList.append(Vecs.begin(), Vecs.end());
727  do {
728  SmallVector<Value *, 8> TmpList;
729  for (unsigned i = 0; i < NumVecs - 1; i += 2) {
730  Value *V0 = ResList[i], *V1 = ResList[i + 1];
731  assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
732  "Only the last vector may have a different type");
733 
734  TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
735  }
736 
737  // Push the last vector if the total number of vectors is odd.
738  if (NumVecs % 2 != 0)
739  TmpList.push_back(ResList[NumVecs - 1]);
740 
741  ResList = TmpList;
742  NumVecs = ResList.size();
743  } while (NumVecs > 1);
744 
745  return ResList[0];
746 }
747 
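/// Given a mask vector <Y x i1>, return true if all lanes can be assumed
/// inactive, i.e. each element is either false or undef (e.g.
/// <i1 0, i1 undef, i1 0>).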
748 bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
749  auto *ConstMask = dyn_cast<Constant>(Mask);
750  if (!ConstMask)
751  return false;
752  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
753  return true;
754  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
755  ++I) {
756  if (auto *MaskElt = ConstMask->getAggregateElement(I))
757  if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
758  continue;
759  return false;
760  }
761  return true;
762 }
763 
764 
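/// Given a mask vector <Y x i1>, return true if all lanes can be assumed
/// active, i.e. each element is either true or undef (e.g.
/// <i1 1, i1 undef, i1 1>).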
765 bool llvm::maskIsAllOneOrUndef(Value *Mask) {
766  auto *ConstMask = dyn_cast<Constant>(Mask);
767  if (!ConstMask)
768  return false;
769  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
770  return true;
771  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
772  ++I) {
773  if (auto *MaskElt = ConstMask->getAggregateElement(I))
774  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
775  continue;
776  return false;
777  }
778  return true;
779 }
780 
781 /// TODO: This is a lot like known bits, but for
782 /// vectors. Is there something we can common this with?
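/// Given a mask vector <Y x i1>, return an APInt (of bitwidth Y) with a bit
/// set for each lane that may be active. For example, for the mask
/// <i1 1, i1 0, i1 undef, i1 1> bits 0, 2 and 3 are set.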
783 APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
784 
785  const unsigned VWidth = cast<VectorType>(Mask->getType())->getNumElements();
786  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
787  if (auto *CV = dyn_cast<ConstantVector>(Mask))
788  for (unsigned i = 0; i < VWidth; i++)
789  if (CV->getAggregateElement(i)->isNullValue())
790  DemandedElts.clearBit(i);
791  return DemandedElts;
792 }
793 
794 bool InterleavedAccessInfo::isStrided(int Stride) {
795  unsigned Factor = std::abs(Stride);
796  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
797 }
798 
799 void InterleavedAccessInfo::collectConstStrideAccesses(
800  MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
801  const ValueToValueMap &Strides) {
802  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
803 
804  // Since it's desired that the load/store instructions be maintained in
805  // "program order" for the interleaved access analysis, we have to visit the
806  // blocks in the loop in reverse postorder (i.e., in a topological order).
807  // Such an ordering will ensure that any load/store that may be executed
808  // before a second load/store will precede the second load/store in
809  // AccessStrideInfo.
810  LoopBlocksDFS DFS(TheLoop);
811  DFS.perform(LI);
812  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
813  for (auto &I : *BB) {
814  auto *LI = dyn_cast<LoadInst>(&I);
815  auto *SI = dyn_cast<StoreInst>(&I);
816  if (!LI && !SI)
817  continue;
818 
819  Value *Ptr = getLoadStorePointerOperand(&I);
820  // We don't check wrapping here because we don't know yet if Ptr will be
821  // part of a full group or a group with gaps. Checking wrapping for all
822  // pointers (even those that end up in groups with no gaps) will be overly
823  // conservative. For full groups, wrapping should be ok since if we would
824  // wrap around the address space we would do a memory access at nullptr
825  // even without the transformation. The wrapping checks are therefore
826  // deferred until after we've formed the interleaved groups.
827  int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
828  /*Assume=*/true, /*ShouldCheckWrap=*/false);
829 
830  const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
831  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
832  uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
833 
834  // An alignment of 0 means target ABI alignment.
835  unsigned Align = getLoadStoreAlignment(&I);
836  if (!Align)
837  Align = DL.getABITypeAlignment(PtrTy->getElementType());
838 
839  AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
840  }
841 }
842 
843 // Analyze interleaved accesses and collect them into interleaved load and
844 // store groups.
845 //
846 // When generating code for an interleaved load group, we effectively hoist all
847 // loads in the group to the location of the first load in program order. When
848 // generating code for an interleaved store group, we sink all stores to the
849 // location of the last store. This code motion can change the order of load
850 // and store instructions and may break dependences.
851 //
852 // The code generation strategy mentioned above ensures that we won't violate
853 // any write-after-read (WAR) dependences.
854 //
855 // E.g., for the WAR dependence: a = A[i]; // (1)
856 // A[i] = b; // (2)
857 //
858 // The store group of (2) is always inserted at or below (2), and the load
859 // group of (1) is always inserted at or above (1). Thus, the instructions will
860 // never be reordered. All other dependences are checked to ensure the
861 // correctness of the instruction reordering.
862 //
863 // The algorithm visits all memory accesses in the loop in bottom-up program
864 // order. Program order is established by traversing the blocks in the loop in
865 // reverse postorder when collecting the accesses.
866 //
867 // We visit the memory accesses in bottom-up order because it can simplify the
868 // construction of store groups in the presence of write-after-write (WAW)
869 // dependences.
870 //
871 // E.g., for the WAW dependence: A[i] = a; // (1)
872 // A[i] = b; // (2)
873 // A[i + 1] = c; // (3)
874 //
875 // We will first create a store group with (3) and (2). (1) can't be added to
876 // this group because it and (2) are dependent. However, (1) can be grouped
877 // with other accesses that may precede it in program order. Note that a
878 // bottom-up order does not imply that WAW dependences should not be checked.
879 void InterleavedAccessInfo::analyzeInterleaving(
880  bool EnablePredicatedInterleavedMemAccesses) {
881  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
882  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
883 
884  // Holds all accesses with a constant stride.
885  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
886  collectConstStrideAccesses(AccessStrideInfo, Strides);
887 
888  if (AccessStrideInfo.empty())
889  return;
890 
891  // Collect the dependences in the loop.
892  collectDependences();
893 
894  // Holds all interleaved store groups temporarily.
895  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
896  // Holds all interleaved load groups temporarily.
897  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
898 
899  // Search in bottom-up program order for pairs of accesses (A and B) that can
900  // form interleaved load or store groups. In the algorithm below, access A
901  // precedes access B in program order. We initialize a group for B in the
902  // outer loop of the algorithm, and then in the inner loop, we attempt to
903  // insert each A into B's group if:
904  //
905  // 1. A and B have the same stride,
906  // 2. A and B have the same memory object size, and
907  // 3. A belongs in B's group according to its distance from B.
908  //
909  // Special care is taken to ensure group formation will not break any
910  // dependences.
911  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
912  BI != E; ++BI) {
913  Instruction *B = BI->first;
914  StrideDescriptor DesB = BI->second;
915 
916  // Initialize a group for B if it has an allowable stride. Even if we don't
917  // create a group for B, we continue with the bottom-up algorithm to ensure
918  // we don't break any of B's dependences.
919  InterleaveGroup<Instruction> *Group = nullptr;
920  if (isStrided(DesB.Stride) &&
921  (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
922  Group = getInterleaveGroup(B);
923  if (!Group) {
924  LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
925  << '\n');
926  Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
927  }
928  if (B->mayWriteToMemory())
929  StoreGroups.insert(Group);
930  else
931  LoadGroups.insert(Group);
932  }
933 
934  for (auto AI = std::next(BI); AI != E; ++AI) {
935  Instruction *A = AI->first;
936  StrideDescriptor DesA = AI->second;
937 
938  // Our code motion strategy implies that we can't have dependences
939  // between accesses in an interleaved group and other accesses located
940  // between the first and last member of the group. Note that this also
941  // means that a group can't have more than one member at a given offset.
942  // The accesses in a group can have dependences with other accesses, but
943  // we must ensure we don't extend the boundaries of the group such that
944  // we encompass those dependent accesses.
945  //
946  // For example, assume we have the sequence of accesses shown below in a
947  // stride-2 loop:
948  //
949  // (1, 2) is a group | A[i] = a; // (1)
950  // | A[i-1] = b; // (2) |
951  // A[i-3] = c; // (3)
952  // A[i] = d; // (4) | (2, 4) is not a group
953  //
954  // Because accesses (2) and (3) are dependent, we can group (2) with (1)
955  // but not with (4). If we did, the dependent access (3) would be within
956  // the boundaries of the (2, 4) group.
957  if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
958  // If a dependence exists and A is already in a group, we know that A
959  // must be a store since A precedes B and WAR dependences are allowed.
960  // Thus, A would be sunk below B. We release A's group to prevent this
961  // illegal code motion. A will then be free to form another group with
962  // instructions that precede it.
963  if (isInterleaved(A)) {
964  InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
965  StoreGroups.remove(StoreGroup);
966  releaseGroup(StoreGroup);
967  }
968 
969  // If a dependence exists and A is not already in a group (or it was
970  // and we just released it), B might be hoisted above A (if B is a
971  // load) or another store might be sunk below A (if B is a store). In
972  // either case, we can't add additional instructions to B's group. B
973  // will only form a group with instructions that it precedes.
974  break;
975  }
976 
977  // At this point, we've checked for illegal code motion. If either A or B
978  // isn't strided, there's nothing left to do.
979  if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
980  continue;
981 
982  // Ignore A if it's already in a group or isn't the same kind of memory
983  // operation as B.
984  // Note that mayReadFromMemory() isn't mutually exclusive to
985  // mayWriteToMemory in the case of atomic loads. We shouldn't see those
986  // here, canVectorizeMemory() should have returned false - except for the
987  // case we asked for optimization remarks.
988  if (isInterleaved(A) ||
989  (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
990  (A->mayWriteToMemory() != B->mayWriteToMemory()))
991  continue;
992 
993  // Check rules 1 and 2. Ignore A if its stride or size is different from
994  // that of B.
995  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
996  continue;
997 
998  // Ignore A if the memory object of A and B don't belong to the same
999  // address space
1000  if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1001  continue;
1002 
1003  // Calculate the distance from A to B.
1004  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1005  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1006  if (!DistToB)
1007  continue;
1008  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1009 
1010  // Check rule 3. Ignore A if its distance to B is not a multiple of the
1011  // size.
1012  if (DistanceToB % static_cast<int64_t>(DesB.Size))
1013  continue;
1014 
1015  // All members of a predicated interleave-group must have the same predicate,
1016  // and currently must reside in the same BB.
1017  BasicBlock *BlockA = A->getParent();
1018  BasicBlock *BlockB = B->getParent();
1019  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1020  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1021  continue;
1022 
1023  // The index of A is the index of B plus A's distance to B in multiples
1024  // of the size.
1025  int IndexA =
1026  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1027 
1028  // Try to insert A into B's group.
1029  if (Group->insertMember(A, IndexA, DesA.Align)) {
1030  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1031  << " into the interleave group with" << *B
1032  << '\n');
1033  InterleaveGroupMap[A] = Group;
1034 
1035  // Set the first load in program order as the insert position.
1036  if (A->mayReadFromMemory())
1037  Group->setInsertPos(A);
1038  }
1039  } // Iteration over A accesses.
1040  } // Iteration over B accesses.
1041 
1042  // Remove interleaved store groups with gaps.
1043  for (auto *Group : StoreGroups)
1044  if (Group->getNumMembers() != Group->getFactor()) {
1045  LLVM_DEBUG(
1046  dbgs() << "LV: Invalidate candidate interleaved store group due "
1047  "to gaps.\n");
1048  releaseGroup(Group);
1049  }
1050  // Remove interleaved groups with gaps (currently only loads) whose memory
1051  // accesses may wrap around. We have to revisit the getPtrStride analysis,
1052  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1053  // not check wrapping (see documentation there).
1054  // FORNOW we use Assume=false;
1055  // TODO: Change to Assume=true but making sure we don't exceed the threshold
1056  // of runtime SCEV assumptions checks (thereby potentially failing to
1057  // vectorize altogether).
1058  // Additional optional optimizations:
1059  // TODO: If we are peeling the loop and we know that the first pointer doesn't
1060  // wrap then we can deduce that all pointers in the group don't wrap.
1061  // This means that we can forcefully peel the loop in order to only have to
1062  // check the first pointer for no-wrap. When we'll change to use Assume=true
1063  // we'll only need at most one runtime check per interleaved group.
1064  for (auto *Group : LoadGroups) {
1065  // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1066  // load would wrap around the address space we would do a memory access at
1067  // nullptr even without the transformation.
1068  if (Group->getNumMembers() == Group->getFactor())
1069  continue;
1070 
1071  // Case 2: If first and last members of the group don't wrap this implies
1072  // that all the pointers in the group don't wrap.
1073  // So we check only group member 0 (which is always guaranteed to exist),
1074  // and group member Factor - 1; If the latter doesn't exist we rely on
1075  // peeling (if it is a non-reversed access -- see Case 3).
1076  Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
1077  if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
1078  /*ShouldCheckWrap=*/true)) {
1079  LLVM_DEBUG(
1080  dbgs() << "LV: Invalidate candidate interleaved group due to "
1081  "first group member potentially pointer-wrapping.\n");
1082  releaseGroup(Group);
1083  continue;
1084  }
1085  Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
1086  if (LastMember) {
1087  Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
1088  if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
1089  /*ShouldCheckWrap=*/true)) {
1090  LLVM_DEBUG(
1091  dbgs() << "LV: Invalidate candidate interleaved group due to "
1092  "last group member potentially pointer-wrapping.\n");
1093  releaseGroup(Group);
1094  }
1095  } else {
1096  // Case 3: A non-reversed interleaved load group with gaps: We need
1097  // to execute at least one scalar epilogue iteration. This will ensure
1098  // we don't speculatively access memory out-of-bounds. We only need
1099  // to look for a member at index factor - 1, since every group must have
1100  // a member at index zero.
1101  if (Group->isReverse()) {
1102  LLVM_DEBUG(
1103  dbgs() << "LV: Invalidate candidate interleaved group due to "
1104  "a reverse access with gaps.\n");
1105  releaseGroup(Group);
1106  continue;
1107  }
1108  LLVM_DEBUG(
1109  dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1110  RequiresScalarEpilogue = true;
1111  }
1112  }
1113 }
1114 
1115 void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1116  // If no group had triggered the requirement to create an epilogue loop,
1117  // there is nothing to do.
1118  if (!requiresScalarEpilogue())
1119  return;
1120 
1121  // Avoid releasing a Group twice.
1122  SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
1123  for (auto &I : InterleaveGroupMap) {
1124  InterleaveGroup<Instruction> *Group = I.second;
1125  if (Group->requiresScalarEpilogue())
1126  DelSet.insert(Group);
1127  }
1128  for (auto *Ptr : DelSet) {
1129  LLVM_DEBUG(
1130  dbgs()
1131  << "LV: Invalidate candidate interleaved group due to gaps that "
1132  "require a scalar epilogue (not allowed under optsize) and cannot "
1133  "be masked (not enabled). \n");
1134  releaseGroup(Ptr);
1135  }
1136 
1137  RequiresScalarEpilogue = false;
1138 }
1139 
1140 template <typename InstT>
1141 void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1142  llvm_unreachable("addMetadata can only be used for Instruction");
1143 }
1144 
1145 namespace llvm {
1146 template <>
1147 void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1148  SmallVector<Value *, 4> VL;
1149  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1150  [](std::pair<int, Instruction *> p) { return p.second; });
1151  propagateMetadata(NewInst, VL);
1152 }
1153 }