LLVM  9.0.0svn
VectorUtils.cpp
1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/VectorUtils.h"
14 #include "llvm/ADT/EquivalenceClasses.h"
15 #include "llvm/Analysis/DemandedBits.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/LoopIterator.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/GetElementPtrTypeIterator.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/IR/Value.h"
27 
28 #define DEBUG_TYPE "vectorutils"
29 
30 using namespace llvm;
31 using namespace llvm::PatternMatch;
32 
33 /// Maximum factor for an interleaved memory access.
34 static cl::opt<unsigned> MaxInterleaveGroupFactor(
35  "max-interleave-group-factor", cl::Hidden,
36  cl::desc("Maximum factor for an interleaved access group (default = 8)"),
37  cl::init(8));
38 
39 /// Return true if all of the intrinsic's arguments and return type are scalars
40 /// for the scalar form of the intrinsic, and vectors for the vector form of the
41 /// intrinsic (except operands that are marked as always being scalar by
42 /// hasVectorInstrinsicScalarOpd).
43 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
44  switch (ID) {
45  case Intrinsic::bswap: // Begin integer bit-manipulation.
46  case Intrinsic::bitreverse:
47  case Intrinsic::ctpop:
48  case Intrinsic::ctlz:
49  case Intrinsic::cttz:
50  case Intrinsic::fshl:
51  case Intrinsic::fshr:
52  case Intrinsic::sadd_sat:
53  case Intrinsic::ssub_sat:
54  case Intrinsic::uadd_sat:
55  case Intrinsic::usub_sat:
56  case Intrinsic::smul_fix:
57  case Intrinsic::smul_fix_sat:
58  case Intrinsic::umul_fix:
59  case Intrinsic::sqrt: // Begin floating-point.
60  case Intrinsic::sin:
61  case Intrinsic::cos:
62  case Intrinsic::exp:
63  case Intrinsic::exp2:
64  case Intrinsic::log:
65  case Intrinsic::log10:
66  case Intrinsic::log2:
67  case Intrinsic::fabs:
68  case Intrinsic::minnum:
69  case Intrinsic::maxnum:
70  case Intrinsic::minimum:
71  case Intrinsic::maximum:
72  case Intrinsic::copysign:
73  case Intrinsic::floor:
74  case Intrinsic::ceil:
75  case Intrinsic::trunc:
76  case Intrinsic::rint:
77  case Intrinsic::nearbyint:
78  case Intrinsic::round:
79  case Intrinsic::pow:
80  case Intrinsic::fma:
81  case Intrinsic::fmuladd:
82  case Intrinsic::powi:
83  case Intrinsic::canonicalize:
84  return true;
85  default:
86  return false;
87  }
88 }
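// Editorial note (illustrative, not part of the original source): for the
// intrinsics listed above, the scalar and vector forms differ only in their
// operand and return types. For example, the scalar call
//   float @llvm.sqrt.f32(float)
// corresponds to the vector form
//   <4 x float> @llvm.sqrt.v4f32(<4 x float>)
// so the vectorizer can widen the call without scalarizing it.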
89 
90 /// Identifies if the vector form of the intrinsic has a scalar operand.
91 bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
92  unsigned ScalarOpdIdx) {
93  switch (ID) {
94  case Intrinsic::ctlz:
95  case Intrinsic::cttz:
96  case Intrinsic::powi:
97  return (ScalarOpdIdx == 1);
98  case Intrinsic::smul_fix:
99  case Intrinsic::smul_fix_sat:
100  case Intrinsic::umul_fix:
101  return (ScalarOpdIdx == 2);
102  default:
103  return false;
104  }
105 }
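// Editorial example (illustrative, not part of the original source): llvm.powi
// keeps its exponent operand scalar when vectorized, e.g.
//   float @llvm.powi.f32(float %x, i32 %n)
// widens to
//   <4 x float> @llvm.powi.v4f32(<4 x float> %x, i32 %n)
// which is why hasVectorInstrinsicScalarOpd(Intrinsic::powi, 1) returns true.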
106 
107 /// Returns intrinsic ID for call.
108 /// For the input call instruction it finds the mapping intrinsic and returns
109 /// its ID; if no mapping intrinsic is found, it returns not_intrinsic.
110 Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
111  const TargetLibraryInfo *TLI) {
112  Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI);
113  if (ID == Intrinsic::not_intrinsic)
114  return Intrinsic::not_intrinsic;
115 
116  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
117  ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
118  ID == Intrinsic::sideeffect)
119  return ID;
120  return Intrinsic::not_intrinsic;
121 }
122 
123 /// Find the operand of the GEP that should be checked for consecutive
124 /// stores. This ignores trailing indices that have no effect on the final
125 /// pointer.
126 unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
127  const DataLayout &DL = Gep->getModule()->getDataLayout();
128  unsigned LastOperand = Gep->getNumOperands() - 1;
129  unsigned GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
130 
131  // Walk backwards and try to peel off zeros.
132  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
133  // Find the type we're currently indexing into.
134  gep_type_iterator GEPTI = gep_type_begin(Gep);
135  std::advance(GEPTI, LastOperand - 2);
136 
137  // If it's a type with the same allocation size as the result of the GEP we
138  // can peel off the zero index.
139  if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
140  break;
141  --LastOperand;
142  }
143 
144  return LastOperand;
145 }
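// Editorial example (hypothetical IR, not part of the original source): for
//   %p = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %i
// the last operand is not zero, so operand index 2 (%i) is returned. For
//   %q = getelementptr { i32 }, { i32 }* %base, i64 %i, i32 0
// the trailing zero indexes a type with the same allocation size as the GEP
// result element type, so it is peeled off and operand index 1 (%i) is
// returned instead.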
146 
147 /// If the argument is a GEP, then returns the operand identified by
148 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
149 /// operand, it returns that instead.
150 Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
151  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
152  if (!GEP)
153  return Ptr;
154 
155  unsigned InductionOperand = getGEPInductionOperand(GEP);
156 
157  // Check that all of the gep indices are uniform except for our induction
158  // operand.
159  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
160  if (i != InductionOperand &&
161  !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
162  return Ptr;
163  return GEP->getOperand(InductionOperand);
164 }
165 
166 /// If a value has only one user that is a CastInst, return it.
167 Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
168  Value *UniqueCast = nullptr;
169  for (User *U : Ptr->users()) {
170  CastInst *CI = dyn_cast<CastInst>(U);
171  if (CI && CI->getType() == Ty) {
172  if (!UniqueCast)
173  UniqueCast = CI;
174  else
175  return nullptr;
176  }
177  }
178  return UniqueCast;
179 }
180 
181 /// Get the stride of a pointer access in a loop. Looks for symbolic
182 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
183 Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
184  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
185  if (!PtrTy || PtrTy->isAggregateType())
186  return nullptr;
187 
188  // Try to remove a gep instruction to make the pointer (actually the index
189  // at this point) easier to analyze. If OrigPtr is equal to Ptr, we are
190  // analyzing the pointer; otherwise, we are analyzing the index.
191  Value *OrigPtr = Ptr;
192 
193  // The size of the pointer access.
194  int64_t PtrAccessSize = 1;
195 
196  Ptr = stripGetElementPtr(Ptr, SE, Lp);
197  const SCEV *V = SE->getSCEV(Ptr);
198 
199  if (Ptr != OrigPtr)
200  // Strip off casts.
201  while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
202  V = C->getOperand();
203 
204  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
205  if (!S)
206  return nullptr;
207 
208  V = S->getStepRecurrence(*SE);
209  if (!V)
210  return nullptr;
211 
212  // Strip off the size of access multiplication if we are still analyzing the
213  // pointer.
214  if (OrigPtr == Ptr) {
215  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
216  if (M->getOperand(0)->getSCEVType() != scConstant)
217  return nullptr;
218 
219  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
220 
221  // Huge step value - give up.
222  if (APStepVal.getBitWidth() > 64)
223  return nullptr;
224 
225  int64_t StepVal = APStepVal.getSExtValue();
226  if (PtrAccessSize != StepVal)
227  return nullptr;
228  V = M->getOperand(1);
229  }
230  }
231 
232  // Strip off casts.
233  Type *StripedOffRecurrenceCast = nullptr;
234  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
235  StripedOffRecurrenceCast = C->getType();
236  V = C->getOperand();
237  }
238 
239  // Look for the loop invariant symbolic value.
240  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
241  if (!U)
242  return nullptr;
243 
244  Value *Stride = U->getValue();
245  if (!Lp->isLoopInvariant(Stride))
246  return nullptr;
247 
248  // If we have stripped off the recurrence cast we have to make sure that we
249  // return the value that is used in this loop so that we can replace it later.
250  if (StripedOffRecurrenceCast)
251  Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
252 
253  return Stride;
254 }
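// Editorial example (hypothetical IR, not part of the original source): for an
// access like A[i * Stride], i.e.
//   %mul = mul i64 %i, %Stride
//   %gep = getelementptr inbounds i32, i32* %A, i64 %mul
// the GEP is stripped so the index %mul is analyzed; its SCEV step recurrence
// is %Stride, which is loop invariant and therefore returned as the symbolic
// stride.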
255 
256 /// Given a vector and an element number, see if the scalar value is
257 /// already around as a register, for example if it were inserted then extracted
258 /// from the vector.
259 Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
260  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
261  VectorType *VTy = cast<VectorType>(V->getType());
262  unsigned Width = VTy->getNumElements();
263  if (EltNo >= Width) // Out of range access.
264  return UndefValue::get(VTy->getElementType());
265 
266  if (Constant *C = dyn_cast<Constant>(V))
267  return C->getAggregateElement(EltNo);
268 
269  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
270  // If this is an insert to a variable element, we don't know what it is.
271  if (!isa<ConstantInt>(III->getOperand(2)))
272  return nullptr;
273  unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
274 
275  // If this is an insert to the element we are looking for, return the
276  // inserted value.
277  if (EltNo == IIElt)
278  return III->getOperand(1);
279 
280  // Otherwise, the insertelement doesn't modify the value, recurse on its
281  // vector input.
282  return findScalarElement(III->getOperand(0), EltNo);
283  }
284 
285  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
286  unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
287  int InEl = SVI->getMaskValue(EltNo);
288  if (InEl < 0)
289  return UndefValue::get(VTy->getElementType());
290  if (InEl < (int)LHSWidth)
291  return findScalarElement(SVI->getOperand(0), InEl);
292  return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
293  }
294 
295  // Extract a value from a vector add operation with a constant zero.
296  // TODO: Use getBinOpIdentity() to generalize this.
297  Value *Val; Constant *C;
298  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
299  if (Constant *Elt = C->getAggregateElement(EltNo))
300  if (Elt->isNullValue())
301  return findScalarElement(Val, EltNo);
302 
303  // Otherwise, we don't know.
304  return nullptr;
305 }
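// Editorial example (illustrative, not part of the original source): given
//   %v = insertelement <4 x i32> %w, i32 %x, i32 1
// findScalarElement(%v, 1) returns %x, while findScalarElement(%v, 0) recurses
// into %w; a constant vector simply yields the requested constant element.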
306 
307 /// Get splat value if the input is a splat vector or return nullptr.
308 /// This function is not fully general. It checks only 2 cases:
309 /// the input value is (1) a splat constant vector or (2) a sequence
310 /// of instructions that broadcasts a scalar at element 0.
311 const Value *llvm::getSplatValue(const Value *V) {
312  if (isa<VectorType>(V->getType()))
313  if (auto *C = dyn_cast<Constant>(V))
314  return C->getSplatValue();
315 
316  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
317  Value *Splat;
318  if (match(V, m_ShuffleVector(m_InsertElement(m_Value(), m_Value(Splat),
319  m_ZeroInt()),
320  m_Value(), m_ZeroInt())))
321  return Splat;
322 
323  return nullptr;
324 }
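// Editorial example (illustrative, not part of the original source): both the
// constant <4 x i32> <i32 7, i32 7, i32 7, i32 7> and the broadcast idiom
//   %ins   = insertelement <4 x i32> undef, i32 %s, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
// are recognized here, returning i32 7 and %s respectively.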
325 
326 // This setting is based on its counterpart in value tracking, but it could be
327 // adjusted if needed.
328 const unsigned MaxDepth = 6;
329 
330 bool llvm::isSplatValue(const Value *V, unsigned Depth) {
331  assert(Depth <= MaxDepth && "Limit Search Depth");
332 
333  if (isa<VectorType>(V->getType())) {
334  if (isa<UndefValue>(V))
335  return true;
336  // FIXME: Constant splat analysis does not allow undef elements.
337  if (auto *C = dyn_cast<Constant>(V))
338  return C->getSplatValue() != nullptr;
339  }
340 
341  // FIXME: Constant splat analysis does not allow undef elements.
342  Constant *Mask;
343  if (match(V, m_ShuffleVector(m_Value(), m_Value(), m_Constant(Mask))))
344  return Mask->getSplatValue() != nullptr;
345 
346  // The remaining tests are all recursive, so bail out if we hit the limit.
347  if (Depth++ == MaxDepth)
348  return false;
349 
350  // If both operands of a binop are splats, the result is a splat.
351  Value *X, *Y, *Z;
352  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
353  return isSplatValue(X, Depth) && isSplatValue(Y, Depth);
354 
355  // If all operands of a select are splats, the result is a splat.
356  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
357  return isSplatValue(X, Depth) && isSplatValue(Y, Depth) &&
358  isSplatValue(Z, Depth);
359 
360  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
361 
362  return false;
363 }
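// Editorial note (illustrative, not part of the original source): unlike
// getSplatValue above, this predicate also looks through binary operators and
// selects, so e.g. an add of two splat shuffles is reported as a splat even
// when the splatted scalar is not a compile-time constant.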
364 
365 MapVector<Instruction *, uint64_t>
366 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
367  const TargetTransformInfo *TTI) {
368 
369  // DemandedBits will give us every value's live-out bits. But we want
370  // to ensure no extra casts would need to be inserted, so every DAG
371  // of connected values must have the same minimum bitwidth.
372  EquivalenceClasses<Value *> ECs;
373  SmallVector<Value *, 16> Worklist;
374  SmallPtrSet<Value *, 4> Roots;
375  SmallPtrSet<Value *, 16> Visited;
376  DenseMap<Value *, uint64_t> DBits;
377  SmallPtrSet<Instruction *, 4> InstructionSet;
378  MapVector<Instruction *, uint64_t> MinBWs;
379 
380  // Determine the roots. We work bottom-up, from truncs or icmps.
381  bool SeenExtFromIllegalType = false;
382  for (auto *BB : Blocks)
383  for (auto &I : *BB) {
384  InstructionSet.insert(&I);
385 
386  if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
387  !TTI->isTypeLegal(I.getOperand(0)->getType()))
388  SeenExtFromIllegalType = true;
389 
390  // Only deal with non-vector integers up to 64-bits wide.
391  if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
392  !I.getType()->isVectorTy() &&
393  I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
394  // Don't make work for ourselves. If we know the loaded type is legal,
395  // don't add it to the worklist.
396  if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
397  continue;
398 
399  Worklist.push_back(&I);
400  Roots.insert(&I);
401  }
402  }
403  // Early exit.
404  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
405  return MinBWs;
406 
407  // Now proceed breadth-first, unioning values together.
408  while (!Worklist.empty()) {
409  Value *Val = Worklist.pop_back_val();
410  Value *Leader = ECs.getOrInsertLeaderValue(Val);
411 
412  if (Visited.count(Val))
413  continue;
414  Visited.insert(Val);
415 
416  // Non-instructions terminate a chain successfully.
417  if (!isa<Instruction>(Val))
418  continue;
419  Instruction *I = cast<Instruction>(Val);
420 
421  // If we encounter a type that is larger than 64 bits, we can't represent
422  // it so bail out.
423  if (DB.getDemandedBits(I).getBitWidth() > 64)
424  return MapVector<Instruction *, uint64_t>();
425 
426  uint64_t V = DB.getDemandedBits(I).getZExtValue();
427  DBits[Leader] |= V;
428  DBits[I] = V;
429 
430  // Casts, loads and instructions outside of our range terminate a chain
431  // successfully.
432  if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
433  !InstructionSet.count(I))
434  continue;
435 
436  // Unsafe casts terminate a chain unsuccessfully. We can't do anything
437  // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
438  // transform anything that relies on them.
439  if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
440  !I->getType()->isIntegerTy()) {
441  DBits[Leader] |= ~0ULL;
442  continue;
443  }
444 
445  // We don't modify the types of PHIs. Reductions will already have been
446  // truncated if possible, and inductions' sizes will have been chosen by
447  // indvars.
448  if (isa<PHINode>(I))
449  continue;
450 
451  if (DBits[Leader] == ~0ULL)
452  // All bits demanded, no point continuing.
453  continue;
454 
455  for (Value *O : cast<User>(I)->operands()) {
456  ECs.unionSets(Leader, O);
457  Worklist.push_back(O);
458  }
459  }
460 
461  // Now we've discovered all values, walk them to see if there are
462  // any users we didn't see. If there are, we can't optimize that
463  // chain.
464  for (auto &I : DBits)
465  for (auto *U : I.first->users())
466  if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
467  DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
468 
469  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
470  uint64_t LeaderDemandedBits = 0;
471  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
472  LeaderDemandedBits |= DBits[*MI];
473 
474  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
475  llvm::countLeadingZeros(LeaderDemandedBits);
476  // Round up to a power of 2
477  if (!isPowerOf2_64((uint64_t)MinBW))
478  MinBW = NextPowerOf2(MinBW);
479 
480  // We don't modify the types of PHIs. Reductions will already have been
481  // truncated if possible, and inductions' sizes will have been chosen by
482  // indvars.
483  // If we are required to shrink a PHI, abandon this entire equivalence class.
484  bool Abort = false;
485  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
486  if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
487  Abort = true;
488  break;
489  }
490  if (Abort)
491  continue;
492 
493  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
494  if (!isa<Instruction>(*MI))
495  continue;
496  Type *Ty = (*MI)->getType();
497  if (Roots.count(*MI))
498  Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
499  if (MinBW < Ty->getScalarSizeInBits())
500  MinBWs[cast<Instruction>(*MI)] = MinBW;
501  }
502  }
503 
504  return MinBWs;
505 }
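// Editorial example (illustrative, not part of the original source): for a
// chain such as
//   %l = load i8, i8* %p
//   %e = zext i8 %l to i32
//   %a = add i32 %e, %e
//   %t = trunc i32 %a to i8
// only the low 8 bits of the add are demanded, so the connected values can be
// mapped to a minimum width of 8 bits (rounded up to a power of two), letting
// the vectorizer use <N x i8> lanes instead of <N x i32>.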
506 
507 /// Add all access groups in @p AccGroups to @p List.
508 template <typename ListT>
509 static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
510  // Interpret an access group as a list containing itself.
511  if (AccGroups->getNumOperands() == 0) {
512  assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
513  List.insert(AccGroups);
514  return;
515  }
516 
517  for (auto &AccGroupListOp : AccGroups->operands()) {
518  auto *Item = cast<MDNode>(AccGroupListOp.get());
519  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
520  List.insert(Item);
521  }
522 }
523 
524 MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
525  if (!AccGroups1)
526  return AccGroups2;
527  if (!AccGroups2)
528  return AccGroups1;
529  if (AccGroups1 == AccGroups2)
530  return AccGroups1;
531 
532  SmallSetVector<Metadata *, 4> Union;
533  addToAccessGroupList(Union, AccGroups1);
534  addToAccessGroupList(Union, AccGroups2);
535 
536  if (Union.size() == 0)
537  return nullptr;
538  if (Union.size() == 1)
539  return cast<MDNode>(Union.front());
540 
541  LLVMContext &Ctx = AccGroups1->getContext();
542  return MDNode::get(Ctx, Union.getArrayRef());
543 }
544 
545 MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
546  const Instruction *Inst2) {
547  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
548  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
549 
550  if (!MayAccessMem1 && !MayAccessMem2)
551  return nullptr;
552  if (!MayAccessMem1)
553  return Inst2->getMetadata(LLVMContext::MD_access_group);
554  if (!MayAccessMem2)
555  return Inst1->getMetadata(LLVMContext::MD_access_group);
556 
557  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
558  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
559  if (!MD1 || !MD2)
560  return nullptr;
561  if (MD1 == MD2)
562  return MD1;
563 
564  // Use set for scalable 'contains' check.
565  SmallPtrSet<Metadata *, 4> AccGroupSet2;
566  addToAccessGroupList(AccGroupSet2, MD2);
567 
568  SmallVector<Metadata *, 4> Intersection;
569  if (MD1->getNumOperands() == 0) {
570  assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
571  if (AccGroupSet2.count(MD1))
572  Intersection.push_back(MD1);
573  } else {
574  for (const MDOperand &Node : MD1->operands()) {
575  auto *Item = cast<MDNode>(Node.get());
576  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
577  if (AccGroupSet2.count(Item))
578  Intersection.push_back(Item);
579  }
580  }
581 
582  if (Intersection.size() == 0)
583  return nullptr;
584  if (Intersection.size() == 1)
585  return cast<MDNode>(Intersection.front());
586 
587  LLVMContext &Ctx = Inst1->getContext();
588  return MDNode::get(Ctx, Intersection);
589 }
590 
591 /// \returns \p I after propagating metadata from \p VL.
592 Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
593  Instruction *I0 = cast<Instruction>(VL[0]);
594  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
595  I0->getAllMetadataOtherThanDebugLoc(Metadata);
596 
597  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
598  LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
599  LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
600  LLVMContext::MD_access_group}) {
601  MDNode *MD = I0->getMetadata(Kind);
602 
603  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
604  const Instruction *IJ = cast<Instruction>(VL[J]);
605  MDNode *IMD = IJ->getMetadata(Kind);
606  switch (Kind) {
607  case LLVMContext::MD_tbaa:
608  MD = MDNode::getMostGenericTBAA(MD, IMD);
609  break;
610  case LLVMContext::MD_alias_scope:
611  MD = MDNode::getMostGenericAliasScope(MD, IMD);
612  break;
613  case LLVMContext::MD_fpmath:
614  MD = MDNode::getMostGenericFPMath(MD, IMD);
615  break;
616  case LLVMContext::MD_noalias:
617  case LLVMContext::MD_nontemporal:
618  case LLVMContext::MD_invariant_load:
619  MD = MDNode::intersect(MD, IMD);
620  break;
621  case LLVMContext::MD_access_group:
622  MD = intersectAccessGroups(Inst, IJ);
623  break;
624  default:
625  llvm_unreachable("unhandled metadata");
626  }
627  }
628 
629  Inst->setMetadata(Kind, MD);
630  }
631 
632  return Inst;
633 }
634 
635 Constant *
636 llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
637  const InterleaveGroup<Instruction> &Group) {
638  // All 1's means mask is not needed.
639  if (Group.getNumMembers() == Group.getFactor())
640  return nullptr;
641 
642  // TODO: support reversed access.
643  assert(!Group.isReverse() && "Reversed group not supported.");
644 
645  SmallVector<Constant *, 16> Mask;
646  for (unsigned i = 0; i < VF; i++)
647  for (unsigned j = 0; j < Group.getFactor(); ++j) {
648  unsigned HasMember = Group.getMember(j) ? 1 : 0;
649  Mask.push_back(Builder.getInt1(HasMember));
650  }
651 
652  return ConstantVector::get(Mask);
653 }
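// Editorial example (illustrative, not part of the original source): for a
// factor-2 interleave group with VF = 4 where only member 0 is present, the
// returned mask is <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i.e. one
// bit per (lane, member) pair, cleared for the missing member.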
654 
655 Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
656  unsigned ReplicationFactor, unsigned VF) {
657  SmallVector<Constant *, 16> MaskVec;
658  for (unsigned i = 0; i < VF; i++)
659  for (unsigned j = 0; j < ReplicationFactor; j++)
660  MaskVec.push_back(Builder.getInt32(i));
661 
662  return ConstantVector::get(MaskVec);
663 }
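// Editorial example (illustrative, not part of the original source):
//   createReplicatedMask(Builder, /*ReplicationFactor=*/3, /*VF=*/4)
// yields <0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3>, replicating each of the VF
// lanes ReplicationFactor times.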
664 
665 Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
666  unsigned NumVecs) {
667  SmallVector<Constant *, 16> Mask;
668  for (unsigned i = 0; i < VF; i++)
669  for (unsigned j = 0; j < NumVecs; j++)
670  Mask.push_back(Builder.getInt32(j * VF + i));
671 
672  return ConstantVector::get(Mask);
673 }
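// Editorial example (illustrative, not part of the original source):
//   createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2)
// yields <0, 4, 1, 5, 2, 6, 3, 7>, interleaving the lanes of two concatenated
// <4 x T> vectors.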
674 
675 Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
676  unsigned Stride, unsigned VF) {
677  SmallVector<Constant *, 16> Mask;
678  for (unsigned i = 0; i < VF; i++)
679  Mask.push_back(Builder.getInt32(Start + i * Stride));
680 
681  return ConstantVector::get(Mask);
682 }
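// Editorial example (illustrative, not part of the original source):
//   createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4)
// yields <0, 2, 4, 6>, selecting every second lane starting at lane 0 -- the
// de-interleaving counterpart of createInterleaveMask.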
683 
684 Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
685  unsigned NumInts, unsigned NumUndefs) {
686  SmallVector<Constant *, 16> Mask;
687  for (unsigned i = 0; i < NumInts; i++)
688  Mask.push_back(Builder.getInt32(Start + i));
689 
690  Constant *Undef = UndefValue::get(Builder.getInt32Ty());
691  for (unsigned i = 0; i < NumUndefs; i++)
692  Mask.push_back(Undef);
693 
694  return ConstantVector::get(Mask);
695 }
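// Editorial example (illustrative, not part of the original source):
//   createSequentialMask(Builder, /*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2)
// yields <0, 1, 2, 3, undef, undef>; the undef tail is what
// concatenateTwoVectors below uses to pad the shorter input.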
696 
697 /// A helper function for concatenating vectors. This function concatenates two
698 /// vectors having the same element type. If the second vector has fewer
699 /// elements than the first, it is padded with undefs.
700 static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
701  Value *V2) {
702  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
703  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
704  assert(VecTy1 && VecTy2 &&
705  VecTy1->getScalarType() == VecTy2->getScalarType() &&
706  "Expect two vectors with the same element type");
707 
708  unsigned NumElts1 = VecTy1->getNumElements();
709  unsigned NumElts2 = VecTy2->getNumElements();
710  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
711 
712  if (NumElts1 > NumElts2) {
713  // Extend with UNDEFs.
714  Constant *ExtMask =
715  createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
716  V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
717  }
718 
719  Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
720  return Builder.CreateShuffleVector(V1, V2, Mask);
721 }
722 
723 Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
724  unsigned NumVecs = Vecs.size();
725  assert(NumVecs > 1 && "Should be at least two vectors");
726 
727  SmallVector<Value *, 8> ResList;
728  ResList.append(Vecs.begin(), Vecs.end());
729  do {
730  SmallVector<Value *, 8> TmpList;
731  for (unsigned i = 0; i < NumVecs - 1; i += 2) {
732  Value *V0 = ResList[i], *V1 = ResList[i + 1];
733  assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
734  "Only the last vector may have a different type");
735 
736  TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
737  }
738 
739  // Push the last vector if the total number of vectors is odd.
740  if (NumVecs % 2 != 0)
741  TmpList.push_back(ResList[NumVecs - 1]);
742 
743  ResList = TmpList;
744  NumVecs = ResList.size();
745  } while (NumVecs > 1);
746 
747  return ResList[0];
748 }
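// Editorial example (illustrative, not part of the original source):
// concatenating two <2 x i32> values is a single shufflevector with mask
// <0, 1, 2, 3>; with three inputs the vectors are combined pairwise and the
// odd one out is carried over unchanged to the next round.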
749 
750 bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
751  auto *ConstMask = dyn_cast<Constant>(Mask);
752  if (!ConstMask)
753  return false;
754  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
755  return true;
756  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
757  ++I) {
758  if (auto *MaskElt = ConstMask->getAggregateElement(I))
759  if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
760  continue;
761  return false;
762  }
763  return true;
764 }
765 
766 
767 bool llvm::maskIsAllOneOrUndef(Value *Mask) {
768  auto *ConstMask = dyn_cast<Constant>(Mask);
769  if (!ConstMask)
770  return false;
771  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
772  return true;
773  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
774  ++I) {
775  if (auto *MaskElt = ConstMask->getAggregateElement(I))
776  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
777  continue;
778  return false;
779  }
780  return true;
781 }
782 
783 /// TODO: This is a lot like known bits, but for
784 /// vectors. Is there something we can common this with?
785 APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
786 
787  const unsigned VWidth = cast<VectorType>(Mask->getType())->getNumElements();
788  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
789  if (auto *CV = dyn_cast<ConstantVector>(Mask))
790  for (unsigned i = 0; i < VWidth; i++)
791  if (CV->getAggregateElement(i)->isNullValue())
792  DemandedElts.clearBit(i);
793  return DemandedElts;
794 }
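// Editorial example (illustrative, not part of the original source): for the
// mask <i1 1, i1 0, i1 undef, i1 1> the result is 0b1101 -- only lanes whose
// mask element is a constant zero are cleared, while undef lanes are
// conservatively kept as possibly demanded.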
795 
796 bool InterleavedAccessInfo::isStrided(int Stride) {
797  unsigned Factor = std::abs(Stride);
798  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
799 }
800 
801 void InterleavedAccessInfo::collectConstStrideAccesses(
802  MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
803  const ValueToValueMap &Strides) {
804  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
805 
806  // Since it's desired that the load/store instructions be maintained in
807  // "program order" for the interleaved access analysis, we have to visit the
808  // blocks in the loop in reverse postorder (i.e., in a topological order).
809  // Such an ordering will ensure that any load/store that may be executed
810  // before a second load/store will precede the second load/store in
811  // AccessStrideInfo.
812  LoopBlocksDFS DFS(TheLoop);
813  DFS.perform(LI);
814  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
815  for (auto &I : *BB) {
816  auto *LI = dyn_cast<LoadInst>(&I);
817  auto *SI = dyn_cast<StoreInst>(&I);
818  if (!LI && !SI)
819  continue;
820 
821  Value *Ptr = getLoadStorePointerOperand(&I);
822  // We don't check wrapping here because we don't know yet if Ptr will be
823  // part of a full group or a group with gaps. Checking wrapping for all
824  // pointers (even those that end up in groups with no gaps) will be overly
825  // conservative. For full groups, wrapping should be ok since if we would
826  // wrap around the address space we would do a memory access at nullptr
827  // even without the transformation. The wrapping checks are therefore
828  // deferred until after we've formed the interleaved groups.
829  int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
830  /*Assume=*/true, /*ShouldCheckWrap=*/false);
831 
832  const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
833  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
834  uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
835 
836  // An alignment of 0 means target ABI alignment.
837  unsigned Align = getLoadStoreAlignment(&I);
838  if (!Align)
839  Align = DL.getABITypeAlignment(PtrTy->getElementType());
840 
841  AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
842  }
843 }
844 
845 // Analyze interleaved accesses and collect them into interleaved load and
846 // store groups.
847 //
848 // When generating code for an interleaved load group, we effectively hoist all
849 // loads in the group to the location of the first load in program order. When
850 // generating code for an interleaved store group, we sink all stores to the
851 // location of the last store. This code motion can change the order of load
852 // and store instructions and may break dependences.
853 //
854 // The code generation strategy mentioned above ensures that we won't violate
855 // any write-after-read (WAR) dependences.
856 //
857 // E.g., for the WAR dependence: a = A[i]; // (1)
858 // A[i] = b; // (2)
859 //
860 // The store group of (2) is always inserted at or below (2), and the load
861 // group of (1) is always inserted at or above (1). Thus, the instructions will
862 // never be reordered. All other dependences are checked to ensure the
863 // correctness of the instruction reordering.
864 //
865 // The algorithm visits all memory accesses in the loop in bottom-up program
866 // order. Program order is established by traversing the blocks in the loop in
867 // reverse postorder when collecting the accesses.
868 //
869 // We visit the memory accesses in bottom-up order because it can simplify the
870 // construction of store groups in the presence of write-after-write (WAW)
871 // dependences.
872 //
873 // E.g., for the WAW dependence: A[i] = a; // (1)
874 // A[i] = b; // (2)
875 // A[i + 1] = c; // (3)
876 //
877 // We will first create a store group with (3) and (2). (1) can't be added to
878 // this group because it and (2) are dependent. However, (1) can be grouped
879 // with other accesses that may precede it in program order. Note that a
880 // bottom-up order does not imply that WAW dependences should not be checked.
881 void InterleavedAccessInfo::analyzeInterleaving(
882  bool EnablePredicatedInterleavedMemAccesses) {
883  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
884  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
885 
886  // Holds all accesses with a constant stride.
887  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
888  collectConstStrideAccesses(AccessStrideInfo, Strides);
889 
890  if (AccessStrideInfo.empty())
891  return;
892 
893  // Collect the dependences in the loop.
894  collectDependences();
895 
896  // Holds all interleaved store groups temporarily.
897  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
898  // Holds all interleaved load groups temporarily.
899  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
900 
901  // Search in bottom-up program order for pairs of accesses (A and B) that can
902  // form interleaved load or store groups. In the algorithm below, access A
903  // precedes access B in program order. We initialize a group for B in the
904  // outer loop of the algorithm, and then in the inner loop, we attempt to
905  // insert each A into B's group if:
906  //
907  // 1. A and B have the same stride,
908  // 2. A and B have the same memory object size, and
909  // 3. A belongs in B's group according to its distance from B.
910  //
911  // Special care is taken to ensure group formation will not break any
912  // dependences.
913  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
914  BI != E; ++BI) {
915  Instruction *B = BI->first;
916  StrideDescriptor DesB = BI->second;
917 
918  // Initialize a group for B if it has an allowable stride. Even if we don't
919  // create a group for B, we continue with the bottom-up algorithm to ensure
920  // we don't break any of B's dependences.
921  InterleaveGroup<Instruction> *Group = nullptr;
922  if (isStrided(DesB.Stride) &&
923  (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
924  Group = getInterleaveGroup(B);
925  if (!Group) {
926  LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
927  << '\n');
928  Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
929  }
930  if (B->mayWriteToMemory())
931  StoreGroups.insert(Group);
932  else
933  LoadGroups.insert(Group);
934  }
935 
936  for (auto AI = std::next(BI); AI != E; ++AI) {
937  Instruction *A = AI->first;
938  StrideDescriptor DesA = AI->second;
939 
940  // Our code motion strategy implies that we can't have dependences
941  // between accesses in an interleaved group and other accesses located
942  // between the first and last member of the group. Note that this also
943  // means that a group can't have more than one member at a given offset.
944  // The accesses in a group can have dependences with other accesses, but
945  // we must ensure we don't extend the boundaries of the group such that
946  // we encompass those dependent accesses.
947  //
948  // For example, assume we have the sequence of accesses shown below in a
949  // stride-2 loop:
950  //
951  // (1, 2) is a group | A[i] = a; // (1)
952  // | A[i-1] = b; // (2) |
953  // A[i-3] = c; // (3)
954  // A[i] = d; // (4) | (2, 4) is not a group
955  //
956  // Because accesses (2) and (3) are dependent, we can group (2) with (1)
957  // but not with (4). If we did, the dependent access (3) would be within
958  // the boundaries of the (2, 4) group.
959  if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
960  // If a dependence exists and A is already in a group, we know that A
961  // must be a store since A precedes B and WAR dependences are allowed.
962  // Thus, A would be sunk below B. We release A's group to prevent this
963  // illegal code motion. A will then be free to form another group with
964  // instructions that precede it.
965  if (isInterleaved(A)) {
966  InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
967  StoreGroups.remove(StoreGroup);
968  releaseGroup(StoreGroup);
969  }
970 
971  // If a dependence exists and A is not already in a group (or it was
972  // and we just released it), B might be hoisted above A (if B is a
973  // load) or another store might be sunk below A (if B is a store). In
974  // either case, we can't add additional instructions to B's group. B
975  // will only form a group with instructions that it precedes.
976  break;
977  }
978 
979  // At this point, we've checked for illegal code motion. If either A or B
980  // isn't strided, there's nothing left to do.
981  if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
982  continue;
983 
984  // Ignore A if it's already in a group or isn't the same kind of memory
985  // operation as B.
986  // Note that mayReadFromMemory() isn't mutually exclusive to
987  // mayWriteToMemory in the case of atomic loads. We shouldn't see those
988  // here, canVectorizeMemory() should have returned false - except for the
989  // case we asked for optimization remarks.
990  if (isInterleaved(A) ||
991  (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
992  (A->mayWriteToMemory() != B->mayWriteToMemory()))
993  continue;
994 
995  // Check rules 1 and 2. Ignore A if its stride or size is different from
996  // that of B.
997  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
998  continue;
999 
1000  // Ignore A if the memory object of A and B don't belong to the same
1001  // address space
1002  if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1003  continue;
1004 
1005  // Calculate the distance from A to B.
1006  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1007  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1008  if (!DistToB)
1009  continue;
1010  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1011 
1012  // Check rule 3. Ignore A if its distance to B is not a multiple of the
1013  // size.
1014  if (DistanceToB % static_cast<int64_t>(DesB.Size))
1015  continue;
1016 
1017  // All members of a predicated interleave-group must have the same predicate,
1018  // and currently must reside in the same BB.
1019  BasicBlock *BlockA = A->getParent();
1020  BasicBlock *BlockB = B->getParent();
1021  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1022  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1023  continue;
1024 
1025  // The index of A is the index of B plus A's distance to B in multiples
1026  // of the size.
1027  int IndexA =
1028  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1029 
1030  // Try to insert A into B's group.
1031  if (Group->insertMember(A, IndexA, DesA.Align)) {
1032  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1033  << " into the interleave group with" << *B
1034  << '\n');
1035  InterleaveGroupMap[A] = Group;
1036 
1037  // Set the first load in program order as the insert position.
1038  if (A->mayReadFromMemory())
1039  Group->setInsertPos(A);
1040  }
1041  } // Iteration over A accesses.
1042  } // Iteration over B accesses.
1043 
1044  // Remove interleaved store groups with gaps.
1045  for (auto *Group : StoreGroups)
1046  if (Group->getNumMembers() != Group->getFactor()) {
1047  LLVM_DEBUG(
1048  dbgs() << "LV: Invalidate candidate interleaved store group due "
1049  "to gaps.\n");
1050  releaseGroup(Group);
1051  }
1052  // Remove interleaved groups with gaps (currently only loads) whose memory
1053  // accesses may wrap around. We have to revisit the getPtrStride analysis,
1054  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1055  // not check wrapping (see documentation there).
1056  // FORNOW we use Assume=false;
1057  // TODO: Change to Assume=true but making sure we don't exceed the threshold
1058  // of runtime SCEV assumptions checks (thereby potentially failing to
1059  // vectorize altogether).
1060  // Additional optional optimizations:
1061  // TODO: If we are peeling the loop and we know that the first pointer doesn't
1062  // wrap then we can deduce that all pointers in the group don't wrap.
1063  // This means that we can forcefully peel the loop in order to only have to
1064  // check the first pointer for no-wrap. When we'll change to use Assume=true
1065  // we'll only need at most one runtime check per interleaved group.
1066  for (auto *Group : LoadGroups) {
1067  // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1068  // load would wrap around the address space we would do a memory access at
1069  // nullptr even without the transformation.
1070  if (Group->getNumMembers() == Group->getFactor())
1071  continue;
1072 
1073  // Case 2: If first and last members of the group don't wrap this implies
1074  // that all the pointers in the group don't wrap.
1075  // So we check only group member 0 (which is always guaranteed to exist),
1076  // and group member Factor - 1; If the latter doesn't exist we rely on
1077  // peeling (if it is a non-reversed access -- see Case 3).
1078  Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
1079  if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
1080  /*ShouldCheckWrap=*/true)) {
1081  LLVM_DEBUG(
1082  dbgs() << "LV: Invalidate candidate interleaved group due to "
1083  "first group member potentially pointer-wrapping.\n");
1084  releaseGroup(Group);
1085  continue;
1086  }
1087  Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
1088  if (LastMember) {
1089  Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
1090  if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
1091  /*ShouldCheckWrap=*/true)) {
1092  LLVM_DEBUG(
1093  dbgs() << "LV: Invalidate candidate interleaved group due to "
1094  "last group member potentially pointer-wrapping.\n");
1095  releaseGroup(Group);
1096  }
1097  } else {
1098  // Case 3: A non-reversed interleaved load group with gaps: We need
1099  // to execute at least one scalar epilogue iteration. This will ensure
1100  // we don't speculatively access memory out-of-bounds. We only need
1101  // to look for a member at index factor - 1, since every group must have
1102  // a member at index zero.
1103  if (Group->isReverse()) {
1104  LLVM_DEBUG(
1105  dbgs() << "LV: Invalidate candidate interleaved group due to "
1106  "a reverse access with gaps.\n");
1107  releaseGroup(Group);
1108  continue;
1109  }
1110  LLVM_DEBUG(
1111  dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1112  RequiresScalarEpilogue = true;
1113  }
1114  }
1115 }
1116 
1117 void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1118  // If no group had triggered the requirement to create an epilogue loop,
1119  // there is nothing to do.
1120  if (!requiresScalarEpilogue())
1121  return;
1122 
1123  // Avoid releasing a Group twice.
1124  SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
1125  for (auto &I : InterleaveGroupMap) {
1126  InterleaveGroup<Instruction> *Group = I.second;
1127  if (Group->requiresScalarEpilogue())
1128  DelSet.insert(Group);
1129  }
1130  for (auto *Ptr : DelSet) {
1131  LLVM_DEBUG(
1132  dbgs()
1133  << "LV: Invalidate candidate interleaved group due to gaps that "
1134  "require a scalar epilogue (not allowed under optsize) and cannot "
1135  "be masked (not enabled). \n");
1136  releaseGroup(Ptr);
1137  }
1138 
1139  RequiresScalarEpilogue = false;
1140 }
1141 
1142 template <typename InstT>
1143 void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1144  llvm_unreachable("addMetadata can only be used for Instruction");
1145 }
1146 
1147 namespace llvm {
1148 template <>
1149 void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1150  SmallVector<Value *, 4> VL;
1151  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1152  [](std::pair<int, Instruction *> p) { return p.second; });
1153  propagateMetadata(NewInst, VL);
1154 }
1155 }