LLVM  9.0.0svn
VectorUtils.cpp
//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

#define DEBUG_TYPE "vectorutils"

using namespace llvm;
using namespace llvm::PatternMatch;

/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));

/// Return true if all of the intrinsic's arguments and return type are scalars
/// for the scalar form of the intrinsic and vectors for the vector form of the
/// intrinsic.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::bswap: // Begin integer bit-manipulation.
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::umul_fix:
  case Intrinsic::sqrt: // Begin floating-point.
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::fabs:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::pow:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::powi:
  case Intrinsic::canonicalize:
    return true;
  default:
    return false;
  }
}

/// Identifies if the intrinsic has a scalar operand. It checks for the
/// ctlz, cttz and powi special intrinsics whose argument is scalar.
bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
                                        unsigned ScalarOpdIdx) {
  switch (ID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  case Intrinsic::smul_fix:
  case Intrinsic::umul_fix:
    return (ScalarOpdIdx == 2);
  default:
    return false;
  }
}

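// Example (illustrative): for ctlz/cttz the operand at index 1 is the i1
// 'is_zero_undef' flag and for powi it is the i32 exponent; these operands
// stay scalar even in the vector form of the intrinsic, so
// hasVectorInstrinsicScalarOpd(Intrinsic::powi, 1) is true while index 0
// (the value being operated on) yields false.
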
/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its ID; if no mapping is found, it returns not_intrinsic.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
                                                const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI);
  if (ID == Intrinsic::not_intrinsic)
    return Intrinsic::not_intrinsic;

  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::sideeffect)
    return ID;
  return Intrinsic::not_intrinsic;
}

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
  const DataLayout &DL = Gep->getModule()->getDataLayout();
  unsigned LastOperand = Gep->getNumOperands() - 1;
  unsigned GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());

  // Walk backwards and try to peel off zeros.
  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
    // Find the type we're currently indexing into.
    gep_type_iterator GEPTI = gep_type_begin(Gep);
    std::advance(GEPTI, LastOperand - 2);

    // If it's a type with the same allocation size as the result of the GEP we
    // can peel off the zero index.
    if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
      break;
    --LastOperand;
  }

  return LastOperand;
}

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP)
    return Ptr;

  unsigned InductionOperand = getGEPInductionOperand(GEP);

  // Check that all of the gep indices are uniform except for our induction
  // operand.
  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
    if (i != InductionOperand &&
        !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
      return Ptr;
  return GEP->getOperand(InductionOperand);
}

/// If a value has only one user that is a CastInst, return it.
Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
  Value *UniqueCast = nullptr;
  for (User *U : Ptr->users()) {
    CastInst *CI = dyn_cast<CastInst>(U);
    if (CI && CI->getType() == Ty) {
      if (!UniqueCast)
        UniqueCast = CI;
      else
        return nullptr;
    }
  }
  return UniqueCast;
}

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy || PtrTy->isAggregateType())
    return nullptr;

  // Try to remove a gep instruction to make the pointer (actually the index at
  // this point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing
  // the pointer, otherwise we are analyzing the index.
  Value *OrigPtr = Ptr;

  // The size of the pointer access.
  int64_t PtrAccessSize = 1;

  Ptr = stripGetElementPtr(Ptr, SE, Lp);
  const SCEV *V = SE->getSCEV(Ptr);

  if (Ptr != OrigPtr)
    // Strip off casts.
    while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
      V = C->getOperand();

  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
  if (!S)
    return nullptr;

  V = S->getStepRecurrence(*SE);
  if (!V)
    return nullptr;

  // Strip off the size of access multiplication if we are still analyzing the
  // pointer.
  if (OrigPtr == Ptr) {
    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
      if (M->getOperand(0)->getSCEVType() != scConstant)
        return nullptr;

      const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();

      // Huge step value - give up.
      if (APStepVal.getBitWidth() > 64)
        return nullptr;

      int64_t StepVal = APStepVal.getSExtValue();
      if (PtrAccessSize != StepVal)
        return nullptr;
      V = M->getOperand(1);
    }
  }

  // Strip off casts.
  Type *StripedOffRecurrenceCast = nullptr;
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
    StripedOffRecurrenceCast = C->getType();
    V = C->getOperand();
  }

  // Look for the loop invariant symbolic value.
  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
  if (!U)
    return nullptr;

  Value *Stride = U->getValue();
  if (!Lp->isLoopInvariant(Stride))
    return nullptr;

  // If we have stripped off the recurrence cast we have to make sure that we
  // return the value that is used in this loop so that we can replace it later.
  if (StripedOffRecurrenceCast)
    Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);

  return Stride;
}

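// Example (illustrative sketch; %a, %stride and %i are hypothetical IR values,
// with %i assumed to be the canonical induction variable): for an access like
// a[i * stride], i.e.
//   %mul = mul i64 %i, %stride
//   %gep = getelementptr i32, i32* %a, i64 %mul
// stripGetElementPtr() returns the index %mul, whose SCEV is the add recurrence
// {0,+,%stride}. Its step is the loop-invariant SCEVUnknown %stride, which is
// what this function returns as the symbolic stride.
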
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  unsigned Width = VTy->getNumElements();
  if (EltNo >= Width)  // Out of range access.
    return UndefValue::get(VTy->getElementType());

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert to a variable element, we don't know what it is.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

    // If this is an insert to the element we are looking for, return the
    // inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);

    // Otherwise, the insertelement doesn't modify the value, recurse on its
    // vector input.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
    unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return UndefValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add operation with a constant zero.
  // TODO: Use getBinOpIdentity() to generalize this.
  Value *Val; Constant *C;
  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
    if (Constant *Elt = C->getAggregateElement(EltNo))
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // Otherwise, we don't know.
  return nullptr;
}

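// Example (illustrative sketch; %v and %x are hypothetical IR values): given
//   %v1 = insertelement <4 x i32> %v, i32 %x, i32 1
// findScalarElement(%v1, 1) returns %x directly, while findScalarElement(%v1, 2)
// recurses into %v; no extractelement instruction has to be emitted.
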
/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcast a single value into a vector.
///
const llvm::Value *llvm::getSplatValue(const Value *V) {

  if (auto *C = dyn_cast<Constant>(V))
    if (isa<VectorType>(V->getType()))
      return C->getSplatValue();

  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
  if (!ShuffleInst)
    return nullptr;
  // All-zero (or undef) shuffle mask elements.
  for (int MaskElt : ShuffleInst->getShuffleMask())
    if (MaskElt != 0 && MaskElt != -1)
      return nullptr;
  // The first shuffle source is 'insertelement' with index 0.
  auto *InsertEltInst =
      dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
    return nullptr;

  return InsertEltInst->getOperand(1);
}

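// Example (illustrative sketch; %f is a hypothetical IR value): the broadcast
// idiom recognized above is
//   %ins   = insertelement <4 x float> undef, float %f, i32 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> undef,
//                          <4 x i32> zeroinitializer
// for which getSplatValue(%splat) returns %f. A constant splat vector is
// handled by the Constant::getSplatValue() call instead.
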
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {

  // DemandedBits will give us every value's live-out bits. But we want
  // to ensure no extra casts would need to be inserted, so every DAG
  // of connected values must have the same minimum bitwidth.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64-bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Don't make work for ourselves. If we know the loaded type is legal,
        // don't add it to the worklist.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;

        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    if (Visited.count(Val))
      continue;
    Visited.insert(Val);

    // Non-instructions terminate a chain successfully.
    if (!isa<Instruction>(Val))
      continue;
    Instruction *I = cast<Instruction>(Val);

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
    // transform anything that relies on them.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    if (isa<PHINode>(I))
      continue;

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Now we've discovered all values, walk them to see if there are
  // any users we didn't see. If there are, we can't optimize that
  // chain.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
    uint64_t LeaderDemandedBits = 0;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      LeaderDemandedBits |= DBits[*MI];

    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
                     llvm::countLeadingZeros(LeaderDemandedBits);
    // Round up to a power of 2
    if (!isPowerOf2_64((uint64_t)MinBW))
      MinBW = NextPowerOf2(MinBW);

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    // If we are required to shrink a PHI, abandon this entire equivalence class.
    bool Abort = false;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
        Abort = true;
        break;
      }
    if (Abort)
      continue;

    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
      if (!isa<Instruction>(*MI))
        continue;
      Type *Ty = (*MI)->getType();
      if (Roots.count(*MI))
        Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
      if (MinBW < Ty->getScalarSizeInBits())
        MinBWs[cast<Instruction>(*MI)] = MinBW;
    }
  }

  return MinBWs;
}

/// Add all access groups in @p AccGroups to @p List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
  // Interpret an access group as a list containing itself.
  if (AccGroups->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
    List.insert(AccGroups);
    return;
  }

  for (auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
    assert(isValidAsAccessGroup(Item) && "List item must be an access group");
    List.insert(Item);
  }
}

MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
  if (!AccGroups1)
    return AccGroups2;
  if (!AccGroups2)
    return AccGroups1;
  if (AccGroups1 == AccGroups2)
    return AccGroups1;

  SmallSetVector<Metadata *, 4> Union;
  addToAccessGroupList(Union, AccGroups1);
  addToAccessGroupList(Union, AccGroups2);

  if (Union.size() == 0)
    return nullptr;
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());

  LLVMContext &Ctx = AccGroups1->getContext();
  return MDNode::get(Ctx, Union.getArrayRef());
}

MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
                                    const Instruction *Inst2) {
  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();

  if (!MayAccessMem1 && !MayAccessMem2)
    return nullptr;
  if (!MayAccessMem1)
    return Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MayAccessMem2)
    return Inst1->getMetadata(LLVMContext::MD_access_group);

  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MD1 || !MD2)
    return nullptr;
  if (MD1 == MD2)
    return MD1;

  // Use set for scalable 'contains' check.
  SmallPtrSet<Metadata *, 4> AccGroupSet2;
  addToAccessGroupList(AccGroupSet2, MD2);

  SmallVector<Metadata *, 4> Intersection;
  if (MD1->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
    if (AccGroupSet2.count(MD1))
      Intersection.push_back(MD1);
  } else {
    for (const MDOperand &Node : MD1->operands()) {
      auto *Item = cast<MDNode>(Node.get());
      assert(isValidAsAccessGroup(Item) && "List item must be an access group");
      if (AccGroupSet2.count(Item))
        Intersection.push_back(Item);
    }
  }

  if (Intersection.size() == 0)
    return nullptr;
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());

  LLVMContext &Ctx = Inst1->getContext();
  return MDNode::get(Ctx, Intersection);
}

/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
  Instruction *I0 = cast<Instruction>(VL[0]);
  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
  I0->getAllMetadataOtherThanDebugLoc(Metadata);

  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                    LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                    LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
                    LLVMContext::MD_access_group}) {
    MDNode *MD = I0->getMetadata(Kind);

    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      const Instruction *IJ = cast<Instruction>(VL[J]);
      MDNode *IMD = IJ->getMetadata(Kind);
      switch (Kind) {
      case LLVMContext::MD_tbaa:
        MD = MDNode::getMostGenericTBAA(MD, IMD);
        break;
      case LLVMContext::MD_alias_scope:
        MD = MDNode::getMostGenericAliasScope(MD, IMD);
        break;
      case LLVMContext::MD_fpmath:
        MD = MDNode::getMostGenericFPMath(MD, IMD);
        break;
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
        MD = MDNode::intersect(MD, IMD);
        break;
      case LLVMContext::MD_access_group:
        MD = intersectAccessGroups(Inst, IJ);
        break;
      default:
        llvm_unreachable("unhandled metadata");
      }
    }

    Inst->setMetadata(Kind, MD);
  }

  return Inst;
}

Constant *
llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
                           const InterleaveGroup<Instruction> &Group) {
  // All 1's means mask is not needed.
  if (Group.getNumMembers() == Group.getFactor())
    return nullptr;

  // TODO: support reversed access.
  assert(!Group.isReverse() && "Reversed group not supported.");

  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
    }

  return ConstantVector::get(Mask);
}

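// Example (illustrative): for VF = 4 and a factor-2 group whose second member
// is missing, the loops above produce the i1 vector
//   <1, 0, 1, 0, 1, 0, 1, 0>
// i.e. the gap lane of every interleaved chunk is masked off.
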
Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
                                     unsigned ReplicationFactor, unsigned VF) {
  SmallVector<Constant *, 16> MaskVec;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
      MaskVec.push_back(Builder.getInt32(i));

  return ConstantVector::get(MaskVec);
}

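// Example (illustrative): for ReplicationFactor = 3 and VF = 4 the mask is
//   <0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3>
// which repeats each of the VF source lanes ReplicationFactor times.
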
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                                     unsigned NumVecs) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(Builder.getInt32(j * VF + i));

  return ConstantVector::get(Mask);
}

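// Example (illustrative): for VF = 4 and NumVecs = 2 the mask is
//   <0, 4, 1, 5, 2, 6, 3, 7>
// which interleaves the lanes of two concatenated 4-element vectors.
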
Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
                                 unsigned Stride, unsigned VF) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Builder.getInt32(Start + i * Stride));

  return ConstantVector::get(Mask);
}

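// Example (illustrative): for Start = 0, Stride = 2 and VF = 4 the mask is
//   <0, 2, 4, 6>
// selecting every Stride-th lane beginning at Start.
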
Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                                     unsigned NumInts, unsigned NumUndefs) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Builder.getInt32(Start + i));

  Constant *Undef = UndefValue::get(Builder.getInt32Ty());
  for (unsigned i = 0; i < NumUndefs; i++)
    Mask.push_back(Undef);

  return ConstantVector::get(Mask);
}

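// Example (illustrative): for Start = 0, NumInts = 4 and NumUndefs = 2 the
// mask is
//   <0, 1, 2, 3, undef, undef>
// i.e. a consecutive run of indices padded with undefs.
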
/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
                                    Value *V2) {
  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");

  unsigned NumElts1 = VecTy1->getNumElements();
  unsigned NumElts2 = VecTy2->getNumElements();
  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");

  if (NumElts1 > NumElts2) {
    // Extend with UNDEFs.
    Constant *ExtMask =
        createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
    V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
  }

  Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
  return Builder.CreateShuffleVector(V1, V2, Mask);
}

Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");

  SmallVector<Value *, 8> ResList;
  ResList.append(Vecs.begin(), Vecs.end());
  do {
    SmallVector<Value *, 8> TmpList;
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");

      TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
    }

    // Push the last vector if the total number of vectors is odd.
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);

    ResList = TmpList;
    NumVecs = ResList.size();
  } while (NumVecs > 1);

  return ResList[0];
}

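// Example (illustrative): concatenating four <4 x i32> values pairs them up
// round by round, producing two <8 x i32> values and finally one <16 x i32>
// result; with an odd count the leftover vector is carried into the next round
// and, if narrower, padded with undefs by concatenateTwoVectors.
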
bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
    return true;
  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
       ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
        continue;
    return false;
  }
  return true;
}

bool llvm::maskIsAllOneOrUndef(Value *Mask) {
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
    return true;
  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
       ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
        continue;
    return false;
  }
  return true;
}

/// TODO: This is a lot like known bits, but for
/// vectors. Is there something we can common this with?
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {

  const unsigned VWidth = cast<VectorType>(Mask->getType())->getNumElements();
  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
  if (auto *CV = dyn_cast<ConstantVector>(Mask))
    for (unsigned i = 0; i < VWidth; i++)
      if (CV->getAggregateElement(i)->isNullValue())
        DemandedElts.clearBit(i);
  return DemandedElts;
}

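// Example (illustrative): for a constant mask <i1 1, i1 0, i1 1, i1 0>,
// possiblyDemandedEltsInMask() returns an APInt with bits 0 and 2 set (0b0101),
// while maskIsAllZeroOrUndef() and maskIsAllOneOrUndef() both return false.
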
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}

void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const ValueToValueMap &Strides) {
  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();

  // Since it's desired that the load/store instructions be maintained in
  // "program order" for the interleaved access analysis, we have to visit the
  // blocks in the loop in reverse postorder (i.e., in a topological order).
  // Such an ordering will ensure that any load/store that may be executed
  // before a second load/store will precede the second load/store in
  // AccessStrideInfo.
  LoopBlocksDFS DFS(TheLoop);
  DFS.perform(LI);
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    for (auto &I : *BB) {
      auto *LI = dyn_cast<LoadInst>(&I);
      auto *SI = dyn_cast<StoreInst>(&I);
      if (!LI && !SI)
        continue;

      Value *Ptr = getLoadStorePointerOperand(&I);
      // We don't check wrapping here because we don't know yet if Ptr will be
      // part of a full group or a group with gaps. Checking wrapping for all
      // pointers (even those that end up in groups with no gaps) will be overly
      // conservative. For full groups, wrapping should be ok since if we would
      // wrap around the address space we would do a memory access at nullptr
      // even without the transformation. The wrapping checks are therefore
      // deferred until after we've formed the interleaved groups.
      int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
                                    /*Assume=*/true, /*ShouldCheckWrap=*/false);

      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
      PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
      uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());

      // An alignment of 0 means target ABI alignment.
      unsigned Align = getLoadStoreAlignment(&I);
      if (!Align)
        Align = DL.getABITypeAlignment(PtrTy->getElementType());

      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
    }
}

// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence:  a = A[i];  // (1)
//                                A[i] = b;  // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence:  A[i] = a;      // (1)
//                                A[i] = b;      // (2)
//                                A[i + 1] = c;  // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const ValueToValueMap &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup<Instruction> *Group = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
      Group = getInterleaveGroup(B);
      if (!Group) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
      }
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      else
        LoadGroups.insert(Group);
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a; // (1)
      //                    | A[i-1] = b; // (2) |
      //                      A[i-3] = c; // (3)
      //                      A[i]   = d; // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        // If a dependence exists and A is already in a group, we know that A
        // must be a store since A precedes B and WAR dependences are allowed.
        // Thus, A would be sunk below B. We release A's group to prevent this
        // illegal code motion. A will then be free to form another group with
        // instructions that precede it.
        if (isInterleaved(A)) {
          InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }

        // If a dependence exists and A is not already in a group (or it was
        // and we just released it), B might be hoisted above A (if B is a
        // load) or another store might be sunk below A (if B is a store). In
        // either case, we can't add additional instructions to B's group. B
        // will only form a group with instructions that it precedes.
        break;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive to
      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
      // here, canVectorizeMemory() should have returned false - except for the
      // case we asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory object of A and B don't belong to the same
      // address space.
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;
972 
973  // Calculate the distance from A to B.
974  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
975  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
976  if (!DistToB)
977  continue;
978  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
979 
980  // Check rule 3. Ignore A if its distance to B is not a multiple of the
981  // size.
982  if (DistanceToB % static_cast<int64_t>(DesB.Size))
983  continue;
984 
985  // All members of a predicated interleave-group must have the same predicate,
986  // and currently must reside in the same BB.
987  BasicBlock *BlockA = A->getParent();
988  BasicBlock *BlockB = B->getParent();
989  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
990  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
991  continue;
992 
993  // The index of A is the index of B plus A's distance to B in multiples
994  // of the size.
995  int IndexA =
996  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
997 
998  // Try to insert A into B's group.
999  if (Group->insertMember(A, IndexA, DesA.Align)) {
1000  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1001  << " into the interleave group with" << *B
1002  << '\n');
1003  InterleaveGroupMap[A] = Group;
1004 
1005  // Set the first load in program order as the insert position.
1006  if (A->mayReadFromMemory())
1007  Group->setInsertPos(A);
1008  }
1009  } // Iteration over A accesses.
1010  } // Iteration over B accesses.
1011 
  // Remove interleaved store groups with gaps.
  for (auto *Group : StoreGroups)
    if (Group->getNumMembers() != Group->getFactor()) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
    }
  // Remove interleaved groups with gaps (currently only loads) whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // FORNOW we use Assume=false;
  // TODO: Change to Assume=true but making sure we don't exceed the threshold
  // of runtime SCEV assumptions checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer doesn't
  // wrap then we can deduce that all pointers in the group don't wrap.
  // This means that we can forcefully peel the loop in order to only have to
  // check the first pointer for no-wrap. When we'll change to use Assume=true
  // we'll only need at most one runtime check per interleaved group.
  for (auto *Group : LoadGroups) {
    // Case 1: A full group. Can Skip the checks; For full groups, if the wide
    // load would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and group member Factor - 1; If the latter doesn't exist we rely on
    // peeling (if it is a non-reversed access -- see Case 3).
    Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                      /*ShouldCheckWrap=*/true)) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved group due to "
                    "first group member potentially pointer-wrapping.\n");
      releaseGroup(Group);
      continue;
    }
    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
    if (LastMember) {
      Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
      if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
                        /*ShouldCheckWrap=*/true)) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "last group member potentially pointer-wrapping.\n");
        releaseGroup(Group);
      }
    } else {
      // Case 3: A non-reversed interleaved load group with gaps: We need
      // to execute at least one scalar epilogue iteration. This will ensure
      // we don't speculatively access memory out-of-bounds. We only need
      // to look for a member at index factor - 1, since every group must have
      // a member at index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
}

void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
  // If no group had triggered the requirement to create an epilogue loop,
  // there is nothing to do.
  if (!requiresScalarEpilogue())
    return;

  // Avoid releasing a Group twice.
  SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
  for (auto &I : InterleaveGroupMap) {
    InterleaveGroup<Instruction> *Group = I.second;
    if (Group->requiresScalarEpilogue())
      DelSet.insert(Group);
  }
  for (auto *Ptr : DelSet) {
    LLVM_DEBUG(
        dbgs()
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroup(Ptr);
  }

  RequiresScalarEpilogue = false;
}

template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
  llvm_unreachable("addMetadata can only be used for Instruction");
}

namespace llvm {
template <>
void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
  SmallVector<Value *, 4> VL;
  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                 [](std::pair<int, Instruction *> p) { return p.second; });
  propagateMetadata(NewInst, VL);
}
}