LLVM 10.0.0svn
VectorUtils.cpp
1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/VectorUtils.h"
14 #include "llvm/ADT/EquivalenceClasses.h"
15 #include "llvm/Analysis/DemandedBits.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/LoopIterator.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/GetElementPtrTypeIterator.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/IR/Value.h"
27 
28 #define DEBUG_TYPE "vectorutils"
29 
30 using namespace llvm;
31 using namespace llvm::PatternMatch;
32 
33 /// Maximum factor for an interleaved memory access.
34 static cl::opt<unsigned> MaxInterleaveGroupFactor(
35  "max-interleave-group-factor", cl::Hidden,
36  cl::desc("Maximum factor for an interleaved access group (default = 8)"),
37  cl::init(8));
38 
39 /// Return true if all of the intrinsic's arguments and return type are scalars
40 /// for the scalar form of the intrinsic, and vectors for the vector form of the
41 /// intrinsic (except operands that are marked as always being scalar by
42 /// hasVectorInstrinsicScalarOpd).
43 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
44  switch (ID) {
45  case Intrinsic::bswap: // Begin integer bit-manipulation.
46  case Intrinsic::bitreverse:
47  case Intrinsic::ctpop:
48  case Intrinsic::ctlz:
49  case Intrinsic::cttz:
50  case Intrinsic::fshl:
51  case Intrinsic::fshr:
52  case Intrinsic::sadd_sat:
53  case Intrinsic::ssub_sat:
54  case Intrinsic::uadd_sat:
55  case Intrinsic::usub_sat:
56  case Intrinsic::smul_fix:
57  case Intrinsic::smul_fix_sat:
58  case Intrinsic::umul_fix:
59  case Intrinsic::umul_fix_sat:
60  case Intrinsic::sqrt: // Begin floating-point.
61  case Intrinsic::sin:
62  case Intrinsic::cos:
63  case Intrinsic::exp:
64  case Intrinsic::exp2:
65  case Intrinsic::log:
66  case Intrinsic::log10:
67  case Intrinsic::log2:
68  case Intrinsic::fabs:
69  case Intrinsic::minnum:
70  case Intrinsic::maxnum:
71  case Intrinsic::minimum:
72  case Intrinsic::maximum:
73  case Intrinsic::copysign:
74  case Intrinsic::floor:
75  case Intrinsic::ceil:
76  case Intrinsic::trunc:
77  case Intrinsic::rint:
78  case Intrinsic::nearbyint:
79  case Intrinsic::round:
80  case Intrinsic::pow:
81  case Intrinsic::fma:
82  case Intrinsic::fmuladd:
83  case Intrinsic::powi:
84  case Intrinsic::canonicalize:
85  return true;
86  default:
87  return false;
88  }
89 }
90 
91 /// Identifies if the vector form of the intrinsic has a scalar operand.
92 bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
93  unsigned ScalarOpdIdx) {
94  switch (ID) {
95  case Intrinsic::ctlz:
96  case Intrinsic::cttz:
97  case Intrinsic::powi:
98  return (ScalarOpdIdx == 1);
99  case Intrinsic::smul_fix:
100  case Intrinsic::smul_fix_sat:
101  case Intrinsic::umul_fix:
102  case Intrinsic::umul_fix_sat:
103  return (ScalarOpdIdx == 2);
104  default:
105  return false;
106  }
107 }
108 
109 /// Returns intrinsic ID for call.
110 /// For the input call instruction it finds the mapping intrinsic and returns
111 /// its ID; if no mapping is found, it returns not_intrinsic.
112 Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
113  const TargetLibraryInfo *TLI) {
114  Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI);
115  if (ID == Intrinsic::not_intrinsic)
116  return Intrinsic::not_intrinsic;
117 
118  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
119  ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
120  ID == Intrinsic::sideeffect)
121  return ID;
122  return Intrinsic::not_intrinsic;
123 }
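Illustrative usage sketch (editorial addition, not part of VectorUtils.cpp): how a client pass might combine the three helpers above to decide whether a call can be widened; canWidenCall is a hypothetical name and the uniformity handling is simplified.

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool canWidenCall(CallInst *CI, const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
  if (ID == Intrinsic::not_intrinsic || !isTriviallyVectorizable(ID))
    return false;
  // Operands flagged as scalar (e.g. the exponent of powi) stay scalar in the
  // vector form; here we merely require that they are not already vectors.
  for (unsigned Idx = 0, E = CI->getNumArgOperands(); Idx != E; ++Idx)
    if (hasVectorInstrinsicScalarOpd(ID, Idx) &&
        CI->getArgOperand(Idx)->getType()->isVectorTy())
      return false;
  return true;
}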
124 
125 /// Find the operand of the GEP that should be checked for consecutive
126 /// stores. This ignores trailing indices that have no effect on the final
127 /// pointer.
128 unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
129  const DataLayout &DL = Gep->getModule()->getDataLayout();
130  unsigned LastOperand = Gep->getNumOperands() - 1;
131  unsigned GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
132 
133  // Walk backwards and try to peel off zeros.
134  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
135  // Find the type we're currently indexing into.
136  gep_type_iterator GEPTI = gep_type_begin(Gep);
137  std::advance(GEPTI, LastOperand - 2);
138 
139  // If it's a type with the same allocation size as the result of the GEP we
140  // can peel off the zero index.
141  if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
142  break;
143  --LastOperand;
144  }
145 
146  return LastOperand;
147 }
148 
149 /// If the argument is a GEP, then returns the operand identified by
150 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
151 /// operand, it returns that instead.
152 Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
153  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
154  if (!GEP)
155  return Ptr;
156 
157  unsigned InductionOperand = getGEPInductionOperand(GEP);
158 
159  // Check that all of the gep indices are uniform except for our induction
160  // operand.
161  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
162  if (i != InductionOperand &&
163  !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
164  return Ptr;
165  return GEP->getOperand(InductionOperand);
166 }
167 
168 /// If a value has only one user that is a CastInst, return it.
169 Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
170  Value *UniqueCast = nullptr;
171  for (User *U : Ptr->users()) {
172  CastInst *CI = dyn_cast<CastInst>(U);
173  if (CI && CI->getType() == Ty) {
174  if (!UniqueCast)
175  UniqueCast = CI;
176  else
177  return nullptr;
178  }
179  }
180  return UniqueCast;
181 }
182 
183 /// Get the stride of a pointer access in a loop. Looks for symbolic
184 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
185 Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
186  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
187  if (!PtrTy || PtrTy->isAggregateType())
188  return nullptr;
189 
190  // Try to remove a gep instruction to make the pointer (actually the index at
191  // this point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing the
192  // pointer, otherwise, we are analyzing the index.
193  Value *OrigPtr = Ptr;
194 
195  // The size of the pointer access.
196  int64_t PtrAccessSize = 1;
197 
198  Ptr = stripGetElementPtr(Ptr, SE, Lp);
199  const SCEV *V = SE->getSCEV(Ptr);
200 
201  if (Ptr != OrigPtr)
202  // Strip off casts.
203  while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
204  V = C->getOperand();
205 
206  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
207  if (!S)
208  return nullptr;
209 
210  V = S->getStepRecurrence(*SE);
211  if (!V)
212  return nullptr;
213 
214  // Strip off the size of access multiplication if we are still analyzing the
215  // pointer.
216  if (OrigPtr == Ptr) {
217  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
218  if (M->getOperand(0)->getSCEVType() != scConstant)
219  return nullptr;
220 
221  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
222 
223  // Huge step value - give up.
224  if (APStepVal.getBitWidth() > 64)
225  return nullptr;
226 
227  int64_t StepVal = APStepVal.getSExtValue();
228  if (PtrAccessSize != StepVal)
229  return nullptr;
230  V = M->getOperand(1);
231  }
232  }
233 
234  // Strip off casts.
235  Type *StripedOffRecurrenceCast = nullptr;
236  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
237  StripedOffRecurrenceCast = C->getType();
238  V = C->getOperand();
239  }
240 
241  // Look for the loop invariant symbolic value.
242  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
243  if (!U)
244  return nullptr;
245 
246  Value *Stride = U->getValue();
247  if (!Lp->isLoopInvariant(Stride))
248  return nullptr;
249 
250  // If we have stripped off the recurrence cast we have to make sure that we
251  // return the value that is used in this loop so that we can replace it later.
252  if (StripedOffRecurrenceCast)
253  Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
254 
255  return Stride;
256 }
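Illustrative usage sketch (editorial addition, not part of VectorUtils.cpp): for an access like A[i * n] inside loop Lp, the address recurrence steps by ElementSize * %n, and the helper returns %n (looking through a unique cast if one was stripped); hasSymbolicStride is a hypothetical wrapper.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static bool hasSymbolicStride(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  // Returns the symbolic stride value (e.g. %n above), or nullptr when the
  // stride is constant or cannot be recognized.
  return getStrideFromPointer(Ptr, SE, Lp) != nullptr;
}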
257 
258 /// Given a vector and an element number, see if the scalar value is
259 /// already around as a register, for example if it were inserted then extracted
260 /// from the vector.
261 Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
262  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
263  VectorType *VTy = cast<VectorType>(V->getType());
264  unsigned Width = VTy->getNumElements();
265  if (EltNo >= Width) // Out of range access.
266  return UndefValue::get(VTy->getElementType());
267 
268  if (Constant *C = dyn_cast<Constant>(V))
269  return C->getAggregateElement(EltNo);
270 
271  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
272  // If this is an insert to a variable element, we don't know what it is.
273  if (!isa<ConstantInt>(III->getOperand(2)))
274  return nullptr;
275  unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
276 
277  // If this is an insert to the element we are looking for, return the
278  // inserted value.
279  if (EltNo == IIElt)
280  return III->getOperand(1);
281 
282  // Otherwise, the insertelement doesn't modify the value, recurse on its
283  // vector input.
284  return findScalarElement(III->getOperand(0), EltNo);
285  }
286 
287  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
288  unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
289  int InEl = SVI->getMaskValue(EltNo);
290  if (InEl < 0)
291  return UndefValue::get(VTy->getElementType());
292  if (InEl < (int)LHSWidth)
293  return findScalarElement(SVI->getOperand(0), InEl);
294  return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
295  }
296 
297  // Extract a value from a vector add operation with a constant zero.
298  // TODO: Use getBinOpIdentity() to generalize this.
299  Value *Val; Constant *C;
300  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
301  if (Constant *Elt = C->getAggregateElement(EltNo))
302  if (Elt->isNullValue())
303  return findScalarElement(Val, EltNo);
304 
305  // Otherwise, we don't know.
306  return nullptr;
307 }
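Illustrative usage sketch (editorial addition, not part of VectorUtils.cpp): recovering an inserted scalar without emitting an extractelement; scalarFromLane2 is a hypothetical helper.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *scalarFromLane2(IRBuilder<> &Builder, Value *Vec, Value *Scalar) {
  // %v = insertelement <4 x float> %Vec, float %Scalar, i32 2
  Value *V = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(2));
  // Walks back through the insertelement and returns Scalar directly; a
  // nullptr result would mean the element could not be identified.
  return findScalarElement(V, 2);
}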
308 
309 /// Get splat value if the input is a splat vector or return nullptr.
310 /// This function is not fully general. It checks only 2 cases:
311 /// the input value is (1) a splat constant vector or (2) a sequence
312 /// of instructions that broadcasts a scalar at element 0.
313 const Value *llvm::getSplatValue(const Value *V) {
314  if (isa<VectorType>(V->getType()))
315  if (auto *C = dyn_cast<Constant>(V))
316  return C->getSplatValue();
317 
318  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
319  Value *Splat;
320  if (match(V, m_ShuffleVector(m_InsertElement(m_Value(), m_Value(Splat),
321  m_ZeroInt()),
322  m_Value(), m_ZeroInt())))
323  return Splat;
324 
325  return nullptr;
326 }
327 
328 // This setting is based on its counterpart in value tracking, but it could be
329 // adjusted if needed.
330 const unsigned MaxDepth = 6;
331 
332 bool llvm::isSplatValue(const Value *V, unsigned Depth) {
333  assert(Depth <= MaxDepth && "Limit Search Depth");
334 
335  if (isa<VectorType>(V->getType())) {
336  if (isa<UndefValue>(V))
337  return true;
338  // FIXME: Constant splat analysis does not allow undef elements.
339  if (auto *C = dyn_cast<Constant>(V))
340  return C->getSplatValue() != nullptr;
341  }
342 
343  // FIXME: Constant splat analysis does not allow undef elements.
344  Constant *Mask;
345  if (match(V, m_ShuffleVector(m_Value(), m_Value(), m_Constant(Mask))))
346  return Mask->getSplatValue() != nullptr;
347 
348  // The remaining tests are all recursive, so bail out if we hit the limit.
349  if (Depth++ == MaxDepth)
350  return false;
351 
352  // If both operands of a binop are splats, the result is a splat.
353  Value *X, *Y, *Z;
354  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
355  return isSplatValue(X, Depth) && isSplatValue(Y, Depth);
356 
357  // If all operands of a select are splats, the result is a splat.
358  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
359  return isSplatValue(X, Depth) && isSplatValue(Y, Depth) &&
360  isSplatValue(Z, Depth);
361 
362  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
363 
364  return false;
365 }
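Illustrative usage sketch (editorial addition, not part of VectorUtils.cpp): both splat helpers applied to a constant vector such as <4 x i32> <i32 1, i32 1, i32 1, i32 1>; isSplatOfOne is a hypothetical name.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
using namespace llvm;

static bool isSplatOfOne(Constant *C) {
  // getSplatValue returns the broadcast scalar (here the i32 1 constant) and
  // isSplatValue confirms all lanes are identical.
  const Value *Splat = getSplatValue(C);
  return isSplatValue(C) && Splat && isa<ConstantInt>(Splat) &&
         cast<ConstantInt>(Splat)->isOne();
}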
366 
367 MapVector<Instruction *, uint64_t>
368 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
369  const TargetTransformInfo *TTI) {
370 
371  // DemandedBits will give us every value's live-out bits. But we want
372  // to ensure no extra casts would need to be inserted, so every DAG
373  // of connected values must have the same minimum bitwidth.
374  EquivalenceClasses<Value *> ECs;
375  SmallVector<Value *, 16> Worklist;
376  SmallPtrSet<Value *, 4> Roots;
377  SmallPtrSet<Value *, 16> Visited;
378  DenseMap<Value *, uint64_t> DBits;
379  SmallPtrSet<Instruction *, 4> InstructionSet;
380  MapVector<Instruction *, uint64_t> MinBWs;
381 
382  // Determine the roots. We work bottom-up, from truncs or icmps.
383  bool SeenExtFromIllegalType = false;
384  for (auto *BB : Blocks)
385  for (auto &I : *BB) {
386  InstructionSet.insert(&I);
387 
388  if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
389  !TTI->isTypeLegal(I.getOperand(0)->getType()))
390  SeenExtFromIllegalType = true;
391 
392  // Only deal with non-vector integers up to 64-bits wide.
393  if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
394  !I.getType()->isVectorTy() &&
395  I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
396  // Don't make work for ourselves. If we know the loaded type is legal,
397  // don't add it to the worklist.
398  if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
399  continue;
400 
401  Worklist.push_back(&I);
402  Roots.insert(&I);
403  }
404  }
405  // Early exit.
406  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
407  return MinBWs;
408 
409  // Now proceed breadth-first, unioning values together.
410  while (!Worklist.empty()) {
411  Value *Val = Worklist.pop_back_val();
412  Value *Leader = ECs.getOrInsertLeaderValue(Val);
413 
414  if (Visited.count(Val))
415  continue;
416  Visited.insert(Val);
417 
418  // Non-instructions terminate a chain successfully.
419  if (!isa<Instruction>(Val))
420  continue;
421  Instruction *I = cast<Instruction>(Val);
422 
423  // If we encounter a type that is larger than 64 bits, we can't represent
424  // it so bail out.
425  if (DB.getDemandedBits(I).getBitWidth() > 64)
426  return MapVector<Instruction *, uint64_t>();
427 
428  uint64_t V = DB.getDemandedBits(I).getZExtValue();
429  DBits[Leader] |= V;
430  DBits[I] = V;
431 
432  // Casts, loads and instructions outside of our range terminate a chain
433  // successfully.
434  if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
435  !InstructionSet.count(I))
436  continue;
437 
438  // Unsafe casts terminate a chain unsuccessfully. We can't do anything
439  // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
440  // transform anything that relies on them.
441  if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
442  !I->getType()->isIntegerTy()) {
443  DBits[Leader] |= ~0ULL;
444  continue;
445  }
446 
447  // We don't modify the types of PHIs. Reductions will already have been
448  // truncated if possible, and inductions' sizes will have been chosen by
449  // indvars.
450  if (isa<PHINode>(I))
451  continue;
452 
453  if (DBits[Leader] == ~0ULL)
454  // All bits demanded, no point continuing.
455  continue;
456 
457  for (Value *O : cast<User>(I)->operands()) {
458  ECs.unionSets(Leader, O);
459  Worklist.push_back(O);
460  }
461  }
462 
463  // Now we've discovered all values, walk them to see if there are
464  // any users we didn't see. If there are, we can't optimize that
465  // chain.
466  for (auto &I : DBits)
467  for (auto *U : I.first->users())
468  if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
469  DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
470 
471  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
472  uint64_t LeaderDemandedBits = 0;
473  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
474  LeaderDemandedBits |= DBits[*MI];
475 
476  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
477  llvm::countLeadingZeros(LeaderDemandedBits);
478  // Round up to a power of 2
479  if (!isPowerOf2_64((uint64_t)MinBW))
480  MinBW = NextPowerOf2(MinBW);
481 
482  // We don't modify the types of PHIs. Reductions will already have been
483  // truncated if possible, and inductions' sizes will have been chosen by
484  // indvars.
485  // If we are required to shrink a PHI, abandon this entire equivalence class.
486  bool Abort = false;
487  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
488  if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
489  Abort = true;
490  break;
491  }
492  if (Abort)
493  continue;
494 
495  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
496  if (!isa<Instruction>(*MI))
497  continue;
498  Type *Ty = (*MI)->getType();
499  if (Roots.count(*MI))
500  Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
501  if (MinBW < Ty->getScalarSizeInBits())
502  MinBWs[cast<Instruction>(*MI)] = MinBW;
503  }
504  }
505 
506  return MinBWs;
507 }
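Illustrative usage sketch (editorial addition, not part of VectorUtils.cpp): querying the minimum bit widths for all instructions in a loop; reportShrinkableValues is a hypothetical helper and L, DB and TTI are assumed to come from the surrounding pass.

#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

static void reportShrinkableValues(Loop *L, DemandedBits &DB,
                                   const TargetTransformInfo *TTI) {
  MapVector<Instruction *, uint64_t> MinBWs =
      computeMinimumValueSizes(L->getBlocks(), DB, TTI);
  for (const auto &Entry : MinBWs)
    dbgs() << *Entry.first << " only needs " << Entry.second << " bits\n";
}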
508 
509 /// Add all access groups in @p AccGroups to @p List.
510 template <typename ListT>
511 static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
512  // Interpret an access group as a list containing itself.
513  if (AccGroups->getNumOperands() == 0) {
514  assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
515  List.insert(AccGroups);
516  return;
517  }
518 
519  for (auto &AccGroupListOp : AccGroups->operands()) {
520  auto *Item = cast<MDNode>(AccGroupListOp.get());
521  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
522  List.insert(Item);
523  }
524 }
525 
526 MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
527  if (!AccGroups1)
528  return AccGroups2;
529  if (!AccGroups2)
530  return AccGroups1;
531  if (AccGroups1 == AccGroups2)
532  return AccGroups1;
533 
534  SmallSetVector<Metadata *, 4> Union;
535  addToAccessGroupList(Union, AccGroups1);
536  addToAccessGroupList(Union, AccGroups2);
537 
538  if (Union.size() == 0)
539  return nullptr;
540  if (Union.size() == 1)
541  return cast<MDNode>(Union.front());
542 
543  LLVMContext &Ctx = AccGroups1->getContext();
544  return MDNode::get(Ctx, Union.getArrayRef());
545 }
546 
547 MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
548  const Instruction *Inst2) {
549  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
550  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
551 
552  if (!MayAccessMem1 && !MayAccessMem2)
553  return nullptr;
554  if (!MayAccessMem1)
555  return Inst2->getMetadata(LLVMContext::MD_access_group);
556  if (!MayAccessMem2)
557  return Inst1->getMetadata(LLVMContext::MD_access_group);
558 
559  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
560  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
561  if (!MD1 || !MD2)
562  return nullptr;
563  if (MD1 == MD2)
564  return MD1;
565 
566  // Use set for scalable 'contains' check.
567  SmallPtrSet<Metadata *, 4> AccGroupSet2;
568  addToAccessGroupList(AccGroupSet2, MD2);
569 
570  SmallVector<Metadata *, 4> Intersection;
571  if (MD1->getNumOperands() == 0) {
572  assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
573  if (AccGroupSet2.count(MD1))
574  Intersection.push_back(MD1);
575  } else {
576  for (const MDOperand &Node : MD1->operands()) {
577  auto *Item = cast<MDNode>(Node.get());
578  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
579  if (AccGroupSet2.count(Item))
580  Intersection.push_back(Item);
581  }
582  }
583 
584  if (Intersection.size() == 0)
585  return nullptr;
586  if (Intersection.size() == 1)
587  return cast<MDNode>(Intersection.front());
588 
589  LLVMContext &Ctx = Inst1->getContext();
590  return MDNode::get(Ctx, Intersection);
591 }
592 
593 /// \returns \p I after propagating metadata from \p VL.
594 Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
595  Instruction *I0 = cast<Instruction>(VL[0]);
596  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
597  I0->getAllMetadataOtherThanDebugLoc(Metadata);
598 
599  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
600  LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
601  LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
602  LLVMContext::MD_access_group}) {
603  MDNode *MD = I0->getMetadata(Kind);
604 
605  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
606  const Instruction *IJ = cast<Instruction>(VL[J]);
607  MDNode *IMD = IJ->getMetadata(Kind);
608  switch (Kind) {
609  case LLVMContext::MD_tbaa:
610  MD = MDNode::getMostGenericTBAA(MD, IMD);
611  break;
612  case LLVMContext::MD_alias_scope:
613  MD = MDNode::getMostGenericAliasScope(MD, IMD);
614  break;
615  case LLVMContext::MD_fpmath:
616  MD = MDNode::getMostGenericFPMath(MD, IMD);
617  break;
618  case LLVMContext::MD_noalias:
619  case LLVMContext::MD_nontemporal:
620  case LLVMContext::MD_invariant_load:
621  MD = MDNode::intersect(MD, IMD);
622  break;
623  case LLVMContext::MD_access_group:
624  MD = intersectAccessGroups(Inst, IJ);
625  break;
626  default:
627  llvm_unreachable("unhandled metadata");
628  }
629  }
630 
631  Inst->setMetadata(Kind, MD);
632  }
633 
634  return Inst;
635 }
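Illustrative usage sketch (editorial addition, not part of VectorUtils.cpp): after widening a group of scalar loads into one vector load, a vectorizer calls propagateMetadata so that TBAA, alias scopes, noalias, fpmath, nontemporal, invariant.load and access-group metadata survive on the new instruction; widenLoads and its parameters are hypothetical.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *widenLoads(IRBuilder<> &Builder, Value *VecPtr,
                         ArrayRef<Value *> ScalarLoads) {
  // Create the wide load that replaces the scalar loads ...
  LoadInst *Wide = Builder.CreateLoad(VecPtr);
  // ... then merge/intersect each metadata kind across all members onto it.
  return propagateMetadata(Wide, ScalarLoads);
}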
636 
637 Constant *
638 llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
639  const InterleaveGroup<Instruction> &Group) {
640  // All 1's means mask is not needed.
641  if (Group.getNumMembers() == Group.getFactor())
642  return nullptr;
643 
644  // TODO: support reversed access.
645  assert(!Group.isReverse() && "Reversed group not supported.");
646 
647  SmallVector<Constant *, 16> Mask;
648  for (unsigned i = 0; i < VF; i++)
649  for (unsigned j = 0; j < Group.getFactor(); ++j) {
650  unsigned HasMember = Group.getMember(j) ? 1 : 0;
651  Mask.push_back(Builder.getInt1(HasMember));
652  }
653 
654  return ConstantVector::get(Mask);
655 }
656 
657 Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
658  unsigned ReplicationFactor, unsigned VF) {
659  SmallVector<Constant *, 16> MaskVec;
660  for (unsigned i = 0; i < VF; i++)
661  for (unsigned j = 0; j < ReplicationFactor; j++)
662  MaskVec.push_back(Builder.getInt32(i));
663 
664  return ConstantVector::get(MaskVec);
665 }
666 
667 Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
668  unsigned NumVecs) {
669  SmallVector<Constant *, 16> Mask;
670  for (unsigned i = 0; i < VF; i++)
671  for (unsigned j = 0; j < NumVecs; j++)
672  Mask.push_back(Builder.getInt32(j * VF + i));
673 
674  return ConstantVector::get(Mask);
675 }
676 
677 Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
678  unsigned Stride, unsigned VF) {
679  SmallVector<Constant *, 8> Mask;
680  for (unsigned i = 0; i < VF; i++)
681  Mask.push_back(Builder.getInt32(Start + i * Stride));
682 
683  return ConstantVector::get(Mask);
684 }
685 
686 Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
687  unsigned NumInts, unsigned NumUndefs) {
688  SmallVector<Constant *, 16> Mask;
689  for (unsigned i = 0; i < NumInts; i++)
690  Mask.push_back(Builder.getInt32(Start + i));
691 
692  Constant *Undef = UndefValue::get(Builder.getInt32Ty());
693  for (unsigned i = 0; i < NumUndefs; i++)
694  Mask.push_back(Undef);
695 
696  return ConstantVector::get(Mask);
697 }
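Illustrative examples (editorial addition, not part of VectorUtils.cpp) of the shuffle masks the mask-creation helpers above produce for small factors; buildExampleMasks is a hypothetical name.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static void buildExampleMasks(IRBuilder<> &Builder) {
  // Replicated mask, ReplicationFactor = 3, VF = 2: <0, 0, 0, 1, 1, 1>.
  Constant *Rep = createReplicatedMask(Builder, 3, 2);
  // Interleave mask, VF = 4, NumVecs = 2: <0, 4, 1, 5, 2, 6, 3, 7>.
  Constant *Ilv = createInterleaveMask(Builder, 4, 2);
  // Stride mask, Start = 0, Stride = 2, VF = 4: <0, 2, 4, 6>.
  Constant *Str = createStrideMask(Builder, 0, 2, 4);
  // Sequential mask, Start = 0, NumInts = 4, NumUndefs = 4: <0, 1, 2, 3>
  // followed by four undef lanes.
  Constant *Seq = createSequentialMask(Builder, 0, 4, 4);
  (void)Rep; (void)Ilv; (void)Str; (void)Seq;
}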
698 
699 /// A helper function for concatenating vectors. This function concatenates two
700 /// vectors having the same element type. If the second vector has fewer
701 /// elements than the first, it is padded with undefs.
702 static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
703  Value *V2) {
704  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
705  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
706  assert(VecTy1 && VecTy2 &&
707  VecTy1->getScalarType() == VecTy2->getScalarType() &&
708  "Expect two vectors with the same element type");
709 
710  unsigned NumElts1 = VecTy1->getNumElements();
711  unsigned NumElts2 = VecTy2->getNumElements();
712  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
713 
714  if (NumElts1 > NumElts2) {
715  // Extend with UNDEFs.
716  Constant *ExtMask =
717  createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
718  V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
719  }
720 
721  Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
722  return Builder.CreateShuffleVector(V1, V2, Mask);
723 }
724 
725 Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
726  unsigned NumVecs = Vecs.size();
727  assert(NumVecs > 1 && "Should be at least two vectors");
728 
729  SmallVector<Value *, 8> ResList;
730  ResList.append(Vecs.begin(), Vecs.end());
731  do {
732  SmallVector<Value *, 8> TmpList;
733  for (unsigned i = 0; i < NumVecs - 1; i += 2) {
734  Value *V0 = ResList[i], *V1 = ResList[i + 1];
735  assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
736  "Only the last vector may have a different type");
737 
738  TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
739  }
740 
741  // Push the last vector if the total number of vectors is odd.
742  if (NumVecs % 2 != 0)
743  TmpList.push_back(ResList[NumVecs - 1]);
744 
745  ResList = TmpList;
746  NumVecs = ResList.size();
747  } while (NumVecs > 1);
748 
749  return ResList[0];
750 }
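Illustrative usage sketch (editorial addition, not part of VectorUtils.cpp): concatenating three <4 x i32> values into a single <12 x i32> value via the pairwise shuffles described above; concatThree is a hypothetical name.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *concatThree(IRBuilder<> &Builder, Value *A, Value *B, Value *C) {
  Value *Parts[] = {A, B, C};
  // The odd vector out is carried over each round, so three inputs need two
  // rounds of pairwise concatenation.
  return concatenateVectors(Builder, Parts);
}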
751 
752 bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
753  auto *ConstMask = dyn_cast<Constant>(Mask);
754  if (!ConstMask)
755  return false;
756  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
757  return true;
758  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
759  ++I) {
760  if (auto *MaskElt = ConstMask->getAggregateElement(I))
761  if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
762  continue;
763  return false;
764  }
765  return true;
766 }
767 
768 
769 bool llvm::maskIsAllOneOrUndef(Value *Mask) {
770  auto *ConstMask = dyn_cast<Constant>(Mask);
771  if (!ConstMask)
772  return false;
773  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
774  return true;
775  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
776  ++I) {
777  if (auto *MaskElt = ConstMask->getAggregateElement(I))
778  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
779  continue;
780  return false;
781  }
782  return true;
783 }
784 
785 /// TODO: This is a lot like known bits, but for
786 /// vectors. Is there something we can common this with?
787 APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
788 
789  const unsigned VWidth = cast<VectorType>(Mask->getType())->getNumElements();
790  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
791  if (auto *CV = dyn_cast<ConstantVector>(Mask))
792  for (unsigned i = 0; i < VWidth; i++)
793  if (CV->getAggregateElement(i)->isNullValue())
794  DemandedElts.clearBit(i);
795  return DemandedElts;
796 }
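Illustrative usage sketch (editorial addition, not part of VectorUtils.cpp): how a masked-memory transform might use the predicates above on an llvm.masked.store call; maskedStoreIsTrivial is a hypothetical name.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static bool maskedStoreIsTrivial(IntrinsicInst *MaskedStore) {
  // llvm.masked.store(value, ptr, align, mask): operand 3 is the <N x i1> mask.
  Value *Mask = MaskedStore->getArgOperand(3);
  // All-zero-or-undef: the store writes nothing and can be dropped.
  // All-one-or-undef: it can be replaced by an ordinary unmasked store.
  return maskIsAllZeroOrUndef(Mask) || maskIsAllOneOrUndef(Mask);
}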
797 
798 bool InterleavedAccessInfo::isStrided(int Stride) {
799  unsigned Factor = std::abs(Stride);
800  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
801 }
802 
803 void InterleavedAccessInfo::collectConstStrideAccesses(
804  MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
805  const ValueToValueMap &Strides) {
806  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
807 
808  // Since it's desired that the load/store instructions be maintained in
809  // "program order" for the interleaved access analysis, we have to visit the
810  // blocks in the loop in reverse postorder (i.e., in a topological order).
811  // Such an ordering will ensure that any load/store that may be executed
812  // before a second load/store will precede the second load/store in
813  // AccessStrideInfo.
814  LoopBlocksDFS DFS(TheLoop);
815  DFS.perform(LI);
816  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
817  for (auto &I : *BB) {
818  auto *LI = dyn_cast<LoadInst>(&I);
819  auto *SI = dyn_cast<StoreInst>(&I);
820  if (!LI && !SI)
821  continue;
822 
823  Value *Ptr = getLoadStorePointerOperand(&I);
824  // We don't check wrapping here because we don't know yet if Ptr will be
825  // part of a full group or a group with gaps. Checking wrapping for all
826  // pointers (even those that end up in groups with no gaps) will be overly
827  // conservative. For full groups, wrapping should be ok since if we would
828  // wrap around the address space we would do a memory access at nullptr
829  // even without the transformation. The wrapping checks are therefore
830  // deferred until after we've formed the interleaved groups.
831  int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
832  /*Assume=*/true, /*ShouldCheckWrap=*/false);
833 
834  const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
835  PointerType *PtrTy = cast<PointerType>(Ptr->getType());
836  uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
837 
838  // An alignment of 0 means target ABI alignment.
839  MaybeAlign Alignment = getLoadStoreAlignment(&I);
840  if (!Alignment)
841  Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType()));
842 
843  AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment);
844  }
845 }
846 
847 // Analyze interleaved accesses and collect them into interleaved load and
848 // store groups.
849 //
850 // When generating code for an interleaved load group, we effectively hoist all
851 // loads in the group to the location of the first load in program order. When
852 // generating code for an interleaved store group, we sink all stores to the
853 // location of the last store. This code motion can change the order of load
854 // and store instructions and may break dependences.
855 //
856 // The code generation strategy mentioned above ensures that we won't violate
857 // any write-after-read (WAR) dependences.
858 //
859 // E.g., for the WAR dependence: a = A[i]; // (1)
860 // A[i] = b; // (2)
861 //
862 // The store group of (2) is always inserted at or below (2), and the load
863 // group of (1) is always inserted at or above (1). Thus, the instructions will
864 // never be reordered. All other dependences are checked to ensure the
865 // correctness of the instruction reordering.
866 //
867 // The algorithm visits all memory accesses in the loop in bottom-up program
868 // order. Program order is established by traversing the blocks in the loop in
869 // reverse postorder when collecting the accesses.
870 //
871 // We visit the memory accesses in bottom-up order because it can simplify the
872 // construction of store groups in the presence of write-after-write (WAW)
873 // dependences.
874 //
875 // E.g., for the WAW dependence: A[i] = a; // (1)
876 // A[i] = b; // (2)
877 // A[i + 1] = c; // (3)
878 //
879 // We will first create a store group with (3) and (2). (1) can't be added to
880 // this group because it and (2) are dependent. However, (1) can be grouped
881 // with other accesses that may precede it in program order. Note that a
882 // bottom-up order does not imply that WAW dependences should not be checked.
883 void InterleavedAccessInfo::analyzeInterleaving(
884  bool EnablePredicatedInterleavedMemAccesses) {
885  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
886  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
887 
888  // Holds all accesses with a constant stride.
889  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
890  collectConstStrideAccesses(AccessStrideInfo, Strides);
891 
892  if (AccessStrideInfo.empty())
893  return;
894 
895  // Collect the dependences in the loop.
896  collectDependences();
897 
898  // Holds all interleaved store groups temporarily.
899  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
900  // Holds all interleaved load groups temporarily.
901  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
902 
903  // Search in bottom-up program order for pairs of accesses (A and B) that can
904  // form interleaved load or store groups. In the algorithm below, access A
905  // precedes access B in program order. We initialize a group for B in the
906  // outer loop of the algorithm, and then in the inner loop, we attempt to
907  // insert each A into B's group if:
908  //
909  // 1. A and B have the same stride,
910  // 2. A and B have the same memory object size, and
911  // 3. A belongs in B's group according to its distance from B.
912  //
913  // Special care is taken to ensure group formation will not break any
914  // dependences.
915  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
916  BI != E; ++BI) {
917  Instruction *B = BI->first;
918  StrideDescriptor DesB = BI->second;
919 
920  // Initialize a group for B if it has an allowable stride. Even if we don't
921  // create a group for B, we continue with the bottom-up algorithm to ensure
922  // we don't break any of B's dependences.
923  InterleaveGroup<Instruction> *Group = nullptr;
924  if (isStrided(DesB.Stride) &&
925  (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
926  Group = getInterleaveGroup(B);
927  if (!Group) {
928  LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
929  << '\n');
930  Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
931  }
932  if (B->mayWriteToMemory())
933  StoreGroups.insert(Group);
934  else
935  LoadGroups.insert(Group);
936  }
937 
938  for (auto AI = std::next(BI); AI != E; ++AI) {
939  Instruction *A = AI->first;
940  StrideDescriptor DesA = AI->second;
941 
942  // Our code motion strategy implies that we can't have dependences
943  // between accesses in an interleaved group and other accesses located
944  // between the first and last member of the group. Note that this also
945  // means that a group can't have more than one member at a given offset.
946  // The accesses in a group can have dependences with other accesses, but
947  // we must ensure we don't extend the boundaries of the group such that
948  // we encompass those dependent accesses.
949  //
950  // For example, assume we have the sequence of accesses shown below in a
951  // stride-2 loop:
952  //
953  // (1, 2) is a group | A[i] = a; // (1)
954  // | A[i-1] = b; // (2) |
955  // A[i-3] = c; // (3)
956  // A[i] = d; // (4) | (2, 4) is not a group
957  //
958  // Because accesses (2) and (3) are dependent, we can group (2) with (1)
959  // but not with (4). If we did, the dependent access (3) would be within
960  // the boundaries of the (2, 4) group.
961  if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
962  // If a dependence exists and A is already in a group, we know that A
963  // must be a store since A precedes B and WAR dependences are allowed.
964  // Thus, A would be sunk below B. We release A's group to prevent this
965  // illegal code motion. A will then be free to form another group with
966  // instructions that precede it.
967  if (isInterleaved(A)) {
968  InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
969 
970  LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
971  "dependence between " << *A << " and "<< *B << '\n');
972 
973  StoreGroups.remove(StoreGroup);
974  releaseGroup(StoreGroup);
975  }
976 
977  // If a dependence exists and A is not already in a group (or it was
978  // and we just released it), B might be hoisted above A (if B is a
979  // load) or another store might be sunk below A (if B is a store). In
980  // either case, we can't add additional instructions to B's group. B
981  // will only form a group with instructions that it precedes.
982  break;
983  }
984 
985  // At this point, we've checked for illegal code motion. If either A or B
986  // isn't strided, there's nothing left to do.
987  if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
988  continue;
989 
990  // Ignore A if it's already in a group or isn't the same kind of memory
991  // operation as B.
992  // Note that mayReadFromMemory() isn't mutually exclusive to
993  // mayWriteToMemory in the case of atomic loads. We shouldn't see those
994  // here, canVectorizeMemory() should have returned false - except for the
995  // case we asked for optimization remarks.
996  if (isInterleaved(A) ||
997  (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
998  (A->mayWriteToMemory() != B->mayWriteToMemory()))
999  continue;
1000 
1001  // Check rules 1 and 2. Ignore A if its stride or size is different from
1002  // that of B.
1003  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1004  continue;
1005 
1006  // Ignore A if the memory object of A and B don't belong to the same
1007  // address space
1008  if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1009  continue;
1010 
1011  // Calculate the distance from A to B.
1012  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1013  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1014  if (!DistToB)
1015  continue;
1016  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1017 
1018  // Check rule 3. Ignore A if its distance to B is not a multiple of the
1019  // size.
1020  if (DistanceToB % static_cast<int64_t>(DesB.Size))
1021  continue;
1022 
1023  // All members of a predicated interleave-group must have the same predicate,
1024  // and currently must reside in the same BB.
1025  BasicBlock *BlockA = A->getParent();
1026  BasicBlock *BlockB = B->getParent();
1027  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1028  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1029  continue;
1030 
1031  // The index of A is the index of B plus A's distance to B in multiples
1032  // of the size.
1033  int IndexA =
1034  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1035 
1036  // Try to insert A into B's group.
1037  if (Group->insertMember(A, IndexA, DesA.Alignment)) {
1038  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1039  << " into the interleave group with" << *B
1040  << '\n');
1041  InterleaveGroupMap[A] = Group;
1042 
1043  // Set the first load in program order as the insert position.
1044  if (A->mayReadFromMemory())
1045  Group->setInsertPos(A);
1046  }
1047  } // Iteration over A accesses.
1048  } // Iteration over B accesses.
1049 
1050  // Remove interleaved store groups with gaps.
1051  for (auto *Group : StoreGroups)
1052  if (Group->getNumMembers() != Group->getFactor()) {
1053  LLVM_DEBUG(
1054  dbgs() << "LV: Invalidate candidate interleaved store group due "
1055  "to gaps.\n");
1056  releaseGroup(Group);
1057  }
1058  // Remove interleaved groups with gaps (currently only loads) whose memory
1059  // accesses may wrap around. We have to revisit the getPtrStride analysis,
1060  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1061  // not check wrapping (see documentation there).
1062  // FORNOW we use Assume=false;
1063  // TODO: Change to Assume=true but making sure we don't exceed the threshold
1064  // of runtime SCEV assumptions checks (thereby potentially failing to
1065  // vectorize altogether).
1066  // Additional optional optimizations:
1067  // TODO: If we are peeling the loop and we know that the first pointer doesn't
1068  // wrap then we can deduce that all pointers in the group don't wrap.
1069  // This means that we can forcefully peel the loop in order to only have to
1070  // check the first pointer for no-wrap. When we'll change to use Assume=true
1071  // we'll only need at most one runtime check per interleaved group.
1072  for (auto *Group : LoadGroups) {
1073  // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1074  // load would wrap around the address space we would do a memory access at
1075  // nullptr even without the transformation.
1076  if (Group->getNumMembers() == Group->getFactor())
1077  continue;
1078 
1079  // Case 2: If first and last members of the group don't wrap this implies
1080  // that all the pointers in the group don't wrap.
1081  // So we check only group member 0 (which is always guaranteed to exist),
1082  // and group member Factor - 1; If the latter doesn't exist we rely on
1083  // peeling (if it is a non-reversed access -- see Case 3).
1084  Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
1085  if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
1086  /*ShouldCheckWrap=*/true)) {
1087  LLVM_DEBUG(
1088  dbgs() << "LV: Invalidate candidate interleaved group due to "
1089  "first group member potentially pointer-wrapping.\n");
1090  releaseGroup(Group);
1091  continue;
1092  }
1093  Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
1094  if (LastMember) {
1095  Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
1096  if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
1097  /*ShouldCheckWrap=*/true)) {
1098  LLVM_DEBUG(
1099  dbgs() << "LV: Invalidate candidate interleaved group due to "
1100  "last group member potentially pointer-wrapping.\n");
1101  releaseGroup(Group);
1102  }
1103  } else {
1104  // Case 3: A non-reversed interleaved load group with gaps: We need
1105  // to execute at least one scalar epilogue iteration. This will ensure
1106  // we don't speculatively access memory out-of-bounds. We only need
1107  // to look for a member at index factor - 1, since every group must have
1108  // a member at index zero.
1109  if (Group->isReverse()) {
1110  LLVM_DEBUG(
1111  dbgs() << "LV: Invalidate candidate interleaved group due to "
1112  "a reverse access with gaps.\n");
1113  releaseGroup(Group);
1114  continue;
1115  }
1116  LLVM_DEBUG(
1117  dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1118  RequiresScalarEpilogue = true;
1119  }
1120  }
1121 }
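Illustrative usage sketch (editorial addition, not part of VectorUtils.cpp): once analyzeInterleaving has run, clients query the result per memory instruction; dumpGroupFor is a hypothetical name and IAI is an already-analyzed InterleavedAccessInfo.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

static void dumpGroupFor(InterleavedAccessInfo &IAI, Instruction *MemAccess) {
  if (const InterleaveGroup<Instruction> *G = IAI.getInterleaveGroup(MemAccess))
    dbgs() << "member " << G->getIndex(MemAccess) << " of a factor-"
           << G->getFactor() << " interleave group\n";
}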
1122 
1123 void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1124  // If no group had triggered the requirement to create an epilogue loop,
1125  // there is nothing to do.
1126  if (!requiresScalarEpilogue())
1127  return;
1128 
1129  // Avoid releasing a Group twice.
1130  SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
1131  for (auto &I : InterleaveGroupMap) {
1132  InterleaveGroup<Instruction> *Group = I.second;
1133  if (Group->requiresScalarEpilogue())
1134  DelSet.insert(Group);
1135  }
1136  for (auto *Ptr : DelSet) {
1137  LLVM_DEBUG(
1138  dbgs()
1139  << "LV: Invalidate candidate interleaved group due to gaps that "
1140  "require a scalar epilogue (not allowed under optsize) and cannot "
1141  "be masked (not enabled). \n");
1142  releaseGroup(Ptr);
1143  }
1144 
1145  RequiresScalarEpilogue = false;
1146 }
1147 
1148 template <typename InstT>
1149 void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1150  llvm_unreachable("addMetadata can only be used for Instruction");
1151 }
1152 
1153 namespace llvm {
1154 template <>
1155 void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1156  SmallVector<Value *, 4> VL;
1157  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1158  [](std::pair<int, Instruction *> p) { return p.second; });
1159  propagateMetadata(NewInst, VL);
1160 }
1161 }