//===----------- VectorUtils.cpp - Vectorizer utility functions ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

#define DEBUG_TYPE "vectorutils"

using namespace llvm;
using namespace llvm::PatternMatch;

/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
/// the vector form of the intrinsic.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::sqrt:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::fabs:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::pow:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
  case Intrinsic::canonicalize:
    return true;
  default:
    return false;
  }
}

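// For example (illustrative only, not part of the original source): the scalar
// form `declare float @llvm.sqrt.f32(float)` maps directly to the vector form
// `declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)`, so the vectorizer can
// widen a call to llvm.sqrt simply by retyping its operands and result.
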
/// Identifies if the intrinsic has a scalar operand. It checks for the
/// ctlz, cttz and powi intrinsics, whose (second) argument is scalar.
bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
                                        unsigned ScalarOpdIdx) {
  switch (ID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  default:
    return false;
  }
}

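// Illustrative example (not part of the original source): in a call such as
// `@llvm.powi.v4f32(<4 x float> %x, i32 %n)` the exponent %n stays a single
// i32 even though the base is a vector, so operand index 1 must not be
// widened when the call is vectorized.
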
/// Returns the intrinsic ID for the call.
/// For the given call instruction it finds the corresponding intrinsic and
/// returns its ID; if no such mapping is found, it returns not_intrinsic.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
                                                const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI);
  if (ID == Intrinsic::not_intrinsic)
    return Intrinsic::not_intrinsic;

  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::sideeffect)
    return ID;
  return Intrinsic::not_intrinsic;
}

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
  const DataLayout &DL = Gep->getModule()->getDataLayout();
  unsigned LastOperand = Gep->getNumOperands() - 1;
  unsigned GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());

  // Walk backwards and try to peel off zeros.
  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
    // Find the type we're currently indexing into.
    gep_type_iterator GEPTI = gep_type_begin(Gep);
    std::advance(GEPTI, LastOperand - 2);

    // If it's a type with the same allocation size as the result of the GEP we
    // can peel off the zero index.
    if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
      break;
    --LastOperand;
  }

  return LastOperand;
}

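// Illustrative example (not part of the original source): for a GEP such as
//   %p = getelementptr [64 x [1 x i32]], [64 x [1 x i32]]* %A, i64 0, i64 %i, i64 0
// the trailing zero index into the one-element inner array does not change
// which address is produced, so the operand holding %i is the one returned
// and later checked for consecutive accesses.
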
/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP)
    return Ptr;

  unsigned InductionOperand = getGEPInductionOperand(GEP);

  // Check that all of the gep indices are uniform except for our induction
  // operand.
  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
    if (i != InductionOperand &&
        !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
      return Ptr;
  return GEP->getOperand(InductionOperand);
}

/// If a value has only one user that is a CastInst, return it.
static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
  Value *UniqueCast = nullptr;
  for (User *U : Ptr->users()) {
    CastInst *CI = dyn_cast<CastInst>(U);
    if (CI && CI->getType() == Ty) {
      if (!UniqueCast)
        UniqueCast = CI;
      else
        return nullptr;
    }
  }
  return UniqueCast;
}

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy || PtrTy->isAggregateType())
    return nullptr;

  // Try to remove a gep instruction to make the pointer (actually the index
  // at this point) easier to analyze. If OrigPtr is equal to Ptr we are
  // analyzing the pointer, otherwise we are analyzing the index.
  Value *OrigPtr = Ptr;

  // The size of the pointer access.
  int64_t PtrAccessSize = 1;

  Ptr = stripGetElementPtr(Ptr, SE, Lp);
  const SCEV *V = SE->getSCEV(Ptr);

  if (Ptr != OrigPtr)
    // Strip off casts.
    while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
      V = C->getOperand();

  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
  if (!S)
    return nullptr;

  V = S->getStepRecurrence(*SE);
  if (!V)
    return nullptr;

  // Strip off the size of access multiplication if we are still analyzing the
  // pointer.
  if (OrigPtr == Ptr) {
    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
      if (M->getOperand(0)->getSCEVType() != scConstant)
        return nullptr;

      const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();

      // Huge step value - give up.
      if (APStepVal.getBitWidth() > 64)
        return nullptr;

      int64_t StepVal = APStepVal.getSExtValue();
      if (PtrAccessSize != StepVal)
        return nullptr;
      V = M->getOperand(1);
    }
  }

  // Strip off casts.
  Type *StripedOffRecurrenceCast = nullptr;
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
    StripedOffRecurrenceCast = C->getType();
    V = C->getOperand();
  }

  // Look for the loop invariant symbolic value.
  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
  if (!U)
    return nullptr;

  Value *Stride = U->getValue();
  if (!Lp->isLoopInvariant(Stride))
    return nullptr;

  // If we have stripped off the recurrence cast we have to make sure that we
  // return the value that is used in this loop so that we can replace it later.
  if (StripedOffRecurrenceCast)
    Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);

  return Stride;
}

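// Illustrative example (not part of the original source): for a loop that
// accesses A[i * s] with a loop-invariant `s`, stripGetElementPtr exposes the
// index i * s, whose SCEV is an add-recurrence with step `s`; the SCEVUnknown
// for `s` is then returned as the symbolic stride, which callers can later
// specialize to a constant under a runtime check.
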
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  unsigned Width = VTy->getNumElements();
  if (EltNo >= Width)  // Out of range access.
    return UndefValue::get(VTy->getElementType());

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert to a variable element, we don't know what it is.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

    // If this is an insert to the element we are looking for, return the
    // inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);

    // Otherwise, the insertelement doesn't modify the value, recurse on its
    // vector input.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
    unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return UndefValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add operation with a constant zero.
  // TODO: Use getBinOpIdentity() to generalize this.
  Value *Val; Constant *C;
  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
    if (Constant *Elt = C->getAggregateElement(EltNo))
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // Otherwise, we don't know.
  return nullptr;
}

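// Illustrative example (not part of the original source): given
//   %v1 = insertelement <4 x i32> %v0, i32 %x, i32 2
// findScalarElement(%v1, 2) returns %x without creating an extractelement,
// while findScalarElement(%v1, 0) recurses into %v0.
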
/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcast a single value into a vector.
///
const Value *llvm::getSplatValue(const Value *V) {

  if (auto *C = dyn_cast<Constant>(V))
    if (isa<VectorType>(V->getType()))
      return C->getSplatValue();

  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
  if (!ShuffleInst)
    return nullptr;
  // All-zero (or undef) shuffle mask elements.
  for (int MaskElt : ShuffleInst->getShuffleMask())
    if (MaskElt != 0 && MaskElt != -1)
      return nullptr;
  // The first shuffle source is 'insertelement' with index 0.
  auto *InsertEltInst =
      dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
    return nullptr;

  return InsertEltInst->getOperand(1);
}

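// Illustrative example (not part of the original source): the canonical splat
// idiom
//   %ins   = insertelement <4 x float> undef, float %s, i32 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> undef,
//                          <4 x i32> zeroinitializer
// is recognized here, and getSplatValue(%splat) returns %s.
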
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {

  // DemandedBits will give us every value's live-out bits. But we want
  // to ensure no extra casts would need to be inserted, so every DAG
  // of connected values must have the same minimum bitwidth.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64-bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Don't make work for ourselves. If we know the loaded type is legal,
        // don't add it to the worklist.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;

        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    if (Visited.count(Val))
      continue;
    Visited.insert(Val);

    // Non-instructions terminate a chain successfully.
    if (!isa<Instruction>(Val))
      continue;
    Instruction *I = cast<Instruction>(Val);

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
    // transform anything that relies on them.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    if (isa<PHINode>(I))
      continue;

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Now we've discovered all values, walk them to see if there are
  // any users we didn't see. If there are, we can't optimize that
  // chain.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
    uint64_t LeaderDemandedBits = 0;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      LeaderDemandedBits |= DBits[*MI];

    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
                     llvm::countLeadingZeros(LeaderDemandedBits);
    // Round up to a power of 2
    if (!isPowerOf2_64((uint64_t)MinBW))
      MinBW = NextPowerOf2(MinBW);

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    // If we are required to shrink a PHI, abandon this entire equivalence class.
    bool Abort = false;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
        Abort = true;
        break;
      }
    if (Abort)
      continue;

    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
      if (!isa<Instruction>(*MI))
        continue;
      Type *Ty = (*MI)->getType();
      if (Roots.count(*MI))
        Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
      if (MinBW < Ty->getScalarSizeInBits())
        MinBWs[cast<Instruction>(*MI)] = MinBW;
    }
  }

  return MinBWs;
}

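// Illustrative example (not part of the original source): if two i8 loads are
// zero-extended to i32, added, and the sum is truncated back to i8 before
// being stored, every instruction in that chain only demands its low 8 bits.
// The map returned here would then record a minimum width of 8 for the
// widened instructions, letting the vectorizer work on <N x i8> instead of
// <N x i32>.
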
/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
  Instruction *I0 = cast<Instruction>(VL[0]);
  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
  I0->getAllMetadataOtherThanDebugLoc(Metadata);

  for (auto Kind :
       {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias,
        LLVMContext::MD_fpmath, LLVMContext::MD_nontemporal,
        LLVMContext::MD_invariant_load}) {
    MDNode *MD = I0->getMetadata(Kind);

    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      const Instruction *IJ = cast<Instruction>(VL[J]);
      MDNode *IMD = IJ->getMetadata(Kind);
      switch (Kind) {
      case LLVMContext::MD_tbaa:
        MD = MDNode::getMostGenericTBAA(MD, IMD);
        break;
      case LLVMContext::MD_alias_scope:
        MD = MDNode::getMostGenericAliasScope(MD, IMD);
        break;
      case LLVMContext::MD_fpmath:
        MD = MDNode::getMostGenericFPMath(MD, IMD);
        break;
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
        MD = MDNode::intersect(MD, IMD);
        break;
      default:
        llvm_unreachable("unhandled metadata");
      }
    }

    Inst->setMetadata(Kind, MD);
  }

  return Inst;
}

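// Illustrative example (not part of the original source): when the vectorizer
// replaces four scalar loads that all carry !tbaa and !nontemporal metadata
// with one wide load, calling propagateMetadata(WideLoad, ScalarLoads)
// attaches the most generic common TBAA tag to the wide load and keeps
// !nontemporal only if every original load had it.
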
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                                     unsigned NumVecs) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(Builder.getInt32(j * VF + i));

  return ConstantVector::get(Mask);
}

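// Illustrative example (not part of the original source): for VF = 4 and
// NumVecs = 2, createInterleaveMask produces <0, 4, 1, 5, 2, 6, 3, 7>, i.e.
// it picks element i of each input vector in turn, interleaving two
// concatenated vectors of four elements each.
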
Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
                                 unsigned Stride, unsigned VF) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Builder.getInt32(Start + i * Stride));

  return ConstantVector::get(Mask);
}

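// Illustrative example (not part of the original source): for Start = 1,
// Stride = 2 and VF = 4, createStrideMask produces <1, 3, 5, 7>, which
// extracts one field of a factor-2 interleaved group from a wide vector.
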
Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                                     unsigned NumInts, unsigned NumUndefs) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Builder.getInt32(Start + i));

  Constant *Undef = UndefValue::get(Builder.getInt32Ty());
  for (unsigned i = 0; i < NumUndefs; i++)
    Mask.push_back(Undef);

  return ConstantVector::get(Mask);
}

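// Illustrative example (not part of the original source):
// createSequentialMask(Builder, 0, 2, 2) produces <0, 1, undef, undef>, the
// kind of padding mask concatenateTwoVectors below uses to widen the shorter
// of its two inputs before the final concatenating shuffle.
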
/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
                                    Value *V2) {
  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");

  unsigned NumElts1 = VecTy1->getNumElements();
  unsigned NumElts2 = VecTy2->getNumElements();
  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");

  if (NumElts1 > NumElts2) {
    // Extend with UNDEFs.
    Constant *ExtMask =
        createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
    V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
  }

  Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
  return Builder.CreateShuffleVector(V1, V2, Mask);
}

Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");

  SmallVector<Value *, 8> ResList;
  ResList.append(Vecs.begin(), Vecs.end());
  do {
    SmallVector<Value *, 8> TmpList;
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");

      TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
    }

    // Push the last vector if the total number of vectors is odd.
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);

    ResList = TmpList;
    NumVecs = ResList.size();
  } while (NumVecs > 1);

  return ResList[0];
}

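// Usage sketch (hypothetical caller, not part of the original source,
// assuming an IRBuilder `Builder` and four <4 x i32> values V0..V3 live at the
// insertion point):
//   Value *Wide = concatenateVectors(Builder, {V0, V1, V2, V3});
// The helper pairs the inputs up tree-style, emitting shuffles that produce an
// <8 x i32> for each pair and then the final <16 x i32> result.
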
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}

void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const ValueToValueMap &Strides) {
  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();

  // Since it's desired that the load/store instructions be maintained in
  // "program order" for the interleaved access analysis, we have to visit the
  // blocks in the loop in reverse postorder (i.e., in a topological order).
  // Such an ordering will ensure that any load/store that may be executed
  // before a second load/store will precede the second load/store in
  // AccessStrideInfo.
  LoopBlocksDFS DFS(TheLoop);
  DFS.perform(LI);
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    for (auto &I : *BB) {
      auto *LI = dyn_cast<LoadInst>(&I);
      auto *SI = dyn_cast<StoreInst>(&I);
      if (!LI && !SI)
        continue;

      Value *Ptr = getLoadStorePointerOperand(&I);
      // We don't check wrapping here because we don't know yet if Ptr will be
      // part of a full group or a group with gaps. Checking wrapping for all
      // pointers (even those that end up in groups with no gaps) will be overly
      // conservative. For full groups, wrapping should be ok since if we would
      // wrap around the address space we would do a memory access at nullptr
      // even without the transformation. The wrapping checks are therefore
      // deferred until after we've formed the interleaved groups.
      int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
                                    /*Assume=*/true, /*ShouldCheckWrap=*/false);

      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
      PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
      uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());

      // An alignment of 0 means target ABI alignment.
      unsigned Align = getLoadStoreAlignment(&I);
      if (!Align)
        Align = DL.getABITypeAlignment(PtrTy->getElementType());

      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
    }
}

// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence:  a = A[i];      // (1)
//                                A[i] = b;      // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence:  A[i] = a;      // (1)
//                                A[i] = b;      // (2)
//                                A[i + 1] = c;  // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
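//
// Illustrative example (not part of the original source): in a loop such as
//   for (i = 0; i < n; i++) {
//     sum += A[2 * i];       // stride-2 load, index 0
//     sum += A[2 * i + 1];   // stride-2 load, index 1
//   }
// the two loads form one interleave group of factor 2 that can be lowered to
// a single wide load of A[2*i .. 2*i+1] followed by shuffles.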
void InterleavedAccessInfo::analyzeInterleaving() {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const ValueToValueMap &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> LoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup *Group = nullptr;
    if (isStrided(DesB.Stride)) {
      Group = getInterleaveGroup(B);
      if (!Group) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
      }
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      else
        LoadGroups.insert(Group);
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a;  // (1)
      //                    | A[i-1] = b;  // (2) |
      //                      A[i-3] = c;  // (3)
      //                      A[i]   = d;  // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        // If a dependence exists and A is already in a group, we know that A
        // must be a store since A precedes B and WAR dependences are allowed.
        // Thus, A would be sunk below B. We release A's group to prevent this
        // illegal code motion. A will then be free to form another group with
        // instructions that precede it.
        if (isInterleaved(A)) {
          InterleaveGroup *StoreGroup = getInterleaveGroup(A);
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }

        // If a dependence exists and A is not already in a group (or it was
        // and we just released it), B might be hoisted above A (if B is a
        // load) or another store might be sunk below A (if B is a store). In
        // either case, we can't add additional instructions to B's group. B
        // will only form a group with instructions that it precedes.
        break;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive to
      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
      // here, canVectorizeMemory() should have returned false - except for the
      // case we asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory object of A and B don't belong to the same
      // address space.
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // Ignore A if either A or B is in a predicated block. Although we
      // currently prevent group formation for predicated accesses, we may be
      // able to relax this limitation in the future once we handle more
      // complicated blocks.
      if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (Group->insertMember(A, IndexA, DesA.Align)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = Group;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          Group->setInsertPos(A);
      }
    } // Iteration over A accesses.
  }   // Iteration over B accesses.

  // Remove interleaved store groups with gaps.
  for (InterleaveGroup *Group : StoreGroups)
    if (Group->getNumMembers() != Group->getFactor()) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
    }
  // Remove interleaved groups with gaps (currently only loads) whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // FORNOW we use Assume=false;
  // TODO: Change to Assume=true but making sure we don't exceed the threshold
  // of runtime SCEV assumptions checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer doesn't
  // wrap then we can deduce that all pointers in the group don't wrap.
  // This means that we can forcefully peel the loop in order to only have to
  // check the first pointer for no-wrap. When we'll change to use Assume=true
  // we'll only need at most one runtime check per interleaved group.
  for (InterleaveGroup *Group : LoadGroups) {
    // Case 1: A full group. We can skip the checks; for full groups, if the
    // wide load would wrap around the address space we would do a memory
    // access at nullptr even without the transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and group member Factor - 1; If the latter doesn't exist we rely on
    // peeling (if it is a non-reversed access -- see Case 3).
    Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                      /*ShouldCheckWrap=*/true)) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved group due to "
                    "first group member potentially pointer-wrapping.\n");
      releaseGroup(Group);
      continue;
    }
    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
    if (LastMember) {
      Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
      if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
                        /*ShouldCheckWrap=*/true)) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "last group member potentially pointer-wrapping.\n");
        releaseGroup(Group);
      }
    } else {
      // Case 3: A non-reversed interleaved load group with gaps: we need
      // to execute at least one scalar epilogue iteration. This will ensure
      // we don't speculatively access memory out-of-bounds. We only need
      // to look for a member at index factor - 1, since every group must have
      // a member at index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
}