VectorUtils.cpp
1//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Analysis/VectorUtils.h"
14#include "llvm/ADT/EquivalenceClasses.h"
15#include "llvm/Analysis/DemandedBits.h"
16#include "llvm/Analysis/LoopInfo.h"
17#include "llvm/Analysis/LoopIterator.h"
18#include "llvm/Analysis/ScalarEvolution.h"
19#include "llvm/Analysis/ScalarEvolutionExpressions.h"
20#include "llvm/Analysis/TargetTransformInfo.h"
21#include "llvm/Analysis/ValueTracking.h"
22#include "llvm/IR/Constants.h"
23#include "llvm/IR/DerivedTypes.h"
24#include "llvm/IR/IRBuilder.h"
25#include "llvm/IR/PatternMatch.h"
26#include "llvm/IR/Value.h"
27#include "llvm/Support/CommandLine.h"
28
29#define DEBUG_TYPE "vectorutils"
30
31using namespace llvm;
32using namespace llvm::PatternMatch;
33
34/// Maximum factor for an interleaved memory access.
35static cl::opt<unsigned> MaxInterleaveGroupFactor(
36 "max-interleave-group-factor", cl::Hidden,
37 cl::desc("Maximum factor for an interleaved access group (default = 8)"),
38 cl::init(8));
39
40/// Return true if all of the intrinsic's arguments and return type are scalars
41/// for the scalar form of the intrinsic, and vectors for the vector form of the
42/// intrinsic (except operands that are marked as always being scalar by
43/// isVectorIntrinsicWithScalarOpAtArg).
44bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
45 switch (ID) {
46 case Intrinsic::abs: // Begin integer bit-manipulation.
47 case Intrinsic::bswap:
48 case Intrinsic::bitreverse:
49 case Intrinsic::ctpop:
50 case Intrinsic::ctlz:
51 case Intrinsic::cttz:
52 case Intrinsic::fshl:
53 case Intrinsic::fshr:
54 case Intrinsic::smax:
55 case Intrinsic::smin:
56 case Intrinsic::umax:
57 case Intrinsic::umin:
58 case Intrinsic::sadd_sat:
59 case Intrinsic::ssub_sat:
60 case Intrinsic::uadd_sat:
61 case Intrinsic::usub_sat:
62 case Intrinsic::smul_fix:
63 case Intrinsic::smul_fix_sat:
64 case Intrinsic::umul_fix:
65 case Intrinsic::umul_fix_sat:
66 case Intrinsic::sqrt: // Begin floating-point.
67 case Intrinsic::sin:
68 case Intrinsic::cos:
69 case Intrinsic::exp:
70 case Intrinsic::exp2:
71 case Intrinsic::log:
72 case Intrinsic::log10:
73 case Intrinsic::log2:
74 case Intrinsic::fabs:
75 case Intrinsic::minnum:
76 case Intrinsic::maxnum:
77 case Intrinsic::minimum:
78 case Intrinsic::maximum:
79 case Intrinsic::copysign:
80 case Intrinsic::floor:
81 case Intrinsic::ceil:
82 case Intrinsic::trunc:
83 case Intrinsic::rint:
84 case Intrinsic::nearbyint:
85 case Intrinsic::round:
86 case Intrinsic::roundeven:
87 case Intrinsic::pow:
88 case Intrinsic::fma:
89 case Intrinsic::fmuladd:
90 case Intrinsic::powi:
91 case Intrinsic::canonicalize:
92 case Intrinsic::fptosi_sat:
93 case Intrinsic::fptoui_sat:
94 return true;
95 default:
96 return false;
97 }
98}
99
100/// Identifies if the vector form of the intrinsic has a scalar operand.
101bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
102 unsigned ScalarOpdIdx) {
103 switch (ID) {
104 case Intrinsic::abs:
105 case Intrinsic::ctlz:
106 case Intrinsic::cttz:
107 case Intrinsic::powi:
108 return (ScalarOpdIdx == 1);
109 case Intrinsic::smul_fix:
110 case Intrinsic::smul_fix_sat:
111 case Intrinsic::umul_fix:
112 case Intrinsic::umul_fix_sat:
113 return (ScalarOpdIdx == 2);
114 default:
115 return false;
116 }
117}
118
119bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
120 unsigned OpdIdx) {
121 switch (ID) {
122 case Intrinsic::fptosi_sat:
123 case Intrinsic::fptoui_sat:
124 return OpdIdx == 0;
125 case Intrinsic::powi:
126 return OpdIdx == 1;
127 default:
128 return false;
129 }
130}
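// Illustrative note (not in the original source): for a call to llvm.powi,
// the exponent operand at index 1 stays scalar and carries the overloaded
// type when the call is vectorized, so both
// isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 1) and
// isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::powi, 1) return true,
// while the base operand at index 0 is widened to a vector.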
131
132/// Returns intrinsic ID for call.
133/// For the input call instruction it finds the mapping intrinsic and returns
134/// its ID; if no mapping intrinsic is found, it returns not_intrinsic.
135Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
136 const TargetLibraryInfo *TLI) {
137 Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
138 if (ID == Intrinsic::not_intrinsic)
139 return Intrinsic::not_intrinsic;
140
141 if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
142 ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
143 ID == Intrinsic::experimental_noalias_scope_decl ||
144 ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
145 return ID;
146 return Intrinsic::not_intrinsic;
147}
148
149/// Find the operand of the GEP that should be checked for consecutive
150/// stores. This ignores trailing indices that have no effect on the final
151/// pointer.
152unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
153 const DataLayout &DL = Gep->getModule()->getDataLayout();
154 unsigned LastOperand = Gep->getNumOperands() - 1;
155 TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
156
157 // Walk backwards and try to peel off zeros.
158 while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
159 // Find the type we're currently indexing into.
160 gep_type_iterator GEPTI = gep_type_begin(Gep);
161 std::advance(GEPTI, LastOperand - 2);
162
163 // If it's a type with the same allocation size as the result of the GEP we
164 // can peel off the zero index.
165 if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
166 break;
167 --LastOperand;
168 }
169
170 return LastOperand;
171}
172
173/// If the argument is a GEP, then returns the operand identified by
174/// getGEPInductionOperand. However, if there is some other non-loop-invariant
175/// operand, it returns that instead.
176Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
177 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
178 if (!GEP)
179 return Ptr;
180
181 unsigned InductionOperand = getGEPInductionOperand(GEP);
182
183 // Check that all of the gep indices are uniform except for our induction
184 // operand.
185 for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
186 if (i != InductionOperand &&
187 !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
188 return Ptr;
189 return GEP->getOperand(InductionOperand);
190}
191
192/// If a value has only one user that is a CastInst, return it.
193static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
194 Value *UniqueCast = nullptr;
195 for (User *U : Ptr->users()) {
196 CastInst *CI = dyn_cast<CastInst>(U);
197 if (CI && CI->getType() == Ty) {
198 if (!UniqueCast)
199 UniqueCast = CI;
200 else
201 return nullptr;
202 }
203 }
204 return UniqueCast;
205}
206
207/// Get the stride of a pointer access in a loop. Looks for symbolic
208/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
209Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
210 auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
211 if (!PtrTy || PtrTy->isAggregateType())
212 return nullptr;
213
214 // Try to strip a gep instruction so the pointer (actually the index at this
215 // point) is easier to analyze. If OrigPtr is equal to Ptr we are analyzing
216 // the pointer; otherwise we are analyzing the index.
217 Value *OrigPtr = Ptr;
218
219 // The size of the pointer access.
220 int64_t PtrAccessSize = 1;
221
222 Ptr = stripGetElementPtr(Ptr, SE, Lp);
223 const SCEV *V = SE->getSCEV(Ptr);
224
225 if (Ptr != OrigPtr)
226 // Strip off casts.
227 while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
228 V = C->getOperand();
229
230 const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
231 if (!S)
232 return nullptr;
233
234 V = S->getStepRecurrence(*SE);
235 if (!V)
236 return nullptr;
237
238 // Strip off the size of access multiplication if we are still analyzing the
239 // pointer.
240 if (OrigPtr == Ptr) {
241 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
242 if (M->getOperand(0)->getSCEVType() != scConstant)
243 return nullptr;
244
245 const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
246
247 // Huge step value - give up.
248 if (APStepVal.getBitWidth() > 64)
249 return nullptr;
250
251 int64_t StepVal = APStepVal.getSExtValue();
252 if (PtrAccessSize != StepVal)
253 return nullptr;
254 V = M->getOperand(1);
255 }
256 }
257
258 // Strip off casts.
259 Type *StripedOffRecurrenceCast = nullptr;
260 if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
261 StripedOffRecurrenceCast = C->getType();
262 V = C->getOperand();
263 }
264
265 // Look for the loop invariant symbolic value.
266 const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
267 if (!U)
268 return nullptr;
269
270 Value *Stride = U->getValue();
271 if (!Lp->isLoopInvariant(Stride))
272 return nullptr;
273
274 // If we have stripped off the recurrence cast we have to make sure that we
275 // return the value that is used in this loop so that we can replace it later.
276 if (StripedOffRecurrenceCast)
277 Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
278
279 return Stride;
280}
281
282/// Given a vector and an element number, see if the scalar value is
283/// already around as a register, for example if it were inserted then extracted
284/// from the vector.
285Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
286 assert(V->getType()->isVectorTy() && "Not looking at a vector?");
287 VectorType *VTy = cast<VectorType>(V->getType());
288 // For fixed-length vector, return undef for out of range access.
289 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
290 unsigned Width = FVTy->getNumElements();
291 if (EltNo >= Width)
292 return UndefValue::get(FVTy->getElementType());
293 }
294
295 if (Constant *C = dyn_cast<Constant>(V))
296 return C->getAggregateElement(EltNo);
297
298 if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
299 // If this is an insert to a variable element, we don't know what it is.
300 if (!isa<ConstantInt>(III->getOperand(2)))
301 return nullptr;
302 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
303
304 // If this is an insert to the element we are looking for, return the
305 // inserted value.
306 if (EltNo == IIElt)
307 return III->getOperand(1);
308
309 // Guard against infinite loop on malformed, unreachable IR.
310 if (III == III->getOperand(0))
311 return nullptr;
312
313 // Otherwise, the insertelement doesn't modify the value, recurse on its
314 // vector input.
315 return findScalarElement(III->getOperand(0), EltNo);
316 }
317
318 ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
319 // Restrict the following transformation to fixed-length vector.
320 if (SVI && isa<FixedVectorType>(SVI->getType())) {
321 unsigned LHSWidth =
322 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
323 int InEl = SVI->getMaskValue(EltNo);
324 if (InEl < 0)
325 return UndefValue::get(VTy->getElementType());
326 if (InEl < (int)LHSWidth)
327 return findScalarElement(SVI->getOperand(0), InEl);
328 return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
329 }
330
331 // Extract a value from a vector add operation with a constant zero.
332 // TODO: Use getBinOpIdentity() to generalize this.
333 Value *Val; Constant *C;
334 if (match(V, m_Add(m_Value(Val), m_Constant(C))))
335 if (Constant *Elt = C->getAggregateElement(EltNo))
336 if (Elt->isNullValue())
337 return findScalarElement(Val, EltNo);
338
339 // If the vector is a splat then we can trivially find the scalar element.
340 if (isa<ScalableVectorType>(VTy))
341 if (Value *Splat = getSplatValue(V))
342 if (EltNo < VTy->getElementCount().getKnownMinValue())
343 return Splat;
344
345 // Otherwise, we don't know.
346 return nullptr;
347}
348
349int llvm::getSplatIndex(ArrayRef<int> Mask) {
350 int SplatIndex = -1;
351 for (int M : Mask) {
352 // Ignore invalid (undefined) mask elements.
353 if (M < 0)
354 continue;
355
356 // There can be only 1 non-negative mask element value if this is a splat.
357 if (SplatIndex != -1 && SplatIndex != M)
358 return -1;
359
360 // Initialize the splat index to the 1st non-negative mask element.
361 SplatIndex = M;
362 }
363 assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
364 return SplatIndex;
365}
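// Example (illustrative comment, not in the original source): for the shuffle
// mask <3, undef, 3, 3> getSplatIndex returns 3, since undef (negative)
// elements are ignored; for <0, 1, 0, 1> it returns -1 because two different
// non-negative elements appear.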
366
367/// Get splat value if the input is a splat vector or return nullptr.
368/// This function is not fully general. It checks only 2 cases:
369/// the input value is (1) a splat constant vector or (2) a sequence
370/// of instructions that broadcasts a scalar at element 0.
371Value *llvm::getSplatValue(const Value *V) {
372 if (isa<VectorType>(V->getType()))
373 if (auto *C = dyn_cast<Constant>(V))
374 return C->getSplatValue();
375
376 // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
377 Value *Splat;
378 if (match(V,
379 m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
380 m_Value(), m_ZeroMask())))
381 return Splat;
382
383 return nullptr;
384}
385
386bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
387 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
388
389 if (isa<VectorType>(V->getType())) {
390 if (isa<UndefValue>(V))
391 return true;
392 // FIXME: We can allow undefs, but if Index was specified, we may want to
393 // check that the constant is defined at that index.
394 if (auto *C = dyn_cast<Constant>(V))
395 return C->getSplatValue() != nullptr;
396 }
397
398 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
399 // FIXME: We can safely allow undefs here. If Index was specified, we will
400 // check that the mask elt is defined at the required index.
401 if (!all_equal(Shuf->getShuffleMask()))
402 return false;
403
404 // Match any index.
405 if (Index == -1)
406 return true;
407
408 // Match a specific element. The mask should be defined at and match the
409 // specified index.
410 return Shuf->getMaskValue(Index) == Index;
411 }
412
413 // The remaining tests are all recursive, so bail out if we hit the limit.
414 if (Depth++ == MaxAnalysisRecursionDepth)
415 return false;
416
417 // If both operands of a binop are splats, the result is a splat.
418 Value *X, *Y, *Z;
419 if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
420 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
421
422 // If all operands of a select are splats, the result is a splat.
423 if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
424 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
425 isSplatValue(Z, Index, Depth);
426
427 // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
428
429 return false;
430}
431
432bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
433 const APInt &DemandedElts, APInt &DemandedLHS,
434 APInt &DemandedRHS, bool AllowUndefElts) {
435 DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);
436
437 // Early out if we don't demand any elements.
438 if (DemandedElts.isZero())
439 return true;
440
441 // Simple case of a shuffle with zeroinitializer.
442 if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
443 DemandedLHS.setBit(0);
444 return true;
445 }
446
447 for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
448 int M = Mask[I];
449 assert((-1 <= M) && (M < (SrcWidth * 2)) &&
450 "Invalid shuffle mask constant");
451
452 if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
453 continue;
454
455 // For undef elements, we don't know anything about the common state of
456 // the shuffle result.
457 if (M < 0)
458 return false;
459
460 if (M < SrcWidth)
461 DemandedLHS.setBit(M);
462 else
463 DemandedRHS.setBit(M - SrcWidth);
464 }
465
466 return true;
467}
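// Worked example (illustrative comment): with SrcWidth = 4, Mask = <0, 5, 2, 7>
// and all result elements demanded, DemandedLHS becomes 0b0101 (elements 0 and
// 2 of the first source) and DemandedRHS becomes 0b1010 (elements 1 and 3 of
// the second source).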
468
469void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
470 SmallVectorImpl<int> &ScaledMask) {
471 assert(Scale > 0 && "Unexpected scaling factor");
472
473 // Fast-path: if no scaling, then it is just a copy.
474 if (Scale == 1) {
475 ScaledMask.assign(Mask.begin(), Mask.end());
476 return;
477 }
478
479 ScaledMask.clear();
480 for (int MaskElt : Mask) {
481 if (MaskElt >= 0) {
482 assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
483 "Overflowed 32-bits");
484 }
485 for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
486 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
487 }
488}
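// Worked example (illustrative comment): narrowing <1, undef> with Scale = 2
// yields <2, 3, undef, undef>; each defined element i expands to the Scale
// consecutive narrow elements starting at Scale * i, while negative (sentinel)
// elements are replicated unchanged.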
489
490bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
491 SmallVectorImpl<int> &ScaledMask) {
492 assert(Scale > 0 && "Unexpected scaling factor");
493
494 // Fast-path: if no scaling, then it is just a copy.
495 if (Scale == 1) {
496 ScaledMask.assign(Mask.begin(), Mask.end());
497 return true;
498 }
499
500 // We must map the original elements down evenly to a type with less elements.
501 int NumElts = Mask.size();
502 if (NumElts % Scale != 0)
503 return false;
504
505 ScaledMask.clear();
506 ScaledMask.reserve(NumElts / Scale);
507
508 // Step through the input mask by splitting into Scale-sized slices.
509 do {
510 ArrayRef<int> MaskSlice = Mask.take_front(Scale);
511 assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
512
513 // The first element of the slice determines how we evaluate this slice.
514 int SliceFront = MaskSlice.front();
515 if (SliceFront < 0) {
516 // Negative values (undef or other "sentinel" values) must be equal across
517 // the entire slice.
518 if (!all_equal(MaskSlice))
519 return false;
520 ScaledMask.push_back(SliceFront);
521 } else {
522 // A positive mask element must be cleanly divisible.
523 if (SliceFront % Scale != 0)
524 return false;
525 // Elements of the slice must be consecutive.
526 for (int i = 1; i < Scale; ++i)
527 if (MaskSlice[i] != SliceFront + i)
528 return false;
529 ScaledMask.push_back(SliceFront / Scale);
530 }
531 Mask = Mask.drop_front(Scale);
532 } while (!Mask.empty());
533
534 assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
535
536 // All elements of the original mask can be scaled down to map to the elements
537 // of a mask with wider elements.
538 return true;
539}
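// Worked example (illustrative comment): widening <2, 3, 0, 1> with Scale = 2
// succeeds and yields <1, 0>, because every 2-element slice starts at a
// multiple of the scale and is consecutive. Widening <1, 2, 3, 0> with
// Scale = 2 returns false, since the first slice does not start at an even
// element.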
540
541void llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
542 SmallVectorImpl<int> &ScaledMask) {
543 std::array<SmallVector<int, 16>, 2> TmpMasks;
544 SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
545 ArrayRef<int> InputMask = Mask;
546 for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
547 while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
548 InputMask = *Output;
549 std::swap(Output, Tmp);
550 }
551 }
552 ScaledMask.assign(InputMask.begin(), InputMask.end());
553}
554
555void llvm::processShuffleMasks(
556 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
557 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
558 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
559 function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
560 SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
561 // Try to perform better estimation of the permutation.
562 // 1. Split the source/destination vectors into real registers.
563 // 2. Do the mask analysis to identify which real registers are
564 // permuted.
565 int Sz = Mask.size();
566 unsigned SzDest = Sz / NumOfDestRegs;
567 unsigned SzSrc = Sz / NumOfSrcRegs;
568 for (unsigned I = 0; I < NumOfDestRegs; ++I) {
569 auto &RegMasks = Res[I];
570 RegMasks.assign(NumOfSrcRegs, {});
571 // Check that the values in dest registers are in the one src
572 // register.
573 for (unsigned K = 0; K < SzDest; ++K) {
574 int Idx = I * SzDest + K;
575 if (Idx == Sz)
576 break;
577 if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
578 continue;
579 int SrcRegIdx = Mask[Idx] / SzSrc;
580 // Add a cost of PermuteTwoSrc for each new source register permute,
581 // if we have more than one source registers.
582 if (RegMasks[SrcRegIdx].empty())
583 RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
584 RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
585 }
586 }
587 // Process split mask.
588 for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
589 auto &Dest = Res[I];
590 int NumSrcRegs =
591 count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
592 switch (NumSrcRegs) {
593 case 0:
594 // No input vectors were used!
595 NoInputAction();
596 break;
597 case 1: {
598 // Find the only mask with at least single undef mask elem.
599 auto *It =
600 find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
601 unsigned SrcReg = std::distance(Dest.begin(), It);
602 SingleInputAction(*It, SrcReg, I);
603 break;
604 }
605 default: {
606 // The first mask is a permutation of a single register. Since we have >2
607 // input registers to shuffle, we merge the masks for 2 first registers
608 // and generate a shuffle of 2 registers rather than the reordering of the
609 // first register and then shuffle with the second register. Next,
610 // generate the shuffles of the resulting register + the remaining
611 // registers from the list.
612 auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
613 ArrayRef<int> SecondMask) {
614 for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
615 if (SecondMask[Idx] != UndefMaskElem) {
616 assert(FirstMask[Idx] == UndefMaskElem &&
617 "Expected undefined mask element.");
618 FirstMask[Idx] = SecondMask[Idx] + VF;
619 }
620 }
621 };
622 auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
623 for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
624 if (Mask[Idx] != UndefMaskElem)
625 Mask[Idx] = Idx;
626 }
627 };
628 int SecondIdx;
629 do {
630 int FirstIdx = -1;
631 SecondIdx = -1;
632 MutableArrayRef<int> FirstMask, SecondMask;
633 for (unsigned I = 0; I < NumOfDestRegs; ++I) {
634 SmallVectorImpl<int> &RegMask = Dest[I];
635 if (RegMask.empty())
636 continue;
637
638 if (FirstIdx == SecondIdx) {
639 FirstIdx = I;
640 FirstMask = RegMask;
641 continue;
642 }
643 SecondIdx = I;
644 SecondMask = RegMask;
645 CombineMasks(FirstMask, SecondMask);
646 ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
647 NormalizeMask(FirstMask);
648 RegMask.clear();
649 SecondMask = FirstMask;
650 SecondIdx = FirstIdx;
651 }
652 if (FirstIdx != SecondIdx && SecondIdx >= 0) {
653 CombineMasks(SecondMask, FirstMask);
654 ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
655 Dest[FirstIdx].clear();
656 NormalizeMask(SecondMask);
657 }
658 } while (SecondIdx >= 0);
659 break;
660 }
661 }
662 }
663}
664
665MapVector<Instruction *, uint64_t>
666llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
667 const TargetTransformInfo *TTI) {
668
669 // DemandedBits will give us every value's live-out bits. But we want
670 // to ensure no extra casts would need to be inserted, so every DAG
671 // of connected values must have the same minimum bitwidth.
672 EquivalenceClasses<Value *> ECs;
673 SmallVector<Value *, 16> Worklist;
674 SmallPtrSet<Value *, 4> Roots;
675 SmallPtrSet<Value *, 16> Visited;
676 DenseMap<Value *, uint64_t> DBits;
677 SmallPtrSet<Instruction *, 4> InstructionSet;
678 MapVector<Instruction *, uint64_t> MinBWs;
679
680 // Determine the roots. We work bottom-up, from truncs or icmps.
681 bool SeenExtFromIllegalType = false;
682 for (auto *BB : Blocks)
683 for (auto &I : *BB) {
684 InstructionSet.insert(&I);
685
686 if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
687 !TTI->isTypeLegal(I.getOperand(0)->getType()))
688 SeenExtFromIllegalType = true;
689
690 // Only deal with non-vector integers up to 64-bits wide.
691 if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
692 !I.getType()->isVectorTy() &&
693 I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
694 // Don't make work for ourselves. If we know the loaded type is legal,
695 // don't add it to the worklist.
696 if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
697 continue;
698
699 Worklist.push_back(&I);
700 Roots.insert(&I);
701 }
702 }
703 // Early exit.
704 if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
705 return MinBWs;
706
707 // Now proceed breadth-first, unioning values together.
708 while (!Worklist.empty()) {
709 Value *Val = Worklist.pop_back_val();
710 Value *Leader = ECs.getOrInsertLeaderValue(Val);
711
712 if (!Visited.insert(Val).second)
713 continue;
714
715 // Non-instructions terminate a chain successfully.
716 if (!isa<Instruction>(Val))
717 continue;
718 Instruction *I = cast<Instruction>(Val);
719
720 // If we encounter a type that is larger than 64 bits, we can't represent
721 // it so bail out.
722 if (DB.getDemandedBits(I).getBitWidth() > 64)
723 return MapVector<Instruction *, uint64_t>();
724
725 uint64_t V = DB.getDemandedBits(I).getZExtValue();
726 DBits[Leader] |= V;
727 DBits[I] = V;
728
729 // Casts, loads and instructions outside of our range terminate a chain
730 // successfully.
731 if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
732 !InstructionSet.count(I))
733 continue;
734
735 // Unsafe casts terminate a chain unsuccessfully. We can't do anything
736 // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
737 // transform anything that relies on them.
738 if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
739 !I->getType()->isIntegerTy()) {
740 DBits[Leader] |= ~0ULL;
741 continue;
742 }
743
744 // We don't modify the types of PHIs. Reductions will already have been
745 // truncated if possible, and inductions' sizes will have been chosen by
746 // indvars.
747 if (isa<PHINode>(I))
748 continue;
749
750 if (DBits[Leader] == ~0ULL)
751 // All bits demanded, no point continuing.
752 continue;
753
754 for (Value *O : cast<User>(I)->operands()) {
755 ECs.unionSets(Leader, O);
756 Worklist.push_back(O);
757 }
758 }
759
760 // Now we've discovered all values, walk them to see if there are
761 // any users we didn't see. If there are, we can't optimize that
762 // chain.
763 for (auto &I : DBits)
764 for (auto *U : I.first->users())
765 if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
766 DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
767
768 for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
769 uint64_t LeaderDemandedBits = 0;
770 for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
771 LeaderDemandedBits |= DBits[M];
772
773 uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
774 // Round up to a power of 2
775 MinBW = llvm::bit_ceil(MinBW);
776
777 // We don't modify the types of PHIs. Reductions will already have been
778 // truncated if possible, and inductions' sizes will have been chosen by
779 // indvars.
780 // If we are required to shrink a PHI, abandon this entire equivalence class.
781 bool Abort = false;
782 for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
783 if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
784 Abort = true;
785 break;
786 }
787 if (Abort)
788 continue;
789
790 for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
791 if (!isa<Instruction>(M))
792 continue;
793 Type *Ty = M->getType();
794 if (Roots.count(M))
795 Ty = cast<Instruction>(M)->getOperand(0)->getType();
796 if (MinBW < Ty->getScalarSizeInBits())
797 MinBWs[cast<Instruction>(M)] = MinBW;
798 }
799 }
800
801 return MinBWs;
802}
803
804/// Add all access groups in @p AccGroups to @p List.
805template <typename ListT>
806static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
807 // Interpret an access group as a list containing itself.
808 if (AccGroups->getNumOperands() == 0) {
809 assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
810 List.insert(AccGroups);
811 return;
812 }
813
814 for (const auto &AccGroupListOp : AccGroups->operands()) {
815 auto *Item = cast<MDNode>(AccGroupListOp.get());
816 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
817 List.insert(Item);
818 }
819}
820
821MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
822 if (!AccGroups1)
823 return AccGroups2;
824 if (!AccGroups2)
825 return AccGroups1;
826 if (AccGroups1 == AccGroups2)
827 return AccGroups1;
828
829 SmallSetVector<Metadata *, 4> Union;
830 addToAccessGroupList(Union, AccGroups1);
831 addToAccessGroupList(Union, AccGroups2);
832
833 if (Union.size() == 0)
834 return nullptr;
835 if (Union.size() == 1)
836 return cast<MDNode>(Union.front());
837
838 LLVMContext &Ctx = AccGroups1->getContext();
839 return MDNode::get(Ctx, Union.getArrayRef());
840}
841
842MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
843 const Instruction *Inst2) {
844 bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
845 bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
846
847 if (!MayAccessMem1 && !MayAccessMem2)
848 return nullptr;
849 if (!MayAccessMem1)
850 return Inst2->getMetadata(LLVMContext::MD_access_group);
851 if (!MayAccessMem2)
852 return Inst1->getMetadata(LLVMContext::MD_access_group);
853
854 MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
855 MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
856 if (!MD1 || !MD2)
857 return nullptr;
858 if (MD1 == MD2)
859 return MD1;
860
861 // Use set for scalable 'contains' check.
862 SmallPtrSet<Metadata *, 4> AccGroupSet2;
863 addToAccessGroupList(AccGroupSet2, MD2);
864
865 SmallVector<Metadata *, 4> Intersection;
866 if (MD1->getNumOperands() == 0) {
867 assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
868 if (AccGroupSet2.count(MD1))
869 Intersection.push_back(MD1);
870 } else {
871 for (const MDOperand &Node : MD1->operands()) {
872 auto *Item = cast<MDNode>(Node.get());
873 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
874 if (AccGroupSet2.count(Item))
875 Intersection.push_back(Item);
876 }
877 }
878
879 if (Intersection.size() == 0)
880 return nullptr;
881 if (Intersection.size() == 1)
882 return cast<MDNode>(Intersection.front());
883
884 LLVMContext &Ctx = Inst1->getContext();
885 return MDNode::get(Ctx, Intersection);
886}
887
888/// \returns \p I after propagating metadata from \p VL.
889Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
890 if (VL.empty())
891 return Inst;
892 Instruction *I0 = cast<Instruction>(VL[0]);
893 SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
894 I0->getAllMetadataOtherThanDebugLoc(Metadata);
895
896 for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
897 LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
898 LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
899 LLVMContext::MD_access_group}) {
900 MDNode *MD = I0->getMetadata(Kind);
901
902 for (int J = 1, E = VL.size(); MD && J != E; ++J) {
903 const Instruction *IJ = cast<Instruction>(VL[J]);
904 MDNode *IMD = IJ->getMetadata(Kind);
905 switch (Kind) {
906 case LLVMContext::MD_tbaa:
907 MD = MDNode::getMostGenericTBAA(MD, IMD);
908 break;
909 case LLVMContext::MD_alias_scope:
910 MD = MDNode::getMostGenericAliasScope(MD, IMD);
912 case LLVMContext::MD_fpmath:
913 MD = MDNode::getMostGenericFPMath(MD, IMD);
914 break;
915 case LLVMContext::MD_noalias:
916 case LLVMContext::MD_nontemporal:
917 case LLVMContext::MD_invariant_load:
918 MD = MDNode::intersect(MD, IMD);
919 break;
920 case LLVMContext::MD_access_group:
921 MD = intersectAccessGroups(Inst, IJ);
922 break;
923 default:
924 llvm_unreachable("unhandled metadata");
925 }
926 }
927
928 Inst->setMetadata(Kind, MD);
929 }
930
931 return Inst;
932}
933
934Constant *
935llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
936 const InterleaveGroup<Instruction> &Group) {
937 // All 1's means mask is not needed.
938 if (Group.getNumMembers() == Group.getFactor())
939 return nullptr;
940
941 // TODO: support reversed access.
942 assert(!Group.isReverse() && "Reversed group not supported.");
943
944 SmallVector<Constant *, 16> Mask;
945 for (unsigned i = 0; i < VF; i++)
946 for (unsigned j = 0; j < Group.getFactor(); ++j) {
947 unsigned HasMember = Group.getMember(j) ? 1 : 0;
948 Mask.push_back(Builder.getInt1(HasMember));
949 }
950
951 return ConstantVector::get(Mask);
952}
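// Example (illustrative comment): for a factor-3 group with VF = 2 whose
// members sit at indices 0 and 2, the returned gap mask is
// <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>: the missing member is masked off in
// each factor-wide chunk, and a full group simply returns nullptr.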
953
954llvm::SmallVector<int, 16>
955llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
956 SmallVector<int, 16> MaskVec;
957 for (unsigned i = 0; i < VF; i++)
958 for (unsigned j = 0; j < ReplicationFactor; j++)
959 MaskVec.push_back(i);
960
961 return MaskVec;
962}
963
964llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
965 unsigned NumVecs) {
966 SmallVector<int, 16> Mask;
967 for (unsigned i = 0; i < VF; i++)
968 for (unsigned j = 0; j < NumVecs; j++)
969 Mask.push_back(j * VF + i);
970
971 return Mask;
972}
973
974llvm::SmallVector<int, 16>
975llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
976 SmallVector<int, 16> Mask;
977 for (unsigned i = 0; i < VF; i++)
978 Mask.push_back(Start + i * Stride);
979
980 return Mask;
981}
982
983llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
984 unsigned NumInts,
985 unsigned NumUndefs) {
986 SmallVector<int, 16> Mask;
987 for (unsigned i = 0; i < NumInts; i++)
988 Mask.push_back(Start + i);
989
990 for (unsigned i = 0; i < NumUndefs; i++)
991 Mask.push_back(-1);
992
993 return Mask;
994}
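// Worked examples (illustrative comment, not in the original source):
//   createReplicatedMask(3, 2)    -> <0, 0, 0, 1, 1, 1>
//   createInterleaveMask(4, 2)    -> <0, 4, 1, 5, 2, 6, 3, 7>
//   createStrideMask(0, 2, 4)     -> <0, 2, 4, 6>
//   createSequentialMask(2, 3, 2) -> <2, 3, 4, -1, -1>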
995
996llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
997 unsigned NumElts) {
998 // Avoid casts in the loop and make sure we have a reasonable number.
999 int NumEltsSigned = NumElts;
1000 assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
1001
1002 // If the mask chooses an element from operand 1, reduce it to choose from the
1003 // corresponding element of operand 0. Undef mask elements are unchanged.
1004 SmallVector<int, 16> UnaryMask;
1005 for (int MaskElt : Mask) {
1006 assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
1007 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
1008 UnaryMask.push_back(UnaryElt);
1009 }
1010 return UnaryMask;
1011}
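// Example (illustrative comment): with NumElts = 4, the two-operand mask
// <0, 5, 2, 7> becomes the unary mask <0, 1, 2, 3>; elements taken from
// operand 1 are remapped to the matching lane of operand 0.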
1012
1013/// A helper function for concatenating vectors. This function concatenates two
1014/// vectors having the same element type. If the second vector has fewer
1015/// elements than the first, it is padded with undefs.
1016static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
1017 Value *V2) {
1018 VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
1019 VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
1020 assert(VecTy1 && VecTy2 &&
1021 VecTy1->getScalarType() == VecTy2->getScalarType() &&
1022 "Expect two vectors with the same element type");
1023
1024 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
1025 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
1026 assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
1027
1028 if (NumElts1 > NumElts2) {
1029 // Extend with UNDEFs.
1030 V2 = Builder.CreateShuffleVector(
1031 V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
1032 }
1033
1034 return Builder.CreateShuffleVector(
1035 V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
1036}
1037
1038Value *llvm::concatenateVectors(IRBuilderBase &Builder,
1039 ArrayRef<Value *> Vecs) {
1040 unsigned NumVecs = Vecs.size();
1041 assert(NumVecs > 1 && "Should be at least two vectors");
1042
1043 SmallVector<Value *, 8> ResList;
1044 ResList.append(Vecs.begin(), Vecs.end());
1045 do {
1046 SmallVector<Value *, 8> TmpList;
1047 for (unsigned i = 0; i < NumVecs - 1; i += 2) {
1048 Value *V0 = ResList[i], *V1 = ResList[i + 1];
1049 assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
1050 "Only the last vector may have a different type");
1051
1052 TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
1053 }
1054
1055 // Push the last vector if the total number of vectors is odd.
1056 if (NumVecs % 2 != 0)
1057 TmpList.push_back(ResList[NumVecs - 1]);
1058
1059 ResList = TmpList;
1060 NumVecs = ResList.size();
1061 } while (NumVecs > 1);
1062
1063 return ResList[0];
1064}
1065
1066bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
1067 assert(isa<VectorType>(Mask->getType()) &&
1068 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1069 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1070 1 &&
1071 "Mask must be a vector of i1");
1072
1073 auto *ConstMask = dyn_cast<Constant>(Mask);
1074 if (!ConstMask)
1075 return false;
1076 if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1077 return true;
1078 if (isa<ScalableVectorType>(ConstMask->getType()))
1079 return false;
1080 for (unsigned
1081 I = 0,
1082 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1083 I != E; ++I) {
1084 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1085 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1086 continue;
1087 return false;
1088 }
1089 return true;
1090}
1091
1092bool llvm::maskIsAllOneOrUndef(Value *Mask) {
1093 assert(isa<VectorType>(Mask->getType()) &&
1094 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1095 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1096 1 &&
1097 "Mask must be a vector of i1");
1098
1099 auto *ConstMask = dyn_cast<Constant>(Mask);
1100 if (!ConstMask)
1101 return false;
1102 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1103 return true;
1104 if (isa<ScalableVectorType>(ConstMask->getType()))
1105 return false;
1106 for (unsigned
1107 I = 0,
1108 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1109 I != E; ++I) {
1110 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1111 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1112 continue;
1113 return false;
1114 }
1115 return true;
1116}
1117
1118/// TODO: This is a lot like known bits, but for
1119/// vectors. Is there something we can common this with?
1120APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
1121 assert(isa<FixedVectorType>(Mask->getType()) &&
1122 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1123 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1124 1 &&
1125 "Mask must be a fixed width vector of i1");
1126
1127 const unsigned VWidth =
1128 cast<FixedVectorType>(Mask->getType())->getNumElements();
1129 APInt DemandedElts = APInt::getAllOnes(VWidth);
1130 if (auto *CV = dyn_cast<ConstantVector>(Mask))
1131 for (unsigned i = 0; i < VWidth; i++)
1132 if (CV->getAggregateElement(i)->isNullValue())
1133 DemandedElts.clearBit(i);
1134 return DemandedElts;
1135}
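// Example (illustrative comment): for the constant mask <i1 1, i1 0, i1 1, i1 0>
// the returned APInt is 0b0101 -- only lanes whose mask bit is a known zero are
// cleared, so a non-constant mask conservatively demands every lane.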
1136
1137bool InterleavedAccessInfo::isStrided(int Stride) {
1138 unsigned Factor = std::abs(Stride);
1139 return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
1140}
1141
1142void InterleavedAccessInfo::collectConstStrideAccesses(
1143 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
1144 const ValueToValueMap &Strides) {
1145 auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
1146
1147 // Since it's desired that the load/store instructions be maintained in
1148 // "program order" for the interleaved access analysis, we have to visit the
1149 // blocks in the loop in reverse postorder (i.e., in a topological order).
1150 // Such an ordering will ensure that any load/store that may be executed
1151 // before a second load/store will precede the second load/store in
1152 // AccessStrideInfo.
1153 LoopBlocksDFS DFS(TheLoop);
1154 DFS.perform(LI);
1155 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
1156 for (auto &I : *BB) {
1157 Value *Ptr = getLoadStorePointerOperand(&I);
1158 if (!Ptr)
1159 continue;
1160 Type *ElementTy = getLoadStoreType(&I);
1161
1162 // Currently, codegen doesn't support cases where the type size doesn't
1163 // match the alloc size. Skip them for now.
1164 uint64_t Size = DL.getTypeAllocSize(ElementTy);
1165 if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
1166 continue;
1167
1168 // We don't check wrapping here because we don't know yet if Ptr will be
1169 // part of a full group or a group with gaps. Checking wrapping for all
1170 // pointers (even those that end up in groups with no gaps) will be overly
1171 // conservative. For full groups, wrapping should be ok since if we would
1172 // wrap around the address space we would do a memory access at nullptr
1173 // even without the transformation. The wrapping checks are therefore
1174 // deferred until after we've formed the interleaved groups.
1175 int64_t Stride =
1176 getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
1177 /*Assume=*/true, /*ShouldCheckWrap=*/false).value_or(0);
1178
1179 const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
1180 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1181 getLoadStoreAlignment(&I));
1182 }
1183}
1184
1185// Analyze interleaved accesses and collect them into interleaved load and
1186// store groups.
1187//
1188// When generating code for an interleaved load group, we effectively hoist all
1189// loads in the group to the location of the first load in program order. When
1190// generating code for an interleaved store group, we sink all stores to the
1191// location of the last store. This code motion can change the order of load
1192// and store instructions and may break dependences.
1193//
1194// The code generation strategy mentioned above ensures that we won't violate
1195// any write-after-read (WAR) dependences.
1196//
1197// E.g., for the WAR dependence: a = A[i]; // (1)
1198// A[i] = b; // (2)
1199//
1200// The store group of (2) is always inserted at or below (2), and the load
1201// group of (1) is always inserted at or above (1). Thus, the instructions will
1202// never be reordered. All other dependences are checked to ensure the
1203// correctness of the instruction reordering.
1204//
1205// The algorithm visits all memory accesses in the loop in bottom-up program
1206// order. Program order is established by traversing the blocks in the loop in
1207// reverse postorder when collecting the accesses.
1208//
1209// We visit the memory accesses in bottom-up order because it can simplify the
1210// construction of store groups in the presence of write-after-write (WAW)
1211// dependences.
1212//
1213// E.g., for the WAW dependence: A[i] = a; // (1)
1214// A[i] = b; // (2)
1215// A[i + 1] = c; // (3)
1216//
1217// We will first create a store group with (3) and (2). (1) can't be added to
1218// this group because it and (2) are dependent. However, (1) can be grouped
1219// with other accesses that may precede it in program order. Note that a
1220// bottom-up order does not imply that WAW dependences should not be checked.
1221void InterleavedAccessInfo::analyzeInterleaving(
1222 bool EnablePredicatedInterleavedMemAccesses) {
1223 LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1224 const ValueToValueMap &Strides = LAI->getSymbolicStrides();
1225
1226 // Holds all accesses with a constant stride.
1227 MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
1228 collectConstStrideAccesses(AccessStrideInfo, Strides);
1229
1230 if (AccessStrideInfo.empty())
1231 return;
1232
1233 // Collect the dependences in the loop.
1234 collectDependences();
1235
1236 // Holds all interleaved store groups temporarily.
1237 SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
1238 // Holds all interleaved load groups temporarily.
1239 SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
1240
1241 // Search in bottom-up program order for pairs of accesses (A and B) that can
1242 // form interleaved load or store groups. In the algorithm below, access A
1243 // precedes access B in program order. We initialize a group for B in the
1244 // outer loop of the algorithm, and then in the inner loop, we attempt to
1245 // insert each A into B's group if:
1246 //
1247 // 1. A and B have the same stride,
1248 // 2. A and B have the same memory object size, and
1249 // 3. A belongs in B's group according to its distance from B.
1250 //
1251 // Special care is taken to ensure group formation will not break any
1252 // dependences.
1253 for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1254 BI != E; ++BI) {
1255 Instruction *B = BI->first;
1256 StrideDescriptor DesB = BI->second;
1257
1258 // Initialize a group for B if it has an allowable stride. Even if we don't
1259 // create a group for B, we continue with the bottom-up algorithm to ensure
1260 // we don't break any of B's dependences.
1261 InterleaveGroup<Instruction> *Group = nullptr;
1262 if (isStrided(DesB.Stride) &&
1263 (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1264 Group = getInterleaveGroup(B);
1265 if (!Group) {
1266 LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1267 << '\n');
1268 Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1269 }
1270 if (B->mayWriteToMemory())
1271 StoreGroups.insert(Group);
1272 else
1273 LoadGroups.insert(Group);
1274 }
1275
1276 for (auto AI = std::next(BI); AI != E; ++AI) {
1277 Instruction *A = AI->first;
1278 StrideDescriptor DesA = AI->second;
1279
1280 // Our code motion strategy implies that we can't have dependences
1281 // between accesses in an interleaved group and other accesses located
1282 // between the first and last member of the group. Note that this also
1283 // means that a group can't have more than one member at a given offset.
1284 // The accesses in a group can have dependences with other accesses, but
1285 // we must ensure we don't extend the boundaries of the group such that
1286 // we encompass those dependent accesses.
1287 //
1288 // For example, assume we have the sequence of accesses shown below in a
1289 // stride-2 loop:
1290 //
1291 // (1, 2) is a group | A[i] = a; // (1)
1292 // | A[i-1] = b; // (2) |
1293 // A[i-3] = c; // (3)
1294 // A[i] = d; // (4) | (2, 4) is not a group
1295 //
1296 // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1297 // but not with (4). If we did, the dependent access (3) would be within
1298 // the boundaries of the (2, 4) group.
1299 if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
1300 // If a dependence exists and A is already in a group, we know that A
1301 // must be a store since A precedes B and WAR dependences are allowed.
1302 // Thus, A would be sunk below B. We release A's group to prevent this
1303 // illegal code motion. A will then be free to form another group with
1304 // instructions that precede it.
1305 if (isInterleaved(A)) {
1306 InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
1307
1308 LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1309 "dependence between " << *A << " and "<< *B << '\n');
1310
1311 StoreGroups.remove(StoreGroup);
1312 releaseGroup(StoreGroup);
1313 }
1314
1315 // If a dependence exists and A is not already in a group (or it was
1316 // and we just released it), B might be hoisted above A (if B is a
1317 // load) or another store might be sunk below A (if B is a store). In
1318 // either case, we can't add additional instructions to B's group. B
1319 // will only form a group with instructions that it precedes.
1320 break;
1321 }
1322
1323 // At this point, we've checked for illegal code motion. If either A or B
1324 // isn't strided, there's nothing left to do.
1325 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1326 continue;
1327
1328 // Ignore A if it's already in a group or isn't the same kind of memory
1329 // operation as B.
1330 // Note that mayReadFromMemory() isn't mutually exclusive to
1331 // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1332 // here, canVectorizeMemory() should have returned false - except for the
1333 // case we asked for optimization remarks.
1334 if (isInterleaved(A) ||
1335 (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1336 (A->mayWriteToMemory() != B->mayWriteToMemory()))
1337 continue;
1338
1339 // Check rules 1 and 2. Ignore A if its stride or size is different from
1340 // that of B.
1341 if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1342 continue;
1343
1344 // Ignore A if the memory object of A and B don't belong to the same
1345 // address space
1346 if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1347 continue;
1348
1349 // Calculate the distance from A to B.
1350 const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1351 PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1352 if (!DistToB)
1353 continue;
1354 int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1355
1356 // Check rule 3. Ignore A if its distance to B is not a multiple of the
1357 // size.
1358 if (DistanceToB % static_cast<int64_t>(DesB.Size))
1359 continue;
1360
1361 // All members of a predicated interleave-group must have the same predicate,
1362 // and currently must reside in the same BB.
1363 BasicBlock *BlockA = A->getParent();
1364 BasicBlock *BlockB = B->getParent();
1365 if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1366 (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1367 continue;
1368
1369 // The index of A is the index of B plus A's distance to B in multiples
1370 // of the size.
1371 int IndexA =
1372 Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1373
1374 // Try to insert A into B's group.
1375 if (Group->insertMember(A, IndexA, DesA.Alignment)) {
1376 LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1377 << " into the interleave group with" << *B
1378 << '\n');
1379 InterleaveGroupMap[A] = Group;
1380
1381 // Set the first load in program order as the insert position.
1382 if (A->mayReadFromMemory())
1383 Group->setInsertPos(A);
1384 }
1385 } // Iteration over A accesses.
1386 } // Iteration over B accesses.
1387
1388 auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
1389 int Index,
1390 std::string FirstOrLast) -> bool {
1391 Instruction *Member = Group->getMember(Index);
1392 assert(Member && "Group member does not exist");
1393 Value *MemberPtr = getLoadStorePointerOperand(Member);
1394 Type *AccessTy = getLoadStoreType(Member);
1395 if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
1396 /*Assume=*/false, /*ShouldCheckWrap=*/true).value_or(0))
1397 return false;
1398 LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1399 << FirstOrLast
1400 << " group member potentially pointer-wrapping.\n");
1401 releaseGroup(Group);
1402 return true;
1403 };
1404
1405 // Remove interleaved groups with gaps whose memory
1406 // accesses may wrap around. We have to revisit the getPtrStride analysis,
1407 // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1408 // not check wrapping (see documentation there).
1409 // FORNOW we use Assume=false;
1410 // TODO: Change to Assume=true but making sure we don't exceed the threshold
1411 // of runtime SCEV assumptions checks (thereby potentially failing to
1412 // vectorize altogether).
1413 // Additional optional optimizations:
1414 // TODO: If we are peeling the loop and we know that the first pointer doesn't
1415 // wrap then we can deduce that all pointers in the group don't wrap.
1416 // This means that we can forcefully peel the loop in order to only have to
1417 // check the first pointer for no-wrap. When we'll change to use Assume=true
1418 // we'll only need at most one runtime check per interleaved group.
1419 for (auto *Group : LoadGroups) {
1420 // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1421 // load would wrap around the address space we would do a memory access at
1422 // nullptr even without the transformation.
1423 if (Group->getNumMembers() == Group->getFactor())
1424 continue;
1425
1426 // Case 2: If first and last members of the group don't wrap this implies
1427 // that all the pointers in the group don't wrap.
1428 // So we check only group member 0 (which is always guaranteed to exist),
1429 // and group member Factor - 1; If the latter doesn't exist we rely on
1430 // peeling (if it is a non-reversed access -- see Case 3).
1431 if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1432 continue;
1433 if (Group->getMember(Group->getFactor() - 1))
1434 InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
1435 std::string("last"));
1436 else {
1437 // Case 3: A non-reversed interleaved load group with gaps: We need
1438 // to execute at least one scalar epilogue iteration. This will ensure
1439 // we don't speculatively access memory out-of-bounds. We only need
1440 // to look for a member at index factor - 1, since every group must have
1441 // a member at index zero.
1442 if (Group->isReverse()) {
1443 LLVM_DEBUG(
1444 dbgs() << "LV: Invalidate candidate interleaved group due to "
1445 "a reverse access with gaps.\n");
1446 releaseGroup(Group);
1447 continue;
1448 }
1449 LLVM_DEBUG(
1450 dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1451 RequiresScalarEpilogue = true;
1452 }
1453 }
1454
1455 for (auto *Group : StoreGroups) {
1456 // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1457 // store would wrap around the address space we would do a memory access at
1458 // nullptr even without the transformation.
1459 if (Group->getNumMembers() == Group->getFactor())
1460 continue;
1461
1462 // Interleave-store-group with gaps is implemented using masked wide store.
1463 // Remove interleaved store groups with gaps if
1464 // masked-interleaved-accesses are not enabled by the target.
1465 if (!EnablePredicatedInterleavedMemAccesses) {
1466 LLVM_DEBUG(
1467 dbgs() << "LV: Invalidate candidate interleaved store group due "
1468 "to gaps.\n");
1469 releaseGroup(Group);
1470 continue;
1471 }
1472
1473 // Case 2: If first and last members of the group don't wrap this implies
1474 // that all the pointers in the group don't wrap.
1475 // So we check only group member 0 (which is always guaranteed to exist),
1476 // and the last group member. Case 3 (scalar epilog) is not relevant for
1477 // stores with gaps, which are implemented with masked-store (rather than
1478 // speculative access, as in loads).
1479 if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1480 continue;
1481 for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1482 if (Group->getMember(Index)) {
1483 InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
1484 break;
1485 }
1486 }
1487}
1488
1489void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1490 // If no group had triggered the requirement to create an epilogue loop,
1491 // there is nothing to do.
1492 if (!requiresScalarEpilogue())
1493 return;
1494
1495 bool ReleasedGroup = false;
1496 // Release groups requiring scalar epilogues. Note that this also removes them
1497 // from InterleaveGroups.
1498 for (auto *Group : make_early_inc_range(InterleaveGroups)) {
1499 if (!Group->requiresScalarEpilogue())
1500 continue;
1501 LLVM_DEBUG(
1502 dbgs()
1503 << "LV: Invalidate candidate interleaved group due to gaps that "
1504 "require a scalar epilogue (not allowed under optsize) and cannot "
1505 "be masked (not enabled). \n");
1506 releaseGroup(Group);
1507 ReleasedGroup = true;
1508 }
1509 assert(ReleasedGroup && "At least one group must be invalidated, as a "
1510 "scalar epilogue was required");
1511 (void)ReleasedGroup;
1512 RequiresScalarEpilogue = false;
1513}
1514
1515template <typename InstT>
1516void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1517 llvm_unreachable("addMetadata can only be used for Instruction");
1518}
1519
1520namespace llvm {
1521template <>
1522void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1523 SmallVector<Value *, 4> VL;
1524 std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1525 [](std::pair<int, Instruction *> p) { return p.second; });
1526 propagateMetadata(NewInst, VL);
1527}
1528}
1529
1530std::string VFABI::mangleTLIVectorName(StringRef VectorName,
1531 StringRef ScalarName, unsigned numArgs,
1532 ElementCount VF) {
1533 SmallString<256> Buffer;
1534 llvm::raw_svector_ostream Out(Buffer);
1535 Out << "_ZGV" << VFABI::_LLVM_ << "N";
1536 if (VF.isScalable())
1537 Out << 'x';
1538 else
1539 Out << VF.getFixedValue();
1540 for (unsigned I = 0; I < numArgs; ++I)
1541 Out << "v";
1542 Out << "_" << ScalarName << "(" << VectorName << ")";
1543 return std::string(Out.str());
1544}
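// Example (illustrative comment, assuming VFABI::_LLVM_ expands to "_LLVM_"):
// mangleTLIVectorName("vsinf", "sinf", 1, ElementCount::getFixed(4)) produces
// "_ZGV_LLVM_N4v_sinf(vsinf)": no mask, VF of 4, and one vector ("v") argument.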
1545
1546void VFABI::getVectorVariantNames(
1547 const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
1548 const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
1549 if (S.empty())
1550 return;
1551
1552 SmallVector<StringRef, 8> ListAttr;
1553 S.split(ListAttr, ",");
1554
1555 for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
1556#ifndef NDEBUG
1557 LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
1558 std::optional<VFInfo> Info =
1559 VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
1560 assert(Info && "Invalid name for a VFABI variant.");
1561 assert(CI.getModule()->getFunction(Info->VectorName) &&
1562 "Vector function is missing.");
1563#endif
1564 VariantMappings.push_back(std::string(S));
1565 }
1566}
1567
1568bool VFShape::hasValidParameterList() const {
1569 for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
1570 ++Pos) {
1571 assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
1572
1573 switch (Parameters[Pos].ParamKind) {
1574 default: // Nothing to check.
1575 break;
1576 case VFParamKind::OMP_Linear:
1577 case VFParamKind::OMP_LinearRef:
1578 case VFParamKind::OMP_LinearVal:
1579 case VFParamKind::OMP_LinearUVal:
1580 // Compile time linear steps must be non-zero.
1581 if (Parameters[Pos].LinearStepOrPos == 0)
1582 return false;
1583 break;
1584 case VFParamKind::OMP_LinearPos:
1585 case VFParamKind::OMP_LinearRefPos:
1586 case VFParamKind::OMP_LinearValPos:
1587 case VFParamKind::OMP_LinearUValPos:
1588 // The runtime linear step must be referring to some other
1589 // parameters in the signature.
1590 if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
1591 return false;
1592 // The linear step parameter must be marked as uniform.
1593 if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
1594 VFParamKind::OMP_Uniform)
1595 return false;
1596 // The linear step parameter can't point at itself.
1597 if (Parameters[Pos].LinearStepOrPos == int(Pos))
1598 return false;
1599 break;
1600 case VFParamKind::GlobalPredicate:
1601 // The global predicate must be unique and can be placed anywhere in the
1602 // signature.
1603 for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
1604 if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
1605 return false;
1606 break;
1607 }
1608 }
1609 return true;
1610}
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VectorUtils.h:810
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
Definition: VectorUtils.h:821
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
Definition: VectorUtils.h:802
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
const ValueToValueMap & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
BlockT * getHeader() const
Definition: LoopInfo.h:105
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
Metadata node.
Definition: Metadata.h:943
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1032
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1289
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1064
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1297
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1019
LLVMContext & getContext() const
Definition: Metadata.h:1107
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:772
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
reverse_iterator rend()
Definition: MapVector.h:77
bool empty() const
Definition: MapVector.h:80
reverse_iterator rbegin()
Definition: MapVector.h:75
Root of the metadata hierarchy.
Definition: Metadata.h:61
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:175
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
A vector that has set insertion semantics.
Definition: SetVector.h:40
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:301
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:708
void reserve(size_type N)
Definition: SmallVector.h:667
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:688
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:258
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1740
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
Type * getElementType() const
Definition: DerivedTypes.h:422
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:672
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:697
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:979
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:524
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:537
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
static constexpr char const * MappingsAttrName
Definition: VectorUtils.h:194
std::optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled names in the following format:
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for vector-function-...
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
static constexpr char const * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
Definition: VectorUtils.h:132
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx)
Identifies if the vector form of the intrinsic has a operand that has an overloaded type.
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:281
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:721
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:306
constexpr int UndefMaskElem
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:46
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
Definition: LoopInfo.cpp:1122
Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1903
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1762
gep_type_iterator gep_type_begin(const User *GEP)
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition: STLExtras.h:1986
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:44
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
bool hasValidParameterList() const
Validation check on the Parameters in the VFShape.
SmallVector< VFParameter, 8 > Parameters
Definition: VectorUtils.h:84