VectorUtils.cpp
1//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
23#include "llvm/IR/Constants.h"
25#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Value.h"
30
31#define DEBUG_TYPE "vectorutils"
32
33using namespace llvm;
34using namespace llvm::PatternMatch;
35
36/// Maximum factor for an interleaved memory access.
38 "max-interleave-group-factor", cl::Hidden,
39 cl::desc("Maximum factor for an interleaved access group (default = 8)"),
40 cl::init(8));
41
42/// Return true if all of the intrinsic's arguments and return type are scalars
43/// for the scalar form of the intrinsic, and vectors for the vector form of the
44/// intrinsic (except operands that are marked as always being scalar by
45/// isVectorIntrinsicWithScalarOpAtArg).
46bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
47 switch (ID) {
48 case Intrinsic::abs: // Begin integer bit-manipulation.
49 case Intrinsic::bswap:
50 case Intrinsic::bitreverse:
51 case Intrinsic::ctpop:
52 case Intrinsic::ctlz:
53 case Intrinsic::cttz:
54 case Intrinsic::fshl:
55 case Intrinsic::fshr:
56 case Intrinsic::smax:
57 case Intrinsic::smin:
58 case Intrinsic::umax:
59 case Intrinsic::umin:
60 case Intrinsic::sadd_sat:
61 case Intrinsic::ssub_sat:
62 case Intrinsic::uadd_sat:
63 case Intrinsic::usub_sat:
64 case Intrinsic::smul_fix:
65 case Intrinsic::smul_fix_sat:
66 case Intrinsic::umul_fix:
67 case Intrinsic::umul_fix_sat:
68 case Intrinsic::uadd_with_overflow:
69 case Intrinsic::sadd_with_overflow:
70 case Intrinsic::usub_with_overflow:
71 case Intrinsic::ssub_with_overflow:
72 case Intrinsic::umul_with_overflow:
73 case Intrinsic::smul_with_overflow:
74 case Intrinsic::sqrt: // Begin floating-point.
75 case Intrinsic::asin:
76 case Intrinsic::acos:
77 case Intrinsic::atan:
78 case Intrinsic::atan2:
79 case Intrinsic::sin:
80 case Intrinsic::cos:
81 case Intrinsic::sincos:
82 case Intrinsic::sincospi:
83 case Intrinsic::tan:
84 case Intrinsic::sinh:
85 case Intrinsic::cosh:
86 case Intrinsic::tanh:
87 case Intrinsic::exp:
88 case Intrinsic::exp10:
89 case Intrinsic::exp2:
90 case Intrinsic::frexp:
91 case Intrinsic::ldexp:
92 case Intrinsic::log:
93 case Intrinsic::log10:
94 case Intrinsic::log2:
95 case Intrinsic::fabs:
96 case Intrinsic::minnum:
97 case Intrinsic::maxnum:
98 case Intrinsic::minimum:
99 case Intrinsic::maximum:
100 case Intrinsic::minimumnum:
101 case Intrinsic::maximumnum:
102 case Intrinsic::modf:
103 case Intrinsic::copysign:
104 case Intrinsic::floor:
105 case Intrinsic::ceil:
106 case Intrinsic::trunc:
107 case Intrinsic::rint:
108 case Intrinsic::nearbyint:
109 case Intrinsic::round:
110 case Intrinsic::roundeven:
111 case Intrinsic::pow:
112 case Intrinsic::fma:
113 case Intrinsic::fmuladd:
114 case Intrinsic::is_fpclass:
115 case Intrinsic::powi:
116 case Intrinsic::canonicalize:
117 case Intrinsic::fptosi_sat:
118 case Intrinsic::fptoui_sat:
119 case Intrinsic::lround:
120 case Intrinsic::llround:
121 case Intrinsic::lrint:
122 case Intrinsic::llrint:
123 case Intrinsic::ucmp:
124 case Intrinsic::scmp:
125 return true;
126 default:
127 return false;
128 }
129}
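// Illustrative examples (not part of the listed file): the scalar and vector
// forms pair up as llvm.sqrt.f32 <-> llvm.sqrt.v4f32, so
// isTriviallyVectorizable(Intrinsic::sqrt) returns true, whereas an intrinsic
// with side effects such as llvm.memcpy is not in the list and returns false.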
130
131bool llvm::isTriviallyScalarizable(Intrinsic::ID ID,
132 const TargetTransformInfo *TTI) {
133 if (isTriviallyVectorizable(ID))
134 return true;
135
136 if (TTI && Intrinsic::isTargetIntrinsic(ID))
137 return TTI->isTargetIntrinsicTriviallyScalarizable(ID);
138
139 return false;
140}
141
142/// Identifies if the vector form of the intrinsic has a scalar operand.
143bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
144 unsigned ScalarOpdIdx,
145 const TargetTransformInfo *TTI) {
146
147 if (TTI && Intrinsic::isTargetIntrinsic(ID))
148 return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
149
150 // Vector predication intrinsics have the EVL as the last operand.
151 if (VPIntrinsic::getVectorLengthParamPos(ID) == ScalarOpdIdx)
152 return true;
153
154 switch (ID) {
155 case Intrinsic::abs:
156 case Intrinsic::vp_abs:
157 case Intrinsic::ctlz:
158 case Intrinsic::vp_ctlz:
159 case Intrinsic::cttz:
160 case Intrinsic::vp_cttz:
161 case Intrinsic::is_fpclass:
162 case Intrinsic::vp_is_fpclass:
163 case Intrinsic::powi:
164 case Intrinsic::vector_extract:
165 return (ScalarOpdIdx == 1);
166 case Intrinsic::smul_fix:
167 case Intrinsic::smul_fix_sat:
168 case Intrinsic::umul_fix:
169 case Intrinsic::umul_fix_sat:
170 return (ScalarOpdIdx == 2);
171 case Intrinsic::experimental_vp_splice:
172 return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
173 default:
174 return false;
175 }
176}
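// Usage sketch (illustrative): llvm.powi keeps its exponent scalar even in the
// vector form, so isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 1, TTI)
// is true while operand 0 (the base) is widened; likewise the EVL operand of
// any VP intrinsic is always reported as scalar.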
177
178bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
179 Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI) {
180 assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!");
181
182 if (TTI && Intrinsic::isTargetIntrinsic(ID))
183 return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
184
185 if (VPCastIntrinsic::isVPCast(ID))
186 return OpdIdx == -1 || OpdIdx == 0;
187
188 switch (ID) {
189 case Intrinsic::fptosi_sat:
190 case Intrinsic::fptoui_sat:
191 case Intrinsic::lround:
192 case Intrinsic::llround:
193 case Intrinsic::lrint:
194 case Intrinsic::llrint:
195 case Intrinsic::vp_lrint:
196 case Intrinsic::vp_llrint:
197 case Intrinsic::ucmp:
198 case Intrinsic::scmp:
199 case Intrinsic::vector_extract:
200 return OpdIdx == -1 || OpdIdx == 0;
201 case Intrinsic::modf:
202 case Intrinsic::sincos:
203 case Intrinsic::sincospi:
204 case Intrinsic::is_fpclass:
205 case Intrinsic::vp_is_fpclass:
206 return OpdIdx == 0;
207 case Intrinsic::powi:
208 case Intrinsic::ldexp:
209 return OpdIdx == -1 || OpdIdx == 1;
210 default:
211 return OpdIdx == -1;
212 }
213}
214
215bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField(
216 Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) {
217
218 if (TTI && Intrinsic::isTargetIntrinsic(ID))
219 return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
220
221 switch (ID) {
222 case Intrinsic::frexp:
223 return RetIdx == 0 || RetIdx == 1;
224 default:
225 return RetIdx == 0;
226 }
227}
228
229/// Returns intrinsic ID for call.
230/// For the input call instruction it finds mapping intrinsic and returns
231/// its ID; if no mapping intrinsic is found, it returns not_intrinsic.
232Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
233 const TargetLibraryInfo *TLI) {
234 Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
235 if (ID == Intrinsic::not_intrinsic)
236 return Intrinsic::not_intrinsic;
237
238 if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
239 ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
240 ID == Intrinsic::experimental_noalias_scope_decl ||
241 ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
242 return ID;
243 return Intrinsic::not_intrinsic;
244}
245
246unsigned llvm::getInterleaveIntrinsicFactor(Intrinsic::ID ID) {
247 switch (ID) {
248 case Intrinsic::vector_interleave2:
249 return 2;
250 case Intrinsic::vector_interleave3:
251 return 3;
252 case Intrinsic::vector_interleave4:
253 return 4;
254 case Intrinsic::vector_interleave5:
255 return 5;
256 case Intrinsic::vector_interleave6:
257 return 6;
258 case Intrinsic::vector_interleave7:
259 return 7;
260 case Intrinsic::vector_interleave8:
261 return 8;
262 default:
263 return 0;
264 }
265}
266
267unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) {
268 switch (ID) {
269 case Intrinsic::vector_deinterleave2:
270 return 2;
271 case Intrinsic::vector_deinterleave3:
272 return 3;
273 case Intrinsic::vector_deinterleave4:
274 return 4;
275 case Intrinsic::vector_deinterleave5:
276 return 5;
277 case Intrinsic::vector_deinterleave6:
278 return 6;
279 case Intrinsic::vector_deinterleave7:
280 return 7;
281 case Intrinsic::vector_deinterleave8:
282 return 8;
283 default:
284 return 0;
285 }
286}
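// Illustrative mapping: getInterleaveIntrinsicFactor(Intrinsic::vector_interleave4)
// returns 4 and getDeinterleaveIntrinsicFactor(Intrinsic::vector_deinterleave2)
// returns 2; any other intrinsic ID yields 0, i.e. "not an interleave or
// deinterleave intrinsic".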
287
288VectorType *llvm::getDeinterleavedVectorType(IntrinsicInst *DI) {
289 [[maybe_unused]] unsigned Factor =
290 getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
291 ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
292 assert(Factor && Factor == DISubtypes.size() &&
293 "unexpected deinterleave factor or result type");
294 return cast<VectorType>(DISubtypes[0]);
295}
296
297/// Given a vector and an element number, see if the scalar value is
298/// already around as a register, for example if it were inserted then extracted
299/// from the vector.
300Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
301 assert(V->getType()->isVectorTy() && "Not looking at a vector?");
302 VectorType *VTy = cast<VectorType>(V->getType());
303 // For fixed-length vector, return poison for out of range access.
304 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
305 unsigned Width = FVTy->getNumElements();
306 if (EltNo >= Width)
307 return PoisonValue::get(FVTy->getElementType());
308 }
309
310 if (Constant *C = dyn_cast<Constant>(V))
311 return C->getAggregateElement(EltNo);
312
313 if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
314 // If this is an insert to a variable element, we don't know what it is.
315 uint64_t IIElt;
316 if (!match(III->getOperand(2), m_ConstantInt(IIElt)))
317 return nullptr;
318
319 // If this is an insert to the element we are looking for, return the
320 // inserted value.
321 if (EltNo == IIElt)
322 return III->getOperand(1);
323
324 // Guard against infinite loop on malformed, unreachable IR.
325 if (III == III->getOperand(0))
326 return nullptr;
327
328 // Otherwise, the insertelement doesn't modify the value, recurse on its
329 // vector input.
330 return findScalarElement(III->getOperand(0), EltNo);
331 }
332
333 ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
334 // Restrict the following transformation to fixed-length vector.
335 if (SVI && isa<FixedVectorType>(SVI->getType())) {
336 unsigned LHSWidth =
337 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
338 int InEl = SVI->getMaskValue(EltNo);
339 if (InEl < 0)
340 return PoisonValue::get(VTy->getElementType());
341 if (InEl < (int)LHSWidth)
342 return findScalarElement(SVI->getOperand(0), InEl);
343 return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
344 }
345
346 // Extract a value from a vector add operation with a constant zero.
347 // TODO: Use getBinOpIdentity() to generalize this.
348 Value *Val; Constant *C;
349 if (match(V, m_Add(m_Value(Val), m_Constant(C))))
350 if (Constant *Elt = C->getAggregateElement(EltNo))
351 if (Elt->isNullValue())
352 return findScalarElement(Val, EltNo);
353
354 // If the vector is a splat then we can trivially find the scalar element.
356 if (Value *Splat = getSplatValue(V))
357 if (EltNo < VTy->getElementCount().getKnownMinValue())
358 return Splat;
359
360 // Otherwise, we don't know.
361 return nullptr;
362}
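// Worked example (assumed IR, not from this file):
//   %v = insertelement <4 x float> %w, float %s, i32 2
// findScalarElement(%v, 2) returns %s, findScalarElement(%v, 1) recurses into
// %w, and an out-of-range index on a fixed-length vector yields poison.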
363
364int llvm::getSplatIndex(ArrayRef<int> Mask) {
365 int SplatIndex = -1;
366 for (int M : Mask) {
367 // Ignore invalid (undefined) mask elements.
368 if (M < 0)
369 continue;
370
371 // There can be only 1 non-negative mask element value if this is a splat.
372 if (SplatIndex != -1 && SplatIndex != M)
373 return -1;
374
375 // Initialize the splat index to the 1st non-negative mask element.
376 SplatIndex = M;
377 }
378 assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
379 return SplatIndex;
380}
381
382/// Get splat value if the input is a splat vector or return nullptr.
383/// This function is not fully general. It checks only 2 cases:
384/// the input value is (1) a splat constant vector or (2) a sequence
385/// of instructions that broadcasts a scalar at element 0.
386Value *llvm::getSplatValue(const Value *V) {
387 if (isa<VectorType>(V->getType()))
388 if (auto *C = dyn_cast<Constant>(V))
389 return C->getSplatValue();
390
391 // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
392 Value *Splat;
393 if (match(V,
394 m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
395 m_Value(), m_ZeroMask())))
396 return Splat;
397
398 return nullptr;
399}
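// Illustrative example: for the canonical splat pattern
//   %ins   = insertelement <4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
// getSplatValue(%splat) returns %x, and a constant such as
// <i32 7, i32 7, i32 7, i32 7> returns the scalar constant i32 7.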
400
401bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
402 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
403
404 if (isa<VectorType>(V->getType())) {
405 if (isa<UndefValue>(V))
406 return true;
407 // FIXME: We can allow undefs, but if Index was specified, we may want to
408 // check that the constant is defined at that index.
409 if (auto *C = dyn_cast<Constant>(V))
410 return C->getSplatValue() != nullptr;
411 }
412
413 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
414 // FIXME: We can safely allow undefs here. If Index was specified, we will
415 // check that the mask elt is defined at the required index.
416 if (!all_equal(Shuf->getShuffleMask()))
417 return false;
418
419 // Match any index.
420 if (Index == -1)
421 return true;
422
423 // Match a specific element. The mask should be defined at and match the
424 // specified index.
425 return Shuf->getMaskValue(Index) == Index;
426 }
427
428 // The remaining tests are all recursive, so bail out if we hit the limit.
429 if (Depth++ == MaxAnalysisRecursionDepth)
430 return false;
431
432 // If both operands of a binop are splats, the result is a splat.
433 Value *X, *Y, *Z;
434 if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
435 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
436
437 // If all operands of a select are splats, the result is a splat.
438 if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
439 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
440 isSplatValue(Z, Index, Depth);
441
442 // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
443
444 return false;
445}
446
447bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
448 const APInt &DemandedElts, APInt &DemandedLHS,
449 APInt &DemandedRHS, bool AllowUndefElts) {
450 DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);
451
452 // Early out if we don't demand any elements.
453 if (DemandedElts.isZero())
454 return true;
455
456 // Simple case of a shuffle with zeroinitializer.
457 if (all_of(Mask, equal_to(0))) {
458 DemandedLHS.setBit(0);
459 return true;
460 }
461
462 for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
463 int M = Mask[I];
464 assert((-1 <= M) && (M < (SrcWidth * 2)) &&
465 "Invalid shuffle mask constant");
466
467 if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
468 continue;
469
470 // For undef elements, we don't know anything about the common state of
471 // the shuffle result.
472 if (M < 0)
473 return false;
474
475 if (M < SrcWidth)
476 DemandedLHS.setBit(M);
477 else
478 DemandedRHS.setBit(M - SrcWidth);
479 }
480
481 return true;
482}
483
485 std::array<std::pair<int, int>, 2> &SrcInfo) {
486 const int SignalValue = NumElts * 2;
487 SrcInfo[0] = {-1, SignalValue};
488 SrcInfo[1] = {-1, SignalValue};
489 for (auto [i, M] : enumerate(Mask)) {
490 if (M < 0)
491 continue;
492 int Src = M >= NumElts;
493 int Diff = (int)i - (M % NumElts);
494 bool Match = false;
495 for (int j = 0; j < 2; j++) {
496 auto &[SrcE, DiffE] = SrcInfo[j];
497 if (SrcE == -1) {
498 assert(DiffE == SignalValue);
499 SrcE = Src;
500 DiffE = Diff;
501 }
502 if (SrcE == Src && DiffE == Diff) {
503 Match = true;
504 break;
505 }
506 }
507 if (!Match)
508 return false;
509 }
510 // Avoid all undef masks
511 return SrcInfo[0].first != -1;
512}
513
514void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
515 SmallVectorImpl<int> &ScaledMask) {
516 assert(Scale > 0 && "Unexpected scaling factor");
517
518 // Fast-path: if no scaling, then it is just a copy.
519 if (Scale == 1) {
520 ScaledMask.assign(Mask.begin(), Mask.end());
521 return;
522 }
523
524 ScaledMask.clear();
525 for (int MaskElt : Mask) {
526 if (MaskElt >= 0) {
527 assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
528 "Overflowed 32-bits");
529 }
530 for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
531 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
532 }
533}
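// Worked example (illustrative): narrowing with Scale = 2 maps every wide mask
// element to two consecutive narrow elements, so <1, 0> becomes <2, 3, 0, 1>,
// and sentinel (negative) elements are replicated: <-1, 0> becomes <-1, -1, 0, 1>.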
534
535bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
536 SmallVectorImpl<int> &ScaledMask) {
537 assert(Scale > 0 && "Unexpected scaling factor");
538
539 // Fast-path: if no scaling, then it is just a copy.
540 if (Scale == 1) {
541 ScaledMask.assign(Mask.begin(), Mask.end());
542 return true;
543 }
544
545 // We must map the original elements down evenly to a type with less elements.
546 int NumElts = Mask.size();
547 if (NumElts % Scale != 0)
548 return false;
549
550 ScaledMask.clear();
551 ScaledMask.reserve(NumElts / Scale);
552
553 // Step through the input mask by splitting into Scale-sized slices.
554 do {
555 ArrayRef<int> MaskSlice = Mask.take_front(Scale);
556 assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
557
558 // The first element of the slice determines how we evaluate this slice.
559 int SliceFront = MaskSlice.front();
560 if (SliceFront < 0) {
561 // Negative values (undef or other "sentinel" values) must be equal across
562 // the entire slice.
563 if (!all_equal(MaskSlice))
564 return false;
565 ScaledMask.push_back(SliceFront);
566 } else {
567 // A positive mask element must be cleanly divisible.
568 if (SliceFront % Scale != 0)
569 return false;
570 // Elements of the slice must be consecutive.
571 for (int i = 1; i < Scale; ++i)
572 if (MaskSlice[i] != SliceFront + i)
573 return false;
574 ScaledMask.push_back(SliceFront / Scale);
575 }
576 Mask = Mask.drop_front(Scale);
577 } while (!Mask.empty());
578
579 assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
580
581 // All elements of the original mask can be scaled down to map to the elements
582 // of a mask with wider elements.
583 return true;
584}
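// Worked example (illustrative): with Scale = 2 the mask <2, 3, 0, 1> widens to
// <1, 0> because each slice is consecutive and starts at a multiple of Scale,
// whereas <0, 2, 4, 6> cannot be widened and the function returns false.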
585
586bool llvm::widenShuffleMaskElts(ArrayRef<int> M,
587 SmallVectorImpl<int> &NewMask) {
588 unsigned NumElts = M.size();
589 if (NumElts % 2 != 0)
590 return false;
591
592 NewMask.clear();
593 for (unsigned i = 0; i < NumElts; i += 2) {
594 int M0 = M[i];
595 int M1 = M[i + 1];
596
597 // If both elements are undef, new mask is undef too.
598 if (M0 == -1 && M1 == -1) {
599 NewMask.push_back(-1);
600 continue;
601 }
602
603 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
604 NewMask.push_back(M1 / 2);
605 continue;
606 }
607
608 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
609 NewMask.push_back(M0 / 2);
610 continue;
611 }
612
613 NewMask.clear();
614 return false;
615 }
616
617 assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
618 return true;
619}
620
621bool llvm::scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
622 SmallVectorImpl<int> &ScaledMask) {
623 unsigned NumSrcElts = Mask.size();
624 assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");
625
626 // Fast-path: if no scaling, then it is just a copy.
627 if (NumSrcElts == NumDstElts) {
628 ScaledMask.assign(Mask.begin(), Mask.end());
629 return true;
630 }
631
632 // Ensure we can find a whole scale factor.
633 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
634 "Unexpected scaling factor");
635
636 if (NumSrcElts > NumDstElts) {
637 int Scale = NumSrcElts / NumDstElts;
638 return widenShuffleMaskElts(Scale, Mask, ScaledMask);
639 }
640
641 int Scale = NumDstElts / NumSrcElts;
642 narrowShuffleMaskElts(Scale, Mask, ScaledMask);
643 return true;
644}
645
646void llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
647 SmallVectorImpl<int> &ScaledMask) {
648 std::array<SmallVector<int, 16>, 2> TmpMasks;
649 SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
650 ArrayRef<int> InputMask = Mask;
651 for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
652 while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
653 InputMask = *Output;
654 std::swap(Output, Tmp);
655 }
656 }
657 ScaledMask.assign(InputMask.begin(), InputMask.end());
658}
659
660void llvm::processShuffleMasks(
661 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
662 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
663 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
664 function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
665 ManyInputsAction) {
666 SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
667 // Try to perform better estimation of the permutation.
668 // 1. Split the source/destination vectors into real registers.
669 // 2. Do the mask analysis to identify which real registers are
670 // permuted.
671 int Sz = Mask.size();
672 unsigned SzDest = Sz / NumOfDestRegs;
673 unsigned SzSrc = Sz / NumOfSrcRegs;
674 for (unsigned I = 0; I < NumOfDestRegs; ++I) {
675 auto &RegMasks = Res[I];
676 RegMasks.assign(2 * NumOfSrcRegs, {});
677 // Check that the values in dest registers are in the one src
678 // register.
679 for (unsigned K = 0; K < SzDest; ++K) {
680 int Idx = I * SzDest + K;
681 if (Idx == Sz)
682 break;
683 if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem)
684 continue;
685 int MaskIdx = Mask[Idx] % Sz;
686 int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
687 // Add a cost of PermuteTwoSrc for each new source register permute,
688 // if we have more than one source registers.
689 if (RegMasks[SrcRegIdx].empty())
690 RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
691 RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
692 }
693 }
694 // Process split mask.
695 for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
696 auto &Dest = Res[I];
697 int NumSrcRegs =
698 count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
699 switch (NumSrcRegs) {
700 case 0:
701 // No input vectors were used!
702 NoInputAction();
703 break;
704 case 1: {
705 // Find the only mask with at least single undef mask elem.
706 auto *It =
707 find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
708 unsigned SrcReg = std::distance(Dest.begin(), It);
709 SingleInputAction(*It, SrcReg, I);
710 break;
711 }
712 default: {
713 // The first mask is a permutation of a single register. Since we have >2
714 // input registers to shuffle, we merge the masks for 2 first registers
715 // and generate a shuffle of 2 registers rather than the reordering of the
716 // first register and then shuffle with the second register. Next,
717 // generate the shuffles of the resulting register + the remaining
718 // registers from the list.
719 auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
720 ArrayRef<int> SecondMask) {
721 for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
722 if (SecondMask[Idx] != PoisonMaskElem) {
723 assert(FirstMask[Idx] == PoisonMaskElem &&
724 "Expected undefined mask element.");
725 FirstMask[Idx] = SecondMask[Idx] + VF;
726 }
727 }
728 };
729 auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
730 for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
731 if (Mask[Idx] != PoisonMaskElem)
732 Mask[Idx] = Idx;
733 }
734 };
735 int SecondIdx;
736 bool NewReg = true;
737 do {
738 int FirstIdx = -1;
739 SecondIdx = -1;
740 MutableArrayRef<int> FirstMask, SecondMask;
741 for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {
742 SmallVectorImpl<int> &RegMask = Dest[I];
743 if (RegMask.empty())
744 continue;
745
746 if (FirstIdx == SecondIdx) {
747 FirstIdx = I;
748 FirstMask = RegMask;
749 continue;
750 }
751 SecondIdx = I;
752 SecondMask = RegMask;
753 CombineMasks(FirstMask, SecondMask);
754 ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);
755 NewReg = false;
756 NormalizeMask(FirstMask);
757 RegMask.clear();
758 SecondMask = FirstMask;
759 SecondIdx = FirstIdx;
760 }
761 if (FirstIdx != SecondIdx && SecondIdx >= 0) {
762 CombineMasks(SecondMask, FirstMask);
763 ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);
764 NewReg = false;
765 Dest[FirstIdx].clear();
766 NormalizeMask(SecondMask);
767 }
768 } while (SecondIdx >= 0);
769 break;
770 }
771 }
772 }
773}
774
775void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
776 const APInt &DemandedElts,
777 APInt &DemandedLHS,
778 APInt &DemandedRHS) {
779 assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
780 int NumLanes = VectorBitWidth / 128;
781 int NumElts = DemandedElts.getBitWidth();
782 int NumEltsPerLane = NumElts / NumLanes;
783 int HalfEltsPerLane = NumEltsPerLane / 2;
784
785 DemandedLHS = APInt::getZero(NumElts);
786 DemandedRHS = APInt::getZero(NumElts);
787
788 // Map DemandedElts to the horizontal operands.
789 for (int Idx = 0; Idx != NumElts; ++Idx) {
790 if (!DemandedElts[Idx])
791 continue;
792 int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
793 int LocalIdx = Idx % NumEltsPerLane;
794 if (LocalIdx < HalfEltsPerLane) {
795 DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
796 } else {
797 LocalIdx -= HalfEltsPerLane;
798 DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
799 }
800 }
801}
802
803MapVector<Instruction *, uint64_t>
804llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
805 const TargetTransformInfo *TTI) {
806
807 // DemandedBits will give us every value's live-out bits. But we want
808 // to ensure no extra casts would need to be inserted, so every DAG
809 // of connected values must have the same minimum bitwidth.
810 EquivalenceClasses<Value *> ECs;
811 SmallVector<Instruction *, 16> Worklist;
812 SmallPtrSet<Instruction *, 4> Roots;
813 SmallPtrSet<Instruction *, 16> Visited;
814 DenseMap<Value *, uint64_t> DBits;
815 SmallPtrSet<Instruction *, 4> InstructionSet;
816 MapVector<Instruction *, uint64_t> MinBWs;
817
818 // Determine the roots. We work bottom-up, from truncs or icmps.
819 bool SeenExtFromIllegalType = false;
820 for (auto *BB : Blocks)
821 for (auto &I : *BB) {
822 InstructionSet.insert(&I);
823
824 if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
825 !TTI->isTypeLegal(I.getOperand(0)->getType()))
826 SeenExtFromIllegalType = true;
827
828 // Only deal with non-vector integers up to 64-bits wide.
829 if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
830 !I.getType()->isVectorTy() &&
831 I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
832 // Don't make work for ourselves. If we know the loaded type is legal,
833 // don't add it to the worklist.
834 if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
835 continue;
836
837 Worklist.push_back(&I);
838 Roots.insert(&I);
839 }
840 }
841 // Early exit.
842 if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
843 return MinBWs;
844
845 // Now proceed breadth-first, unioning values together.
846 while (!Worklist.empty()) {
847 Instruction *I = Worklist.pop_back_val();
848 Value *Leader = ECs.getOrInsertLeaderValue(I);
849
850 if (!Visited.insert(I).second)
851 continue;
852
853 // If we encounter a type that is larger than 64 bits, we can't represent
854 // it so bail out.
855 if (DB.getDemandedBits(I).getBitWidth() > 64)
856 return MapVector<Instruction *, uint64_t>();
857
858 uint64_t V = DB.getDemandedBits(I).getZExtValue();
859 DBits[Leader] |= V;
860 DBits[I] = V;
861
862 // Casts, loads and instructions outside of our range terminate a chain
863 // successfully.
864 if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
865 !InstructionSet.count(I))
866 continue;
867
868 // Unsafe casts terminate a chain unsuccessfully. We can't do anything
869 // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
870 // transform anything that relies on them.
871 if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
872 !I->getType()->isIntegerTy()) {
873 DBits[Leader] |= ~0ULL;
874 continue;
875 }
876
877 // We don't modify the types of PHIs. Reductions will already have been
878 // truncated if possible, and inductions' sizes will have been chosen by
879 // indvars.
880 if (isa<PHINode>(I))
881 continue;
882
883 // Don't modify the types of operands of a call, as doing that would cause a
884 // signature mismatch.
885 if (isa<CallBase>(I))
886 continue;
887
888 if (DBits[Leader] == ~0ULL)
889 // All bits demanded, no point continuing.
890 continue;
891
892 for (Value *O : I->operands()) {
893 ECs.unionSets(Leader, O);
894 if (auto *OI = dyn_cast<Instruction>(O))
895 Worklist.push_back(OI);
896 }
897 }
898
899 // Now we've discovered all values, walk them to see if there are
900 // any users we didn't see. If there are, we can't optimize that
901 // chain.
902 for (auto &I : DBits)
903 for (auto *U : I.first->users())
904 if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
905 DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
906
907 for (const auto &E : ECs) {
908 if (!E->isLeader())
909 continue;
910 uint64_t LeaderDemandedBits = 0;
911 for (Value *M : ECs.members(*E))
912 LeaderDemandedBits |= DBits[M];
913
914 uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
915 // Round up to a power of 2
916 MinBW = llvm::bit_ceil(MinBW);
917
918 // We don't modify the types of PHIs. Reductions will already have been
919 // truncated if possible, and inductions' sizes will have been chosen by
920 // indvars.
921 // If we are required to shrink a PHI, abandon this entire equivalence class.
922 bool Abort = false;
923 for (Value *M : ECs.members(*E))
924 if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
925 Abort = true;
926 break;
927 }
928 if (Abort)
929 continue;
930
931 for (Value *M : ECs.members(*E)) {
932 auto *MI = dyn_cast<Instruction>(M);
933 if (!MI)
934 continue;
935 Type *Ty = M->getType();
936 if (Roots.count(MI))
937 Ty = MI->getOperand(0)->getType();
938
939 if (MinBW >= Ty->getScalarSizeInBits())
940 continue;
941
942 // If any of M's operands demand more bits than MinBW then M cannot be
943 // performed safely in MinBW.
944 auto *Call = dyn_cast<CallBase>(MI);
945 auto Ops = Call ? Call->args() : MI->operands();
946 if (any_of(Ops, [&DB, MinBW](Use &U) {
947 auto *CI = dyn_cast<ConstantInt>(U);
948 // For constants shift amounts, check if the shift would result in
949 // poison.
950 if (CI &&
951 isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
952 U.getOperandNo() == 1)
953 return CI->uge(MinBW);
954 uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
955 return bit_ceil(BW) > MinBW;
956 }))
957 continue;
958
959 MinBWs[MI] = MinBW;
960 }
961 }
962
963 return MinBWs;
964}
965
966/// Add all access groups in @p AccGroups to @p List.
967template <typename ListT>
968static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
969 // Interpret an access group as a list containing itself.
970 if (AccGroups->getNumOperands() == 0) {
971 assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
972 List.insert(AccGroups);
973 return;
974 }
975
976 for (const auto &AccGroupListOp : AccGroups->operands()) {
977 auto *Item = cast<MDNode>(AccGroupListOp.get());
978 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
979 List.insert(Item);
980 }
981}
982
983MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
984 if (!AccGroups1)
985 return AccGroups2;
986 if (!AccGroups2)
987 return AccGroups1;
988 if (AccGroups1 == AccGroups2)
989 return AccGroups1;
990
991 SmallSetVector<Metadata *, 4> Union;
992 addToAccessGroupList(Union, AccGroups1);
993 addToAccessGroupList(Union, AccGroups2);
994
995 if (Union.size() == 0)
996 return nullptr;
997 if (Union.size() == 1)
998 return cast<MDNode>(Union.front());
999
1000 LLVMContext &Ctx = AccGroups1->getContext();
1001 return MDNode::get(Ctx, Union.getArrayRef());
1002}
1003
1004MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
1005 const Instruction *Inst2) {
1006 bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
1007 bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
1008
1009 if (!MayAccessMem1 && !MayAccessMem2)
1010 return nullptr;
1011 if (!MayAccessMem1)
1012 return Inst2->getMetadata(LLVMContext::MD_access_group);
1013 if (!MayAccessMem2)
1014 return Inst1->getMetadata(LLVMContext::MD_access_group);
1015
1016 MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
1017 MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
1018 if (!MD1 || !MD2)
1019 return nullptr;
1020 if (MD1 == MD2)
1021 return MD1;
1022
1023 // Use set for scalable 'contains' check.
1024 SmallPtrSet<Metadata *, 4> AccGroupSet2;
1025 addToAccessGroupList(AccGroupSet2, MD2);
1026
1027 SmallVector<Metadata *, 4> Intersection;
1028 if (MD1->getNumOperands() == 0) {
1029 assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
1030 if (AccGroupSet2.count(MD1))
1031 Intersection.push_back(MD1);
1032 } else {
1033 for (const MDOperand &Node : MD1->operands()) {
1034 auto *Item = cast<MDNode>(Node.get());
1035 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
1036 if (AccGroupSet2.count(Item))
1037 Intersection.push_back(Item);
1038 }
1039 }
1040
1041 if (Intersection.size() == 0)
1042 return nullptr;
1043 if (Intersection.size() == 1)
1044 return cast<MDNode>(Intersection.front());
1045
1046 LLVMContext &Ctx = Inst1->getContext();
1047 return MDNode::get(Ctx, Intersection);
1048}
1049
1050/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
1051/// vectorization.
1052void llvm::getMetadataToPropagate(
1053 Instruction *Inst,
1054 SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata) {
1055 Inst->getAllMetadataOtherThanDebugLoc(Metadata);
1056 static const unsigned SupportedIDs[] = {
1057 LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1058 LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
1059 LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
1060 LLVMContext::MD_access_group, LLVMContext::MD_mmra};
1061
1062 // Remove any unsupported metadata kinds from Metadata.
1063 for (unsigned Idx = 0; Idx != Metadata.size();) {
1064 if (is_contained(SupportedIDs, Metadata[Idx].first)) {
1065 ++Idx;
1066 } else {
1067 // Swap element to end and remove it.
1068 std::swap(Metadata[Idx], Metadata.back());
1069 Metadata.pop_back();
1070 }
1071 }
1072}
1073
1074/// \returns \p I after propagating metadata from \p VL.
1075Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
1076 if (VL.empty())
1077 return Inst;
1078 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
1079 getMetadataToPropagate(cast<Instruction>(VL[0]), Metadata);
1080
1081 for (auto &[Kind, MD] : Metadata) {
1082 // Skip MMRA metadata if the instruction cannot have it.
1083 if (Kind == LLVMContext::MD_mmra && !canInstructionHaveMMRAs(*Inst))
1084 continue;
1085
1086 for (int J = 1, E = VL.size(); MD && J != E; ++J) {
1087 const Instruction *IJ = cast<Instruction>(VL[J]);
1088 MDNode *IMD = IJ->getMetadata(Kind);
1089
1090 switch (Kind) {
1091 case LLVMContext::MD_mmra: {
1092 MD = MMRAMetadata::combine(Inst->getContext(), MD, IMD);
1093 break;
1094 }
1095 case LLVMContext::MD_tbaa:
1096 MD = MDNode::getMostGenericTBAA(MD, IMD);
1097 break;
1098 case LLVMContext::MD_alias_scope:
1100 break;
1101 case LLVMContext::MD_fpmath:
1102 MD = MDNode::getMostGenericFPMath(MD, IMD);
1103 break;
1104 case LLVMContext::MD_noalias:
1105 case LLVMContext::MD_nontemporal:
1106 case LLVMContext::MD_invariant_load:
1107 MD = MDNode::intersect(MD, IMD);
1108 break;
1109 case LLVMContext::MD_access_group:
1110 MD = intersectAccessGroups(Inst, IJ);
1111 break;
1112 default:
1113 llvm_unreachable("unhandled metadata");
1114 }
1115 }
1116
1117 Inst->setMetadata(Kind, MD);
1118 }
1119
1120 return Inst;
1121}
1122
1123Constant *
1124llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
1125 const InterleaveGroup<Instruction> &Group) {
1126 // All 1's means mask is not needed.
1127 if (Group.isFull())
1128 return nullptr;
1129
1130 // TODO: support reversed access.
1131 assert(!Group.isReverse() && "Reversed group not supported.");
1132
1133 SmallVector<Constant *, 16> Mask;
1134 for (unsigned i = 0; i < VF; i++)
1135 for (unsigned j = 0; j < Group.getFactor(); ++j) {
1136 unsigned HasMember = Group.getMember(j) ? 1 : 0;
1137 Mask.push_back(Builder.getInt1(HasMember));
1138 }
1139
1140 return ConstantVector::get(Mask);
1141}
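// Illustrative result: for VF = 4 and a factor-3 group whose member at index 1
// is missing, the returned constant is the per-lane pattern <i1 1, i1 0, i1 1>
// repeated VF times, i.e. a 12-element i1 vector with 0 marking the gap.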
1142
1143llvm::SmallVector<int, 16>
1144llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
1145 SmallVector<int, 16> MaskVec;
1146 for (unsigned i = 0; i < VF; i++)
1147 for (unsigned j = 0; j < ReplicationFactor; j++)
1148 MaskVec.push_back(i);
1149
1150 return MaskVec;
1151}
1152
1153llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
1154 unsigned NumVecs) {
1155 SmallVector<int, 16> Mask;
1156 for (unsigned i = 0; i < VF; i++)
1157 for (unsigned j = 0; j < NumVecs; j++)
1158 Mask.push_back(j * VF + i);
1159
1160 return Mask;
1161}
1162
1163llvm::SmallVector<int, 16>
1164llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
1165 SmallVector<int, 16> Mask;
1166 for (unsigned i = 0; i < VF; i++)
1167 Mask.push_back(Start + i * Stride);
1168
1169 return Mask;
1170}
1171
1172llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
1173 unsigned NumInts,
1174 unsigned NumUndefs) {
1175 SmallVector<int, 16> Mask;
1176 for (unsigned i = 0; i < NumInts; i++)
1177 Mask.push_back(Start + i);
1178
1179 for (unsigned i = 0; i < NumUndefs; i++)
1180 Mask.push_back(-1);
1181
1182 return Mask;
1183}
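// Worked examples (illustrative values):
//   createReplicatedMask(3, 2)    -> <0, 0, 0, 1, 1, 1>
//   createInterleaveMask(4, 2)    -> <0, 4, 1, 5, 2, 6, 3, 7>
//   createStrideMask(0, 2, 4)     -> <0, 2, 4, 6>
//   createSequentialMask(2, 3, 2) -> <2, 3, 4, -1, -1>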
1184
1185llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
1186 unsigned NumElts) {
1187 // Avoid casts in the loop and make sure we have a reasonable number.
1188 int NumEltsSigned = NumElts;
1189 assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
1190
1191 // If the mask chooses an element from operand 1, reduce it to choose from the
1192 // corresponding element of operand 0. Undef mask elements are unchanged.
1193 SmallVector<int, 16> UnaryMask;
1194 for (int MaskElt : Mask) {
1195 assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
1196 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
1197 UnaryMask.push_back(UnaryElt);
1198 }
1199 return UnaryMask;
1200}
1201
1202/// A helper function for concatenating vectors. This function concatenates two
1203/// vectors having the same element type. If the second vector has fewer
1204/// elements than the first, it is padded with undefs.
1205static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
1206 Value *V2) {
1207 VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
1208 VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
1209 assert(VecTy1 && VecTy2 &&
1210 VecTy1->getScalarType() == VecTy2->getScalarType() &&
1211 "Expect two vectors with the same element type");
1212
1213 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
1214 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
1215 assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
1216
1217 if (NumElts1 > NumElts2) {
1218 // Extend with UNDEFs.
1219 V2 = Builder.CreateShuffleVector(
1220 V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
1221 }
1222
1223 return Builder.CreateShuffleVector(
1224 V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
1225}
1226
1227Value *llvm::concatenateVectors(IRBuilderBase &Builder,
1228 ArrayRef<Value *> Vecs) {
1229 unsigned NumVecs = Vecs.size();
1230 assert(NumVecs > 1 && "Should be at least two vectors");
1231
1232 SmallVector<Value *, 8> ResList;
1233 ResList.append(Vecs.begin(), Vecs.end());
1234 do {
1235 SmallVector<Value *, 8> TmpList;
1236 for (unsigned i = 0; i < NumVecs - 1; i += 2) {
1237 Value *V0 = ResList[i], *V1 = ResList[i + 1];
1238 assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
1239 "Only the last vector may have a different type");
1240
1241 TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
1242 }
1243
1244 // Push the last vector if the total number of vectors is odd.
1245 if (NumVecs % 2 != 0)
1246 TmpList.push_back(ResList[NumVecs - 1]);
1247
1248 ResList = TmpList;
1249 NumVecs = ResList.size();
1250 } while (NumVecs > 1);
1251
1252 return ResList[0];
1253}
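// Usage sketch (illustrative; assumes an IRBuilder `Builder` positioned at the
// insertion point and two <4 x float> values A and B):
//   Value *Wide = concatenateVectors(Builder, {A, B});
// yields a single <8 x float>; a trailing narrower input is first padded with
// undefs so both operands of each pairwise concatenation have the same width.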
1254
1255bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
1256 assert(isa<VectorType>(Mask->getType()) &&
1257 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1258 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1259 1 &&
1260 "Mask must be a vector of i1");
1261
1262 auto *ConstMask = dyn_cast<Constant>(Mask);
1263 if (!ConstMask)
1264 return false;
1265 if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1266 return true;
1267 if (isa<ScalableVectorType>(ConstMask->getType()))
1268 return false;
1269 for (unsigned
1270 I = 0,
1271 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1272 I != E; ++I) {
1273 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1274 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1275 continue;
1276 return false;
1277 }
1278 return true;
1279}
1280
1281bool llvm::maskIsAllOneOrUndef(Value *Mask) {
1282 assert(isa<VectorType>(Mask->getType()) &&
1283 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1284 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1285 1 &&
1286 "Mask must be a vector of i1");
1287
1288 auto *ConstMask = dyn_cast<Constant>(Mask);
1289 if (!ConstMask)
1290 return false;
1291 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1292 return true;
1293 if (isa<ScalableVectorType>(ConstMask->getType()))
1294 return false;
1295 for (unsigned
1296 I = 0,
1297 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1298 I != E; ++I) {
1299 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1300 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1301 continue;
1302 return false;
1303 }
1304 return true;
1305}
1306
1307bool llvm::maskContainsAllOneOrUndef(Value *Mask) {
1308 assert(isa<VectorType>(Mask->getType()) &&
1309 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1310 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1311 1 &&
1312 "Mask must be a vector of i1");
1313
1314 auto *ConstMask = dyn_cast<Constant>(Mask);
1315 if (!ConstMask)
1316 return false;
1317 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1318 return true;
1319 if (isa<ScalableVectorType>(ConstMask->getType()))
1320 return false;
1321 for (unsigned
1322 I = 0,
1323 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1324 I != E; ++I) {
1325 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1326 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1327 return true;
1328 }
1329 return false;
1330}
1331
1332/// TODO: This is a lot like known bits, but for
1333/// vectors. Is there something we can common this with?
1334APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
1335 assert(isa<FixedVectorType>(Mask->getType()) &&
1336 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1337 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1338 1 &&
1339 "Mask must be a fixed width vector of i1");
1340
1341 const unsigned VWidth =
1342 cast<FixedVectorType>(Mask->getType())->getNumElements();
1343 APInt DemandedElts = APInt::getAllOnes(VWidth);
1344 if (auto *CV = dyn_cast<ConstantVector>(Mask))
1345 for (unsigned i = 0; i < VWidth; i++)
1346 if (CV->getAggregateElement(i)->isNullValue())
1347 DemandedElts.clearBit(i);
1348 return DemandedElts;
1349}
1350
1351bool InterleavedAccessInfo::isStrided(int Stride) {
1352 unsigned Factor = std::abs(Stride);
1353 return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
1354}
1355
1356void InterleavedAccessInfo::collectConstStrideAccesses(
1357 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
1358 const DenseMap<Value*, const SCEV*> &Strides) {
1359 auto &DL = TheLoop->getHeader()->getDataLayout();
1360
1361 // Since it's desired that the load/store instructions be maintained in
1362 // "program order" for the interleaved access analysis, we have to visit the
1363 // blocks in the loop in reverse postorder (i.e., in a topological order).
1364 // Such an ordering will ensure that any load/store that may be executed
1365 // before a second load/store will precede the second load/store in
1366 // AccessStrideInfo.
1367 LoopBlocksDFS DFS(TheLoop);
1368 DFS.perform(LI);
1369 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
1370 for (auto &I : *BB) {
1371 Value *Ptr = getLoadStorePointerOperand(&I);
1372 if (!Ptr)
1373 continue;
1374 Type *ElementTy = getLoadStoreType(&I);
1375
1376 // Currently, codegen doesn't support cases where the type size doesn't
1377 // match the alloc size. Skip them for now.
1378 uint64_t Size = DL.getTypeAllocSize(ElementTy);
1379 if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
1380 continue;
1381
1382 // We don't check wrapping here because we don't know yet if Ptr will be
1383 // part of a full group or a group with gaps. Checking wrapping for all
1384 // pointers (even those that end up in groups with no gaps) will be overly
1385 // conservative. For full groups, wrapping should be ok since if we would
1386 // wrap around the address space we would do a memory access at nullptr
1387 // even without the transformation. The wrapping checks are therefore
1388 // deferred until after we've formed the interleaved groups.
1389 int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
1390 /*Assume=*/true, /*ShouldCheckWrap=*/false)
1391 .value_or(0);
1392
1393 const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
1394 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1395 getLoadStoreAlignment(&I));
1396 }
1397}
1398
1399// Analyze interleaved accesses and collect them into interleaved load and
1400// store groups.
1401//
1402// When generating code for an interleaved load group, we effectively hoist all
1403// loads in the group to the location of the first load in program order. When
1404// generating code for an interleaved store group, we sink all stores to the
1405// location of the last store. This code motion can change the order of load
1406// and store instructions and may break dependences.
1407//
1408// The code generation strategy mentioned above ensures that we won't violate
1409// any write-after-read (WAR) dependences.
1410//
1411// E.g., for the WAR dependence: a = A[i]; // (1)
1412// A[i] = b; // (2)
1413//
1414// The store group of (2) is always inserted at or below (2), and the load
1415// group of (1) is always inserted at or above (1). Thus, the instructions will
1416// never be reordered. All other dependences are checked to ensure the
1417// correctness of the instruction reordering.
1418//
1419// The algorithm visits all memory accesses in the loop in bottom-up program
1420// order. Program order is established by traversing the blocks in the loop in
1421// reverse postorder when collecting the accesses.
1422//
1423// We visit the memory accesses in bottom-up order because it can simplify the
1424// construction of store groups in the presence of write-after-write (WAW)
1425// dependences.
1426//
1427// E.g., for the WAW dependence: A[i] = a; // (1)
1428// A[i] = b; // (2)
1429// A[i + 1] = c; // (3)
1430//
1431// We will first create a store group with (3) and (2). (1) can't be added to
1432// this group because it and (2) are dependent. However, (1) can be grouped
1433// with other accesses that may precede it in program order. Note that a
1434// bottom-up order does not imply that WAW dependences should not be checked.
1435void InterleavedAccessInfo::analyzeInterleaving(
1436 bool EnablePredicatedInterleavedMemAccesses) {
1437 LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1438 const auto &Strides = LAI->getSymbolicStrides();
1439
1440 // Holds all accesses with a constant stride.
1441 MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
1442 collectConstStrideAccesses(AccessStrideInfo, Strides);
1443
1444 if (AccessStrideInfo.empty())
1445 return;
1446
1447 // Collect the dependences in the loop.
1448 collectDependences();
1449
1450 // Holds all interleaved store groups temporarily.
1452 // Holds all interleaved load groups temporarily.
1454 // Groups added to this set cannot have new members added.
1455 SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;
1456
1457 // Search in bottom-up program order for pairs of accesses (A and B) that can
1458 // form interleaved load or store groups. In the algorithm below, access A
1459 // precedes access B in program order. We initialize a group for B in the
1460 // outer loop of the algorithm, and then in the inner loop, we attempt to
1461 // insert each A into B's group if:
1462 //
1463 // 1. A and B have the same stride,
1464 // 2. A and B have the same memory object size, and
1465 // 3. A belongs in B's group according to its distance from B.
1466 //
1467 // Special care is taken to ensure group formation will not break any
1468 // dependences.
1469 for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1470 BI != E; ++BI) {
1471 Instruction *B = BI->first;
1472 StrideDescriptor DesB = BI->second;
1473
1474 // Initialize a group for B if it has an allowable stride. Even if we don't
1475 // create a group for B, we continue with the bottom-up algorithm to ensure
1476 // we don't break any of B's dependences.
1477 InterleaveGroup<Instruction> *GroupB = nullptr;
1478 if (isStrided(DesB.Stride) &&
1479 (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1480 GroupB = getInterleaveGroup(B);
1481 if (!GroupB) {
1482 LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1483 << '\n');
1484 GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1485 if (B->mayWriteToMemory())
1486 StoreGroups.insert(GroupB);
1487 else
1488 LoadGroups.insert(GroupB);
1489 }
1490 }
1491
1492 for (auto AI = std::next(BI); AI != E; ++AI) {
1493 Instruction *A = AI->first;
1494 StrideDescriptor DesA = AI->second;
1495
1496 // Our code motion strategy implies that we can't have dependences
1497 // between accesses in an interleaved group and other accesses located
1498 // between the first and last member of the group. Note that this also
1499 // means that a group can't have more than one member at a given offset.
1500 // The accesses in a group can have dependences with other accesses, but
1501 // we must ensure we don't extend the boundaries of the group such that
1502 // we encompass those dependent accesses.
1503 //
1504 // For example, assume we have the sequence of accesses shown below in a
1505 // stride-2 loop:
1506 //
1507 // (1, 2) is a group | A[i] = a; // (1)
1508 // | A[i-1] = b; // (2) |
1509 // A[i-3] = c; // (3)
1510 // A[i] = d; // (4) | (2, 4) is not a group
1511 //
1512 // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1513 // but not with (4). If we did, the dependent access (3) would be within
1514 // the boundaries of the (2, 4) group.
1515 auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
1516 StrideEntry *A) -> Instruction * {
1517 for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
1518 Instruction *MemberOfGroupB = Group->getMember(Index);
1519 if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
1520 A, &*AccessStrideInfo.find(MemberOfGroupB)))
1521 return MemberOfGroupB;
1522 }
1523 return nullptr;
1524 };
1525
1526 auto GroupA = getInterleaveGroup(A);
1527 // If A is a load, dependencies are tolerable, there's nothing to do here.
1528 // If both A and B belong to the same (store) group, they are independent,
1529 // even if dependencies have not been recorded.
1530 // If both GroupA and GroupB are null, there's nothing to do here.
1531 if (A->mayWriteToMemory() && GroupA != GroupB) {
1532 Instruction *DependentInst = nullptr;
1533 // If GroupB is a load group, we have to compare AI against all
1534 // members of GroupB because if any load within GroupB has a dependency
1535 // on AI, we need to mark GroupB as complete and also release the
1536 // store GroupA (if A belongs to one). The former prevents incorrect
1537 // hoisting of load B above store A while the latter prevents incorrect
1538 // sinking of store A below load B.
1539 if (GroupB && LoadGroups.contains(GroupB))
1540 DependentInst = DependentMember(GroupB, &*AI);
1541 else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
1542 DependentInst = B;
1543
1544 if (DependentInst) {
1545 // A has a store dependence on B (or on some load within GroupB) and
1546 // is part of a store group. Release A's group to prevent illegal
1547 // sinking of A below B. A will then be free to form another group
1548 // with instructions that precede it.
1549 if (GroupA && StoreGroups.contains(GroupA)) {
1550 LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1551 "dependence between "
1552 << *A << " and " << *DependentInst << '\n');
1553 StoreGroups.remove(GroupA);
1554 releaseGroup(GroupA);
1555 }
1556 // If B is a load and part of an interleave group, no earlier loads
1557 // can be added to B's interleave group, because this would mean the
1558 // DependentInst would move across store A. Mark the interleave group
1559 // as complete.
1560 if (GroupB && LoadGroups.contains(GroupB)) {
1561 LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
1562 << " as complete.\n");
1563 CompletedLoadGroups.insert(GroupB);
1564 }
1565 }
1566 }
1567 if (CompletedLoadGroups.contains(GroupB)) {
1568 // Skip trying to add A to B, continue to look for other conflicting A's
1569 // in groups to be released.
1570 continue;
1571 }
1572
1573 // At this point, we've checked for illegal code motion. If either A or B
1574 // isn't strided, there's nothing left to do.
1575 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1576 continue;
1577
1578 // Ignore A if it's already in a group or isn't the same kind of memory
1579 // operation as B.
1580 // Note that mayReadFromMemory() isn't mutually exclusive to
1581 // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1582 // here, canVectorizeMemory() should have returned false - except for the
1583 // case we asked for optimization remarks.
1584 if (isInterleaved(A) ||
1585 (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1586 (A->mayWriteToMemory() != B->mayWriteToMemory()))
1587 continue;
1588
1589 // Check rules 1 and 2. Ignore A if its stride or size is different from
1590 // that of B.
1591 if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1592 continue;
1593
1594 // Ignore A if the memory object of A and B don't belong to the same
1595 // address space
1596 if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1597 continue;
1598
1599 // Calculate the distance from A to B.
1600 const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1601 PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1602 if (!DistToB)
1603 continue;
1604 int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1605
1606 // Check rule 3. Ignore A if its distance to B is not a multiple of the
1607 // size.
1608 if (DistanceToB % static_cast<int64_t>(DesB.Size))
1609 continue;
1610
1611 // All members of a predicated interleave-group must have the same predicate,
1612 // and currently must reside in the same BB.
1613 BasicBlock *BlockA = A->getParent();
1614 BasicBlock *BlockB = B->getParent();
1615 if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1616 (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1617 continue;
1618
1619 // The index of A is the index of B plus A's distance to B in multiples
1620 // of the size.
1621 int IndexA =
1622 GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1623
1624 // Try to insert A into B's group.
1625 if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
1626 LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1627 << " into the interleave group with" << *B
1628 << '\n');
1629 InterleaveGroupMap[A] = GroupB;
1630
1631 // Set the first load in program order as the insert position.
1632 if (A->mayReadFromMemory())
1633 GroupB->setInsertPos(A);
1634 }
1635 } // Iteration over A accesses.
1636 } // Iteration over B accesses.
1637
1638 auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
1639 int Index,
1640 const char *FirstOrLast) -> bool {
1641 Instruction *Member = Group->getMember(Index);
1642 assert(Member && "Group member does not exist");
1643 Value *MemberPtr = getLoadStorePointerOperand(Member);
1644 Type *AccessTy = getLoadStoreType(Member);
1645 if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
1646 /*Assume=*/false, /*ShouldCheckWrap=*/true)
1647 .value_or(0))
1648 return false;
1649 LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1650 << FirstOrLast
1651 << " group member potentially pointer-wrapping.\n");
1652 releaseGroup(Group);
1653 return true;
1654 };
1655
1656 // Remove interleaved groups with gaps whose memory
1657 // accesses may wrap around. We have to revisit the getPtrStride analysis,
1658 // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1659 // not check wrapping (see documentation there).
1660 // FORNOW we use Assume=false;
1661 // TODO: Change to Assume=true but making sure we don't exceed the threshold
1662 // of runtime SCEV assumptions checks (thereby potentially failing to
1663 // vectorize altogether).
1664 // Additional optional optimizations:
1665 // TODO: If we are peeling the loop and we know that the first pointer doesn't
1666 // wrap then we can deduce that all pointers in the group don't wrap.
1667 // This means that we can forcefully peel the loop in order to only have to
1668 // check the first pointer for no-wrap. When we'll change to use Assume=true
1669 // we'll only need at most one runtime check per interleaved group.
1670 for (auto *Group : LoadGroups) {
1671 // Case 1: A full group. We can skip the checks; for full groups, if the wide
1672 // load would wrap around the address space we would do a memory access at
1673 // nullptr even without the transformation.
1674 if (Group->isFull())
1675 continue;
1676
1677 // Case 2: If first and last members of the group don't wrap this implies
1678 // that all the pointers in the group don't wrap.
1679 // So we check only group member 0 (which is always guaranteed to exist),
1680 // and group member Factor - 1; If the latter doesn't exist we rely on
1681 // peeling (if it is a non-reversed access -- see Case 3).
1682 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1683 continue;
1684 if (Group->getMember(Group->getFactor() - 1))
1685 InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
1686 else {
1687 // Case 3: A non-reversed interleaved load group with gaps: We need
1688 // to execute at least one scalar epilogue iteration. This will ensure
1689 // we don't speculatively access memory out-of-bounds. We only need
1690 // to look for a member at index factor - 1, since every group must have
1691 // a member at index zero.
1692 if (Group->isReverse()) {
1693 LLVM_DEBUG(
1694 dbgs() << "LV: Invalidate candidate interleaved group due to "
1695 "a reverse access with gaps.\n");
1696 releaseGroup(Group);
1697 continue;
1698 }
1699 LLVM_DEBUG(
1700 dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1701 RequiresScalarEpilogue = true;
1702 }
1703 }
1704
1705 for (auto *Group : StoreGroups) {
1706 // Case 1: A full group. We can skip the checks; for full groups, if the wide
1707 // store would wrap around the address space we would do a memory access at
1708 // nullptr even without the transformation.
1709 if (Group->isFull())
1710 continue;
1711
1712 // Interleave-store-group with gaps is implemented using masked wide store.
1713 // Remove interleaved store groups with gaps if
1714 // masked-interleaved-accesses are not enabled by the target.
1715 if (!EnablePredicatedInterleavedMemAccesses) {
1716 LLVM_DEBUG(
1717 dbgs() << "LV: Invalidate candidate interleaved store group due "
1718 "to gaps.\n");
1719 releaseGroup(Group);
1720 continue;
1721 }
1722
1723 // Case 2: If first and last members of the group don't wrap this implies
1724 // that all the pointers in the group don't wrap.
1725 // So we check only group member 0 (which is always guaranteed to exist),
1726 // and the last group member. Case 3 (scalar epilog) is not relevant for
1727 // stores with gaps, which are implemented with masked-store (rather than
1728 // speculative access, as in loads).
1729 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1730 continue;
1731 for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1732 if (Group->getMember(Index)) {
1733 InvalidateGroupIfMemberMayWrap(Group, Index, "last");
1734 break;
1735 }
1736 }
1737}
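
To make Case 3 above concrete, here is an illustrative sketch (not code from VectorUtils.cpp; the names A, N, I and Sum are made up): a loop whose only memory access is A[2*I] forms a factor-2 interleaved load group with a gap at index 1, so the group has no member at index Factor - 1 and, being non-reversed, triggers RequiresScalarEpilogue.

// Illustrative only: each iteration loads A[2*I] but never A[2*I+1], i.e. a
// stride-2 access group of factor 2 with a gap at index 1. Widening it to a
// single load of <A[2*I], A[2*I+1]> could touch one element beyond what the
// scalar loop reads in its last iteration, so the analysis above keeps the
// group but records that at least one scalar epilogue iteration is required.
int sumEvenElements(const int *A, int N) {
  int Sum = 0;
  for (int I = 0; I < N; ++I)
    Sum += A[2 * I];
  return Sum;
}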
1738 
1739 void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1740 // If no group had triggered the requirement to create an epilogue loop,
1741 // there is nothing to do.
1742 if (!requiresScalarEpilogue())
1743 return;
1744
1745 // Release groups requiring scalar epilogues. Note that this also removes them
1746 // from InterleaveGroups.
1747 bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
1748 if (!Group->requiresScalarEpilogue())
1749 return false;
1750 LLVM_DEBUG(
1751 dbgs()
1752 << "LV: Invalidate candidate interleaved group due to gaps that "
1753 "require a scalar epilogue (not allowed under optsize) and cannot "
1754 "be masked (not enabled). \n");
1755 releaseGroupWithoutRemovingFromSet(Group);
1756 return true;
1757 });
1758 assert(ReleasedGroup && "At least one group must be invalidated, as a "
1759 "scalar epilogue was required");
1760 (void)ReleasedGroup;
1761 RequiresScalarEpilogue = false;
1762}
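
invalidateGroupsRequiringScalarEpilogue() is meant to be called after analyzeInterleaving() when the client is not allowed to generate a scalar epilogue. A minimal usage sketch follows, assuming a caller that already owns an InterleavedAccessInfo; the helper name buildInterleaveGroups and its boolean parameters are illustrative, not part of this file.

#include "llvm/Analysis/VectorUtils.h"

// Hedged sketch: form interleave groups, then drop any group that would rely
// on a scalar epilogue when the caller cannot create one (e.g. optsize, or
// tail folding by masking).
static void buildInterleaveGroups(llvm::InterleavedAccessInfo &IAI,
                                  bool EnableMaskedInterleavedAccesses,
                                  bool ScalarEpilogueAllowed) {
  IAI.analyzeInterleaving(EnableMaskedInterleavedAccesses);
  if (!ScalarEpilogueAllowed && IAI.requiresScalarEpilogue())
    IAI.invalidateGroupsRequiringScalarEpilogue();
}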
1763
1764template <typename InstT>
1765void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1766 llvm_unreachable("addMetadata can only be used for Instruction");
1767}
1768
1769 namespace llvm {
1770 template <>
1771 void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1772 SmallVector<Value *, 4> VL(make_second_range(Members));
1773 propagateMetadata(NewInst, VL);
1774 }
1775 } // namespace llvm
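
The Instruction specialization of addMetadata() above forwards to propagateMetadata() over the group's members. A minimal usage sketch, assuming a transform that has just materialized one wide load for a load group; annotateWideLoad, Builder, WideTy and Ptr are illustrative names, not part of this file.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"

// Illustrative only: emit a single wide load covering the whole group and let
// addMetadata() copy over metadata that is common to all members (tbaa,
// alias.scope, noalias, access groups, ...).
static llvm::Instruction *
annotateWideLoad(llvm::InterleaveGroup<llvm::Instruction> &Group,
                 llvm::IRBuilderBase &Builder, llvm::Type *WideTy,
                 llvm::Value *Ptr) {
  llvm::Instruction *WideLoad =
      Builder.CreateAlignedLoad(WideTy, Ptr, Group.getAlign(), "wide.vec");
  Group.addMetadata(WideLoad);
  return WideLoad;
}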