21 #define DEBUG_TYPE "riscvtti"
24 "riscv-v-register-bit-width-lmul",
26 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
27 "by autovectorized code. Fractional LMULs are not supported."),
33 "Overrides result used for getMaximumVF query which is used "
34 "exclusively by SLP vectorizer."),
40 "getIntImmCost can only estimate cost of materialising integers");
56 auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
57 if (!BO || !BO->hasOneUse())
60 if (BO->getOpcode() != Instruction::Shl)
63 if (!isa<ConstantInt>(BO->getOperand(1)))
66 unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
71 if (ShAmt == Trailing)
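// Illustrative, standalone sketch of the check this fragment builds up to:
// the elided lines appear to ask whether the AND immediate is a single
// contiguous run of ones whose trailing-zero count equals the shift amount,
// in which case (X << ShAmt) & Imm can be lowered as a pair of shifts and the
// constant is effectively free. Simplified stand-ins using <bit>, not the
// LLVM helpers themselves:

#include <bit>
#include <cstdint>

// True if Mask is a non-empty contiguous run of ones, e.g. 0xff00.
bool isShiftedMask(uint64_t Mask) {
  if (Mask == 0)
    return false;
  uint64_t Run = Mask >> std::countr_zero(Mask);
  return (Run & (Run + 1)) == 0; // no holes in the run
}

// True if the and-mask folds into a shift pair: its run of ones starts
// exactly at bit ShAmt. Example: Mask = 0xff00, ShAmt = 8.
bool foldsIntoShiftPair(uint64_t Mask, unsigned ShAmt) {
  return isShiftedMask(Mask) &&
         static_cast<unsigned>(std::countr_zero(Mask)) == ShAmt;
}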
83 "getIntImmCost can only estimate cost of materialising integers");
91 bool Takes12BitImm = false;
92 unsigned ImmArgIdx = ~0U;
95 case Instruction::GetElementPtr:
100 case Instruction::And:
102 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
105 if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
108 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
110 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
113 Takes12BitImm = true;
115 case Instruction::Add:
116 Takes12BitImm = true;
118 case Instruction::Or:
119 case Instruction::Xor:
121 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
123 Takes12BitImm = true;
125 case Instruction::Mul:
127 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
130 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
133 Takes12BitImm = true;
135 case Instruction::Sub:
136 case Instruction::Shl:
137 case Instruction::LShr:
138 case Instruction::AShr:
139 Takes12BitImm = true;
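// The cases above mark operations whose RISC-V immediate forms take a
// sign-extended 12-bit constant (for example addi, andi, ori and xori), so
// such constants are considered free to materialise. A standalone version of
// the underlying range test, not the LLVM helper:

#include <cstdint>

// True if V fits the signed 12-bit immediate field, i.e. [-2048, 2047].
bool fitsInSImm12(int64_t V) {
  return V >= -2048 && V <= 2047;
}

// Example: 100 fits (free as "addi a0, a0, 100"); 4096 does not and needs a
// separate materialisation sequence whose cost is estimated further below.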
150 if (Imm.getSignificantBits() <= 64 &&
186 case Intrinsic::vector_reduce_mul:
187 case Intrinsic::vector_reduce_fmul:
243 return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
258 if (isa<FixedVectorType>(Tp)) {
263 if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
264 MVT EltTp = LT.second.getVectorElementType();
277 if (Mask[0] == 0 || Mask[0] == 1) {
281 if (equal(DeinterleaveMask, Mask))
288 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
289 (LT.second.getScalarSizeInBits() != 8 ||
290 LT.second.getVectorNumElements() <= 256)) {
302 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
303 (LT.second.getScalarSizeInBits() != 8 ||
304 LT.second.getVectorNumElements() <= 256)) {
319 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
320 LT.second.isFixedLengthVector() &&
321 LT.second.getVectorElementType().getSizeInBits() ==
323 LT.second.getVectorNumElements() <
324 cast<FixedVectorType>(Tp)->getNumElements() &&
326 cast<FixedVectorType>(Tp)->getNumElements()) ==
327 static_cast<unsigned>(*LT.first.getValue())) {
328 unsigned NumRegs = *LT.first.getValue();
329 unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
334 for (unsigned I = 0; I < NumRegs; ++I) {
335 bool IsSingleVector = true;
338 I == NumRegs - 1 ? Mask.size() % SubVF : SubVF),
339 SubMask.begin(), [&](int I) {
340 bool SingleSubVector = I / VF == 0;
341 IsSingleVector &= SingleSubVector;
342 return (SingleSubVector ? 0 : 1) * SubVF + I % VF;
346 SubVecTy, SubMask, CostKind, 0, nullptr);
382 Instruction::InsertElement);
383 if (LT.second.getScalarSizeInBits() == 1) {
435 if (LT.second.isFixedLengthVector())
437 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
441 return LT.first * (LenCost + GatherCost + ExtendCost);
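// The deinterleave check near lines 277-281 compares the incoming mask
// against a stride-2 mask built from its first element. A standalone sketch
// of how such a mask is constructed (mirroring what createStrideMask
// produces), not the code of this file:

#include <vector>

// Mask that picks every Stride-th element starting at Start. For Start = 0,
// Stride = 2, VF = 4 this is {0, 2, 4, 6}: the even lanes of two concatenated
// inputs, which maps onto a single vnsrl-style narrowing shift.
std::vector<int> makeStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
  std::vector<int> Mask;
  Mask.reserve(VF);
  for (unsigned I = 0; I < VF; ++I)
    Mask.push_back(static_cast<int>(Start + I * Stride));
  return Mask;
}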
462 bool UseMaskForCond, bool UseMaskForGaps) {
463 if (isa<ScalableVectorType>(VecTy))
465 auto *FVTy = cast<FixedVectorType>(VecTy);
468 unsigned VF = FVTy->getNumElements() / Factor;
474 if (!UseMaskForCond && !UseMaskForGaps &&
475 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
478 if (LT.second.isFixedLengthVector()) {
480 LT.second.getVectorNumElements());
489 return LT.first + LegalMemCost;
499 if (Opcode == Instruction::Load) {
501 for (unsigned Index : Indices) {
526 UseMaskForCond, UseMaskForGaps);
528 assert(Opcode == Instruction::Store && "Opcode must be a store");
535 return MemCost + ShuffleCost;
545 if ((Opcode == Instruction::Load &&
547 (Opcode == Instruction::Store &&
555 auto &VTy = *cast<VectorType>(DataTy);
558 {TTI::OK_AnyValue, TTI::OP_None}, I);
559 unsigned NumLoads = getEstimatedVLFor(&VTy);
560 return NumLoads * MemOpCost;
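// The gather/scatter estimate above is simply the number of lanes the vector
// is expected to hold times the cost of one independent element access. A
// tiny standalone rendering of that product:

#include <cstdint>

uint64_t gatherScatterCost(uint64_t EstimatedVL, uint64_t PerElementMemCost) {
  return EstimatedVL * PerElementMemCost; // mirrors NumLoads * MemOpCost
}

// Example: an <8 x i32> gather with a unit per-element cost is costed as 8,
// so doubling the vector length doubles the estimate.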
568 {Intrinsic::floor, MVT::v2f32, 9},
569 {Intrinsic::floor, MVT::v4f32, 9},
570 {Intrinsic::floor, MVT::v8f32, 9},
571 {Intrinsic::floor, MVT::v16f32, 9},
572 {Intrinsic::floor, MVT::nxv1f32, 9},
573 {Intrinsic::floor, MVT::nxv2f32, 9},
574 {Intrinsic::floor, MVT::nxv4f32, 9},
575 {Intrinsic::floor, MVT::nxv8f32, 9},
576 {Intrinsic::floor, MVT::nxv16f32, 9},
577 {Intrinsic::floor, MVT::v2f64, 9},
578 {Intrinsic::floor, MVT::v4f64, 9},
579 {Intrinsic::floor, MVT::v8f64, 9},
580 {Intrinsic::floor, MVT::v16f64, 9},
581 {Intrinsic::floor, MVT::nxv1f64, 9},
582 {Intrinsic::floor, MVT::nxv2f64, 9},
583 {Intrinsic::floor, MVT::nxv4f64, 9},
584 {Intrinsic::floor, MVT::nxv8f64, 9},
585 {Intrinsic::ceil, MVT::v2f32, 9},
586 {Intrinsic::ceil, MVT::v4f32, 9},
587 {Intrinsic::ceil, MVT::v8f32, 9},
588 {Intrinsic::ceil, MVT::v16f32, 9},
589 {Intrinsic::ceil, MVT::nxv1f32, 9},
590 {Intrinsic::ceil, MVT::nxv2f32, 9},
591 {Intrinsic::ceil, MVT::nxv4f32, 9},
592 {Intrinsic::ceil, MVT::nxv8f32, 9},
593 {Intrinsic::ceil, MVT::nxv16f32, 9},
594 {Intrinsic::ceil, MVT::v2f64, 9},
595 {Intrinsic::ceil, MVT::v4f64, 9},
596 {Intrinsic::ceil, MVT::v8f64, 9},
597 {Intrinsic::ceil, MVT::v16f64, 9},
598 {Intrinsic::ceil, MVT::nxv1f64, 9},
599 {Intrinsic::ceil, MVT::nxv2f64, 9},
600 {Intrinsic::ceil, MVT::nxv4f64, 9},
601 {Intrinsic::ceil, MVT::nxv8f64, 9},
602 {Intrinsic::trunc, MVT::v2f32, 7},
603 {Intrinsic::trunc, MVT::v4f32, 7},
604 {Intrinsic::trunc, MVT::v8f32, 7},
605 {Intrinsic::trunc, MVT::v16f32, 7},
606 {Intrinsic::trunc, MVT::nxv1f32, 7},
607 {Intrinsic::trunc, MVT::nxv2f32, 7},
608 {Intrinsic::trunc, MVT::nxv4f32, 7},
609 {Intrinsic::trunc, MVT::nxv8f32, 7},
610 {Intrinsic::trunc, MVT::nxv16f32, 7},
611 {Intrinsic::trunc, MVT::v2f64, 7},
612 {Intrinsic::trunc, MVT::v4f64, 7},
613 {Intrinsic::trunc, MVT::v8f64, 7},
614 {Intrinsic::trunc, MVT::v16f64, 7},
615 {Intrinsic::trunc, MVT::nxv1f64, 7},
616 {Intrinsic::trunc, MVT::nxv2f64, 7},
617 {Intrinsic::trunc, MVT::nxv4f64, 7},
618 {Intrinsic::trunc, MVT::nxv8f64, 7},
619 {Intrinsic::round, MVT::v2f32, 9},
620 {Intrinsic::round, MVT::v4f32, 9},
621 {Intrinsic::round, MVT::v8f32, 9},
622 {Intrinsic::round, MVT::v16f32, 9},
623 {Intrinsic::round, MVT::nxv1f32, 9},
624 {Intrinsic::round, MVT::nxv2f32, 9},
625 {Intrinsic::round, MVT::nxv4f32, 9},
626 {Intrinsic::round, MVT::nxv8f32, 9},
627 {Intrinsic::round, MVT::nxv16f32, 9},
628 {Intrinsic::round, MVT::v2f64, 9},
629 {Intrinsic::round, MVT::v4f64, 9},
630 {Intrinsic::round, MVT::v8f64, 9},
631 {Intrinsic::round, MVT::v16f64, 9},
632 {Intrinsic::round, MVT::nxv1f64, 9},
633 {Intrinsic::round, MVT::nxv2f64, 9},
634 {Intrinsic::round, MVT::nxv4f64, 9},
635 {Intrinsic::round, MVT::nxv8f64, 9},
636 {Intrinsic::roundeven, MVT::v2f32, 9},
637 {Intrinsic::roundeven, MVT::v4f32, 9},
638 {Intrinsic::roundeven, MVT::v8f32, 9},
639 {Intrinsic::roundeven, MVT::v16f32, 9},
640 {Intrinsic::roundeven, MVT::nxv1f32, 9},
641 {Intrinsic::roundeven, MVT::nxv2f32, 9},
642 {Intrinsic::roundeven, MVT::nxv4f32, 9},
643 {Intrinsic::roundeven, MVT::nxv8f32, 9},
644 {Intrinsic::roundeven, MVT::nxv16f32, 9},
645 {Intrinsic::roundeven, MVT::v2f64, 9},
646 {Intrinsic::roundeven, MVT::v4f64, 9},
647 {Intrinsic::roundeven, MVT::v8f64, 9},
648 {Intrinsic::roundeven, MVT::v16f64, 9},
649 {Intrinsic::roundeven, MVT::nxv1f64, 9},
650 {Intrinsic::roundeven, MVT::nxv2f64, 9},
651 {Intrinsic::roundeven, MVT::nxv4f64, 9},
652 {Intrinsic::roundeven, MVT::nxv8f64, 9},
653 {Intrinsic::rint, MVT::v2f32, 7},
654 {Intrinsic::rint, MVT::v4f32, 7},
655 {Intrinsic::rint, MVT::v8f32, 7},
656 {Intrinsic::rint, MVT::v16f32, 7},
657 {Intrinsic::rint, MVT::nxv1f32, 7},
658 {Intrinsic::rint, MVT::nxv2f32, 7},
659 {Intrinsic::rint, MVT::nxv4f32, 7},
660 {Intrinsic::rint, MVT::nxv8f32, 7},
661 {Intrinsic::rint, MVT::nxv16f32, 7},
662 {Intrinsic::rint, MVT::v2f64, 7},
663 {Intrinsic::rint, MVT::v4f64, 7},
664 {Intrinsic::rint, MVT::v8f64, 7},
665 {Intrinsic::rint, MVT::v16f64, 7},
666 {Intrinsic::rint, MVT::nxv1f64, 7},
667 {Intrinsic::rint, MVT::nxv2f64, 7},
668 {Intrinsic::rint, MVT::nxv4f64, 7},
669 {Intrinsic::rint, MVT::nxv8f64, 7},
670 {Intrinsic::lrint, MVT::v2i32, 1},
671 {Intrinsic::lrint, MVT::v4i32, 1},
672 {Intrinsic::lrint, MVT::v8i32, 1},
673 {Intrinsic::lrint, MVT::v16i32, 1},
674 {Intrinsic::lrint, MVT::nxv1i32, 1},
675 {Intrinsic::lrint, MVT::nxv2i32, 1},
676 {Intrinsic::lrint, MVT::nxv4i32, 1},
677 {Intrinsic::lrint, MVT::nxv8i32, 1},
678 {Intrinsic::lrint, MVT::nxv16i32, 1},
679 {Intrinsic::lrint, MVT::v2i64, 1},
680 {Intrinsic::lrint, MVT::v4i64, 1},
681 {Intrinsic::lrint, MVT::v8i64, 1},
682 {Intrinsic::lrint, MVT::v16i64, 1},
683 {Intrinsic::lrint, MVT::nxv1i64, 1},
684 {Intrinsic::lrint, MVT::nxv2i64, 1},
685 {Intrinsic::lrint, MVT::nxv4i64, 1},
686 {Intrinsic::lrint, MVT::nxv8i64, 1},
687 {Intrinsic::llrint, MVT::v2i64, 1},
688 {Intrinsic::llrint, MVT::v4i64, 1},
689 {Intrinsic::llrint, MVT::v8i64, 1},
690 {Intrinsic::llrint, MVT::v16i64, 1},
691 {Intrinsic::llrint, MVT::nxv1i64, 1},
692 {Intrinsic::llrint, MVT::nxv2i64, 1},
693 {Intrinsic::llrint, MVT::nxv4i64, 1},
694 {Intrinsic::llrint, MVT::nxv8i64, 1},
695 {Intrinsic::nearbyint, MVT::v2f32, 9},
696 {Intrinsic::nearbyint, MVT::v4f32, 9},
697 {Intrinsic::nearbyint, MVT::v8f32, 9},
698 {Intrinsic::nearbyint, MVT::v16f32, 9},
699 {Intrinsic::nearbyint, MVT::nxv1f32, 9},
700 {Intrinsic::nearbyint, MVT::nxv2f32, 9},
701 {Intrinsic::nearbyint, MVT::nxv4f32, 9},
702 {Intrinsic::nearbyint, MVT::nxv8f32, 9},
703 {Intrinsic::nearbyint, MVT::nxv16f32, 9},
704 {Intrinsic::nearbyint, MVT::v2f64, 9},
705 {Intrinsic::nearbyint, MVT::v4f64, 9},
706 {Intrinsic::nearbyint, MVT::v8f64, 9},
707 {Intrinsic::nearbyint, MVT::v16f64, 9},
708 {Intrinsic::nearbyint, MVT::nxv1f64, 9},
709 {Intrinsic::nearbyint, MVT::nxv2f64, 9},
710 {Intrinsic::nearbyint, MVT::nxv4f64, 9},
711 {Intrinsic::nearbyint, MVT::nxv8f64, 9},
712 {Intrinsic::bswap, MVT::v2i16, 3},
713 {Intrinsic::bswap, MVT::v4i16, 3},
714 {Intrinsic::bswap, MVT::v8i16, 3},
715 {Intrinsic::bswap, MVT::v16i16, 3},
716 {Intrinsic::bswap, MVT::nxv1i16, 3},
717 {Intrinsic::bswap, MVT::nxv2i16, 3},
718 {Intrinsic::bswap, MVT::nxv4i16, 3},
719 {Intrinsic::bswap, MVT::nxv8i16, 3},
720 {Intrinsic::bswap, MVT::nxv16i16, 3},
721 {Intrinsic::bswap, MVT::v2i32, 12},
722 {Intrinsic::bswap, MVT::v4i32, 12},
723 {Intrinsic::bswap, MVT::v8i32, 12},
724 {Intrinsic::bswap, MVT::v16i32, 12},
725 {Intrinsic::bswap, MVT::nxv1i32, 12},
726 {Intrinsic::bswap, MVT::nxv2i32, 12},
727 {Intrinsic::bswap, MVT::nxv4i32, 12},
728 {Intrinsic::bswap, MVT::nxv8i32, 12},
729 {Intrinsic::bswap, MVT::nxv16i32, 12},
730 {Intrinsic::bswap, MVT::v2i64, 31},
731 {Intrinsic::bswap, MVT::v4i64, 31},
732 {Intrinsic::bswap, MVT::v8i64, 31},
733 {Intrinsic::bswap, MVT::v16i64, 31},
734 {Intrinsic::bswap, MVT::nxv1i64, 31},
735 {Intrinsic::bswap, MVT::nxv2i64, 31},
736 {Intrinsic::bswap, MVT::nxv4i64, 31},
737 {Intrinsic::bswap, MVT::nxv8i64, 31},
738 {Intrinsic::vp_bswap, MVT::v2i16, 3},
739 {Intrinsic::vp_bswap, MVT::v4i16, 3},
740 {Intrinsic::vp_bswap, MVT::v8i16, 3},
741 {Intrinsic::vp_bswap, MVT::v16i16, 3},
742 {Intrinsic::vp_bswap, MVT::nxv1i16, 3},
743 {Intrinsic::vp_bswap, MVT::nxv2i16, 3},
744 {Intrinsic::vp_bswap, MVT::nxv4i16, 3},
745 {Intrinsic::vp_bswap, MVT::nxv8i16, 3},
746 {Intrinsic::vp_bswap, MVT::nxv16i16, 3},
747 {Intrinsic::vp_bswap, MVT::v2i32, 12},
748 {Intrinsic::vp_bswap, MVT::v4i32, 12},
749 {Intrinsic::vp_bswap, MVT::v8i32, 12},
750 {Intrinsic::vp_bswap, MVT::v16i32, 12},
751 {Intrinsic::vp_bswap, MVT::nxv1i32, 12},
752 {Intrinsic::vp_bswap, MVT::nxv2i32, 12},
753 {Intrinsic::vp_bswap, MVT::nxv4i32, 12},
754 {Intrinsic::vp_bswap, MVT::nxv8i32, 12},
755 {Intrinsic::vp_bswap, MVT::nxv16i32, 12},
756 {Intrinsic::vp_bswap, MVT::v2i64, 31},
757 {Intrinsic::vp_bswap, MVT::v4i64, 31},
758 {Intrinsic::vp_bswap, MVT::v8i64, 31},
759 {Intrinsic::vp_bswap, MVT::v16i64, 31},
760 {Intrinsic::vp_bswap, MVT::nxv1i64, 31},
761 {Intrinsic::vp_bswap, MVT::nxv2i64, 31},
762 {Intrinsic::vp_bswap, MVT::nxv4i64, 31},
763 {Intrinsic::vp_bswap, MVT::nxv8i64, 31},
764 {Intrinsic::vp_fshl, MVT::v2i8, 7},
765 {Intrinsic::vp_fshl, MVT::v4i8, 7},
766 {Intrinsic::vp_fshl, MVT::v8i8, 7},
767 {Intrinsic::vp_fshl, MVT::v16i8, 7},
768 {Intrinsic::vp_fshl, MVT::nxv1i8, 7},
769 {Intrinsic::vp_fshl, MVT::nxv2i8, 7},
770 {Intrinsic::vp_fshl, MVT::nxv4i8, 7},
771 {Intrinsic::vp_fshl, MVT::nxv8i8, 7},
772 {Intrinsic::vp_fshl, MVT::nxv16i8, 7},
773 {Intrinsic::vp_fshl, MVT::nxv32i8, 7},
774 {Intrinsic::vp_fshl, MVT::nxv64i8, 7},
775 {Intrinsic::vp_fshl, MVT::v2i16, 7},
776 {Intrinsic::vp_fshl, MVT::v4i16, 7},
777 {Intrinsic::vp_fshl, MVT::v8i16, 7},
778 {Intrinsic::vp_fshl, MVT::v16i16, 7},
779 {Intrinsic::vp_fshl, MVT::nxv1i16, 7},
780 {Intrinsic::vp_fshl, MVT::nxv2i16, 7},
781 {Intrinsic::vp_fshl, MVT::nxv4i16, 7},
782 {Intrinsic::vp_fshl, MVT::nxv8i16, 7},
783 {Intrinsic::vp_fshl, MVT::nxv16i16, 7},
784 {Intrinsic::vp_fshl, MVT::nxv32i16, 7},
785 {Intrinsic::vp_fshl, MVT::v2i32, 7},
786 {Intrinsic::vp_fshl, MVT::v4i32, 7},
787 {Intrinsic::vp_fshl, MVT::v8i32, 7},
788 {Intrinsic::vp_fshl, MVT::v16i32, 7},
789 {Intrinsic::vp_fshl, MVT::nxv1i32, 7},
790 {Intrinsic::vp_fshl, MVT::nxv2i32, 7},
791 {Intrinsic::vp_fshl, MVT::nxv4i32, 7},
792 {Intrinsic::vp_fshl, MVT::nxv8i32, 7},
793 {Intrinsic::vp_fshl, MVT::nxv16i32, 7},
794 {Intrinsic::vp_fshl, MVT::v2i64, 7},
795 {Intrinsic::vp_fshl, MVT::v4i64, 7},
796 {Intrinsic::vp_fshl, MVT::v8i64, 7},
797 {Intrinsic::vp_fshl, MVT::v16i64, 7},
798 {Intrinsic::vp_fshl, MVT::nxv1i64, 7},
799 {Intrinsic::vp_fshl, MVT::nxv2i64, 7},
800 {Intrinsic::vp_fshl, MVT::nxv4i64, 7},
801 {Intrinsic::vp_fshl, MVT::nxv8i64, 7},
802 {Intrinsic::vp_fshr, MVT::v2i8, 7},
803 {Intrinsic::vp_fshr, MVT::v4i8, 7},
804 {Intrinsic::vp_fshr, MVT::v8i8, 7},
805 {Intrinsic::vp_fshr, MVT::v16i8, 7},
806 {Intrinsic::vp_fshr, MVT::nxv1i8, 7},
807 {Intrinsic::vp_fshr, MVT::nxv2i8, 7},
808 {Intrinsic::vp_fshr, MVT::nxv4i8, 7},
809 {Intrinsic::vp_fshr, MVT::nxv8i8, 7},
810 {Intrinsic::vp_fshr, MVT::nxv16i8, 7},
811 {Intrinsic::vp_fshr, MVT::nxv32i8, 7},
812 {Intrinsic::vp_fshr, MVT::nxv64i8, 7},
813 {Intrinsic::vp_fshr, MVT::v2i16, 7},
814 {Intrinsic::vp_fshr, MVT::v4i16, 7},
815 {Intrinsic::vp_fshr, MVT::v8i16, 7},
816 {Intrinsic::vp_fshr, MVT::v16i16, 7},
817 {Intrinsic::vp_fshr, MVT::nxv1i16, 7},
818 {Intrinsic::vp_fshr, MVT::nxv2i16, 7},
819 {Intrinsic::vp_fshr, MVT::nxv4i16, 7},
820 {Intrinsic::vp_fshr, MVT::nxv8i16, 7},
821 {Intrinsic::vp_fshr, MVT::nxv16i16, 7},
822 {Intrinsic::vp_fshr, MVT::nxv32i16, 7},
823 {Intrinsic::vp_fshr, MVT::v2i32, 7},
824 {Intrinsic::vp_fshr, MVT::v4i32, 7},
825 {Intrinsic::vp_fshr, MVT::v8i32, 7},
826 {Intrinsic::vp_fshr, MVT::v16i32, 7},
827 {Intrinsic::vp_fshr, MVT::nxv1i32, 7},
828 {Intrinsic::vp_fshr, MVT::nxv2i32, 7},
829 {Intrinsic::vp_fshr, MVT::nxv4i32, 7},
830 {Intrinsic::vp_fshr, MVT::nxv8i32, 7},
831 {Intrinsic::vp_fshr, MVT::nxv16i32, 7},
832 {Intrinsic::vp_fshr, MVT::v2i64, 7},
833 {Intrinsic::vp_fshr, MVT::v4i64, 7},
834 {Intrinsic::vp_fshr, MVT::v8i64, 7},
835 {Intrinsic::vp_fshr, MVT::v16i64, 7},
836 {Intrinsic::vp_fshr, MVT::nxv1i64, 7},
837 {Intrinsic::vp_fshr, MVT::nxv2i64, 7},
838 {Intrinsic::vp_fshr, MVT::nxv4i64, 7},
839 {Intrinsic::vp_fshr, MVT::nxv8i64, 7},
840 {Intrinsic::bitreverse, MVT::v2i8, 17},
841 {Intrinsic::bitreverse, MVT::v4i8, 17},
842 {Intrinsic::bitreverse, MVT::v8i8, 17},
843 {Intrinsic::bitreverse, MVT::v16i8, 17},
844 {Intrinsic::bitreverse, MVT::nxv1i8, 17},
845 {Intrinsic::bitreverse, MVT::nxv2i8, 17},
846 {Intrinsic::bitreverse, MVT::nxv4i8, 17},
847 {Intrinsic::bitreverse, MVT::nxv8i8, 17},
848 {Intrinsic::bitreverse, MVT::nxv16i8, 17},
849 {Intrinsic::bitreverse, MVT::v2i16, 24},
850 {Intrinsic::bitreverse, MVT::v4i16, 24},
851 {Intrinsic::bitreverse, MVT::v8i16, 24},
852 {Intrinsic::bitreverse, MVT::v16i16, 24},
853 {Intrinsic::bitreverse, MVT::nxv1i16, 24},
854 {Intrinsic::bitreverse, MVT::nxv2i16, 24},
855 {Intrinsic::bitreverse, MVT::nxv4i16, 24},
856 {Intrinsic::bitreverse, MVT::nxv8i16, 24},
857 {Intrinsic::bitreverse, MVT::nxv16i16, 24},
858 {Intrinsic::bitreverse, MVT::v2i32, 33},
859 {Intrinsic::bitreverse, MVT::v4i32, 33},
860 {Intrinsic::bitreverse, MVT::v8i32, 33},
861 {Intrinsic::bitreverse, MVT::v16i32, 33},
862 {Intrinsic::bitreverse, MVT::nxv1i32, 33},
863 {Intrinsic::bitreverse, MVT::nxv2i32, 33},
864 {Intrinsic::bitreverse, MVT::nxv4i32, 33},
865 {Intrinsic::bitreverse, MVT::nxv8i32, 33},
866 {Intrinsic::bitreverse, MVT::nxv16i32, 33},
867 {Intrinsic::bitreverse, MVT::v2i64, 52},
868 {Intrinsic::bitreverse, MVT::v4i64, 52},
869 {Intrinsic::bitreverse, MVT::v8i64, 52},
870 {Intrinsic::bitreverse, MVT::v16i64, 52},
871 {Intrinsic::bitreverse, MVT::nxv1i64, 52},
872 {Intrinsic::bitreverse, MVT::nxv2i64, 52},
873 {Intrinsic::bitreverse, MVT::nxv4i64, 52},
874 {Intrinsic::bitreverse, MVT::nxv8i64, 52},
875 {Intrinsic::vp_bitreverse, MVT::v2i8, 17},
876 {Intrinsic::vp_bitreverse, MVT::v4i8, 17},
877 {Intrinsic::vp_bitreverse, MVT::v8i8, 17},
878 {Intrinsic::vp_bitreverse, MVT::v16i8, 17},
879 {Intrinsic::vp_bitreverse, MVT::nxv1i8, 17},
880 {Intrinsic::vp_bitreverse, MVT::nxv2i8, 17},
881 {Intrinsic::vp_bitreverse, MVT::nxv4i8, 17},
882 {Intrinsic::vp_bitreverse, MVT::nxv8i8, 17},
883 {Intrinsic::vp_bitreverse, MVT::nxv16i8, 17},
884 {Intrinsic::vp_bitreverse, MVT::v2i16, 24},
885 {Intrinsic::vp_bitreverse, MVT::v4i16, 24},
886 {Intrinsic::vp_bitreverse, MVT::v8i16, 24},
887 {Intrinsic::vp_bitreverse, MVT::v16i16, 24},
888 {Intrinsic::vp_bitreverse, MVT::nxv1i16, 24},
889 {Intrinsic::vp_bitreverse, MVT::nxv2i16, 24},
890 {Intrinsic::vp_bitreverse, MVT::nxv4i16, 24},
891 {Intrinsic::vp_bitreverse, MVT::nxv8i16, 24},
892 {Intrinsic::vp_bitreverse, MVT::nxv16i16, 24},
893 {Intrinsic::vp_bitreverse, MVT::v2i32, 33},
894 {Intrinsic::vp_bitreverse, MVT::v4i32, 33},
895 {Intrinsic::vp_bitreverse, MVT::v8i32, 33},
896 {Intrinsic::vp_bitreverse, MVT::v16i32, 33},
897 {Intrinsic::vp_bitreverse, MVT::nxv1i32, 33},
898 {Intrinsic::vp_bitreverse, MVT::nxv2i32, 33},
899 {Intrinsic::vp_bitreverse, MVT::nxv4i32, 33},
900 {Intrinsic::vp_bitreverse, MVT::nxv8i32, 33},
901 {Intrinsic::vp_bitreverse, MVT::nxv16i32, 33},
902 {Intrinsic::vp_bitreverse, MVT::v2i64, 52},
903 {Intrinsic::vp_bitreverse, MVT::v4i64, 52},
904 {Intrinsic::vp_bitreverse, MVT::v8i64, 52},
905 {Intrinsic::vp_bitreverse, MVT::v16i64, 52},
906 {Intrinsic::vp_bitreverse, MVT::nxv1i64, 52},
907 {Intrinsic::vp_bitreverse, MVT::nxv2i64, 52},
908 {Intrinsic::vp_bitreverse, MVT::nxv4i64, 52},
909 {Intrinsic::vp_bitreverse, MVT::nxv8i64, 52},
910 {Intrinsic::ctpop, MVT::v2i8, 12},
911 {Intrinsic::ctpop, MVT::v4i8, 12},
912 {Intrinsic::ctpop, MVT::v8i8, 12},
913 {Intrinsic::ctpop, MVT::v16i8, 12},
914 {Intrinsic::ctpop, MVT::nxv1i8, 12},
915 {Intrinsic::ctpop, MVT::nxv2i8, 12},
916 {Intrinsic::ctpop, MVT::nxv4i8, 12},
917 {Intrinsic::ctpop, MVT::nxv8i8, 12},
918 {Intrinsic::ctpop, MVT::nxv16i8, 12},
919 {Intrinsic::ctpop, MVT::v2i16, 19},
920 {Intrinsic::ctpop, MVT::v4i16, 19},
921 {Intrinsic::ctpop, MVT::v8i16, 19},
922 {Intrinsic::ctpop, MVT::v16i16, 19},
923 {Intrinsic::ctpop, MVT::nxv1i16, 19},
924 {Intrinsic::ctpop, MVT::nxv2i16, 19},
925 {Intrinsic::ctpop, MVT::nxv4i16, 19},
926 {Intrinsic::ctpop, MVT::nxv8i16, 19},
927 {Intrinsic::ctpop, MVT::nxv16i16, 19},
928 {Intrinsic::ctpop, MVT::v2i32, 20},
929 {Intrinsic::ctpop, MVT::v4i32, 20},
930 {Intrinsic::ctpop, MVT::v8i32, 20},
931 {Intrinsic::ctpop, MVT::v16i32, 20},
932 {Intrinsic::ctpop, MVT::nxv1i32, 20},
933 {Intrinsic::ctpop, MVT::nxv2i32, 20},
934 {Intrinsic::ctpop, MVT::nxv4i32, 20},
935 {Intrinsic::ctpop, MVT::nxv8i32, 20},
936 {Intrinsic::ctpop, MVT::nxv16i32, 20},
937 {Intrinsic::ctpop, MVT::v2i64, 21},
938 {Intrinsic::ctpop, MVT::v4i64, 21},
939 {Intrinsic::ctpop, MVT::v8i64, 21},
940 {Intrinsic::ctpop, MVT::v16i64, 21},
941 {Intrinsic::ctpop, MVT::nxv1i64, 21},
942 {Intrinsic::ctpop, MVT::nxv2i64, 21},
943 {Intrinsic::ctpop, MVT::nxv4i64, 21},
944 {Intrinsic::ctpop, MVT::nxv8i64, 21},
945 {Intrinsic::vp_ctpop, MVT::v2i8, 12},
946 {Intrinsic::vp_ctpop, MVT::v4i8, 12},
947 {Intrinsic::vp_ctpop, MVT::v8i8, 12},
948 {Intrinsic::vp_ctpop, MVT::v16i8, 12},
949 {Intrinsic::vp_ctpop, MVT::nxv1i8, 12},
950 {Intrinsic::vp_ctpop, MVT::nxv2i8, 12},
951 {Intrinsic::vp_ctpop, MVT::nxv4i8, 12},
952 {Intrinsic::vp_ctpop, MVT::nxv8i8, 12},
953 {Intrinsic::vp_ctpop, MVT::nxv16i8, 12},
954 {Intrinsic::vp_ctpop, MVT::v2i16, 19},
955 {Intrinsic::vp_ctpop, MVT::v4i16, 19},
956 {Intrinsic::vp_ctpop, MVT::v8i16, 19},
957 {Intrinsic::vp_ctpop, MVT::v16i16, 19},
958 {Intrinsic::vp_ctpop, MVT::nxv1i16, 19},
959 {Intrinsic::vp_ctpop, MVT::nxv2i16, 19},
960 {Intrinsic::vp_ctpop, MVT::nxv4i16, 19},
961 {Intrinsic::vp_ctpop, MVT::nxv8i16, 19},
962 {Intrinsic::vp_ctpop, MVT::nxv16i16, 19},
963 {Intrinsic::vp_ctpop, MVT::v2i32, 20},
964 {Intrinsic::vp_ctpop, MVT::v4i32, 20},
965 {Intrinsic::vp_ctpop, MVT::v8i32, 20},
966 {Intrinsic::vp_ctpop, MVT::v16i32, 20},
967 {Intrinsic::vp_ctpop, MVT::nxv1i32, 20},
968 {Intrinsic::vp_ctpop, MVT::nxv2i32, 20},
969 {Intrinsic::vp_ctpop, MVT::nxv4i32, 20},
970 {Intrinsic::vp_ctpop, MVT::nxv8i32, 20},
971 {Intrinsic::vp_ctpop, MVT::nxv16i32, 20},
972 {Intrinsic::vp_ctpop, MVT::v2i64, 21},
973 {Intrinsic::vp_ctpop, MVT::v4i64, 21},
974 {Intrinsic::vp_ctpop, MVT::v8i64, 21},
975 {Intrinsic::vp_ctpop, MVT::v16i64, 21},
976 {Intrinsic::vp_ctpop, MVT::nxv1i64, 21},
977 {Intrinsic::vp_ctpop, MVT::nxv2i64, 21},
978 {Intrinsic::vp_ctpop, MVT::nxv4i64, 21},
979 {Intrinsic::vp_ctpop, MVT::nxv8i64, 21},
980 {Intrinsic::vp_ctlz, MVT::v2i8, 19},
981 {Intrinsic::vp_ctlz, MVT::v4i8, 19},
982 {Intrinsic::vp_ctlz, MVT::v8i8, 19},
983 {Intrinsic::vp_ctlz, MVT::v16i8, 19},
984 {Intrinsic::vp_ctlz, MVT::nxv1i8, 19},
985 {Intrinsic::vp_ctlz, MVT::nxv2i8, 19},
986 {Intrinsic::vp_ctlz, MVT::nxv4i8, 19},
987 {Intrinsic::vp_ctlz, MVT::nxv8i8, 19},
988 {Intrinsic::vp_ctlz, MVT::nxv16i8, 19},
989 {Intrinsic::vp_ctlz, MVT::nxv32i8, 19},
990 {Intrinsic::vp_ctlz, MVT::nxv64i8, 19},
991 {Intrinsic::vp_ctlz, MVT::v2i16, 28},
992 {Intrinsic::vp_ctlz, MVT::v4i16, 28},
993 {Intrinsic::vp_ctlz, MVT::v8i16, 28},
994 {Intrinsic::vp_ctlz, MVT::v16i16, 28},
995 {Intrinsic::vp_ctlz, MVT::nxv1i16, 28},
996 {Intrinsic::vp_ctlz, MVT::nxv2i16, 28},
997 {Intrinsic::vp_ctlz, MVT::nxv4i16, 28},
998 {Intrinsic::vp_ctlz, MVT::nxv8i16, 28},
999 {Intrinsic::vp_ctlz, MVT::nxv16i16, 28},
1000 {Intrinsic::vp_ctlz, MVT::nxv32i16, 28},
1001 {Intrinsic::vp_ctlz, MVT::v2i32, 31},
1002 {Intrinsic::vp_ctlz, MVT::v4i32, 31},
1003 {Intrinsic::vp_ctlz, MVT::v8i32, 31},
1004 {Intrinsic::vp_ctlz, MVT::v16i32, 31},
1005 {Intrinsic::vp_ctlz, MVT::nxv1i32, 31},
1006 {Intrinsic::vp_ctlz, MVT::nxv2i32, 31},
1007 {Intrinsic::vp_ctlz, MVT::nxv4i32, 31},
1008 {Intrinsic::vp_ctlz, MVT::nxv8i32, 31},
1009 {Intrinsic::vp_ctlz, MVT::nxv16i32, 31},
1010 {Intrinsic::vp_ctlz, MVT::v2i64, 35},
1011 {Intrinsic::vp_ctlz, MVT::v4i64, 35},
1012 {Intrinsic::vp_ctlz, MVT::v8i64, 35},
1013 {Intrinsic::vp_ctlz, MVT::v16i64, 35},
1014 {Intrinsic::vp_ctlz, MVT::nxv1i64, 35},
1015 {Intrinsic::vp_ctlz, MVT::nxv2i64, 35},
1016 {Intrinsic::vp_ctlz, MVT::nxv4i64, 35},
1017 {Intrinsic::vp_ctlz, MVT::nxv8i64, 35},
1018 {Intrinsic::vp_cttz, MVT::v2i8, 16},
1019 {Intrinsic::vp_cttz, MVT::v4i8, 16},
1020 {Intrinsic::vp_cttz, MVT::v8i8, 16},
1021 {Intrinsic::vp_cttz, MVT::v16i8, 16},
1022 {Intrinsic::vp_cttz, MVT::nxv1i8, 16},
1023 {Intrinsic::vp_cttz, MVT::nxv2i8, 16},
1024 {Intrinsic::vp_cttz, MVT::nxv4i8, 16},
1025 {Intrinsic::vp_cttz, MVT::nxv8i8, 16},
1026 {Intrinsic::vp_cttz, MVT::nxv16i8, 16},
1027 {Intrinsic::vp_cttz, MVT::nxv32i8, 16},
1028 {Intrinsic::vp_cttz, MVT::nxv64i8, 16},
1029 {Intrinsic::vp_cttz, MVT::v2i16, 23},
1030 {Intrinsic::vp_cttz, MVT::v4i16, 23},
1031 {Intrinsic::vp_cttz, MVT::v8i16, 23},
1032 {Intrinsic::vp_cttz, MVT::v16i16, 23},
1033 {Intrinsic::vp_cttz, MVT::nxv1i16, 23},
1034 {Intrinsic::vp_cttz, MVT::nxv2i16, 23},
1035 {Intrinsic::vp_cttz, MVT::nxv4i16, 23},
1036 {Intrinsic::vp_cttz, MVT::nxv8i16, 23},
1037 {Intrinsic::vp_cttz, MVT::nxv16i16, 23},
1038 {Intrinsic::vp_cttz, MVT::nxv32i16, 23},
1039 {Intrinsic::vp_cttz, MVT::v2i32, 24},
1040 {Intrinsic::vp_cttz, MVT::v4i32, 24},
1041 {Intrinsic::vp_cttz, MVT::v8i32, 24},
1042 {Intrinsic::vp_cttz, MVT::v16i32, 24},
1043 {Intrinsic::vp_cttz, MVT::nxv1i32, 24},
1044 {Intrinsic::vp_cttz, MVT::nxv2i32, 24},
1045 {Intrinsic::vp_cttz, MVT::nxv4i32, 24},
1046 {Intrinsic::vp_cttz, MVT::nxv8i32, 24},
1047 {Intrinsic::vp_cttz, MVT::nxv16i32, 24},
1048 {Intrinsic::vp_cttz, MVT::v2i64, 25},
1049 {Intrinsic::vp_cttz, MVT::v4i64, 25},
1050 {Intrinsic::vp_cttz, MVT::v8i64, 25},
1051 {Intrinsic::vp_cttz, MVT::v16i64, 25},
1052 {Intrinsic::vp_cttz, MVT::nxv1i64, 25},
1053 {Intrinsic::vp_cttz, MVT::nxv2i64, 25},
1054 {Intrinsic::vp_cttz, MVT::nxv4i64, 25},
1055 {Intrinsic::vp_cttz, MVT::nxv8i64, 25},
1060 #define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
1061 case Intrinsic::VPID: \
1063 #include "llvm/IR/VPIntrinsics.def"
1064 #undef HELPER_MAP_VPID_TO_VPSD
1073 switch (ICA.getID()) {
1074 case Intrinsic::ceil:
1075 case Intrinsic::floor:
1076 case Intrinsic::trunc:
1077 case Intrinsic::rint:
1078 case Intrinsic::lrint:
1079 case Intrinsic::llrint:
1080 case Intrinsic::round:
1081 case Intrinsic::roundeven: {
1085 return LT.first * 8;
1088 case Intrinsic::umin:
1089 case Intrinsic::umax:
1090 case Intrinsic::smin:
1091 case Intrinsic::smax: {
1094 (LT.second.isScalarInteger() && ST->hasStdExtZbb()))
1098 case Intrinsic::sadd_sat:
1099 case Intrinsic::ssub_sat:
1100 case Intrinsic::uadd_sat:
1101 case Intrinsic::usub_sat:
1102 case Intrinsic::fabs:
1103 case Intrinsic::sqrt: {
1109 case Intrinsic::ctpop: {
1115 case Intrinsic::abs: {
1120 return LT.first * 2;
1125 case Intrinsic::experimental_stepvector: {
1128 return Cost + (LT.first - 1);
1130 case Intrinsic::vp_rint: {
1135 return Cost * LT.first;
1138 case Intrinsic::vp_nearbyint: {
1143 return Cost * LT.first;
1146 case Intrinsic::vp_ceil:
1147 case Intrinsic::vp_floor:
1148 case Intrinsic::vp_round:
1149 case Intrinsic::vp_roundeven:
1150 case Intrinsic::vp_roundtozero: {
1157 return Cost * LT.first;
1165 ICA.getID(), LT.second))
1166 return LT.first * Entry->Cost;
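// The lookup above searches the intrinsic cost table for an entry keyed on
// (intrinsic ID, legalized vector type) and scales the stored per-register
// cost by the number of registers the type splits into (LT.first). A
// simplified standalone stand-in for that table and lookup, with plain
// integers in place of Intrinsic::ID and MVT:

#include <optional>
#include <vector>

struct CostEntry {
  unsigned IntrinsicID;
  unsigned TypeTag;
  unsigned Cost;
};

std::optional<unsigned> lookupIntrinsicCost(const std::vector<CostEntry> &Table,
                                            unsigned ID, unsigned TypeTag,
                                            unsigned NumLegalizedRegs) {
  for (const CostEntry &E : Table)
    if (E.IntrinsicID == ID && E.TypeTag == TypeTag)
      return NumLegalizedRegs * E.Cost; // mirrors LT.first * Entry->Cost
  return std::nullopt; // no entry: fall back to the generic cost model
}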
1177 if (isa<VectorType>(Dst) && isa<VectorType>(Src)) {
1183 if (Src->getScalarSizeInBits() > ST->getELen() ||
1184 Dst->getScalarSizeInBits() > ST->getELen())
1188 assert(ISD && "Invalid opcode");
1191 int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
1192 (int)Log2_32(Src->getScalarSizeInBits());
1196 if (Src->getScalarSizeInBits() == 1) {
1205 if (Dst->getScalarSizeInBits() == 1) {
1217 return std::abs(PowDiff);
1222 if (Src->getScalarSizeInBits() == 1 || Dst->getScalarSizeInBits() == 1) {
1236 if (std::abs(PowDiff) <= 1)
1240 if (Src->isIntOrIntVectorTy())
1243 return std::abs(PowDiff);
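// For conversions between element widths the fallback above charges one step
// per halving or doubling of the element size, i.e. the absolute difference
// of the log2 widths. A standalone version of that arithmetic with a worked
// example (assuming power-of-two widths, as the code above does):

#include <bit>
#include <cstdlib>

int conversionSteps(unsigned SrcBits, unsigned DstBits) {
  int SrcLog = static_cast<int>(std::bit_width(SrcBits)) - 1; // log2
  int DstLog = static_cast<int>(std::bit_width(DstBits)) - 1;
  return std::abs(DstLog - SrcLog); // mirrors std::abs(PowDiff)
}

// Example: f16 -> f64 gives |6 - 4| = 2, matching the two widening converts
// needed when each RVV widening step only doubles the element width.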
1249 unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
1250 if (isa<ScalableVectorType>(Ty)) {
1256 return cast<FixedVectorType>(Ty)->getNumElements();
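// getEstimatedVLFor above uses the exact element count for fixed vectors; the
// elided scalable-vector branch derives an expected lane count from the
// vscale value the subtarget tunes for. A hedged standalone approximation
// (the real code computes this from RVVBitsPerBlock and the legalized type):

#include <cstdint>

uint64_t estimatedVL(bool IsScalable, uint64_t KnownMinElts,
                     uint64_t VScaleForTuning) {
  // <vscale x N x T> is assumed to run with VScaleForTuning lanes per block.
  return IsScalable ? KnownMinElts * VScaleForTuning : KnownMinElts;
}

// Example: <vscale x 4 x i32> with vscale-for-tuning = 2 (a 128-bit VLEN)
// is treated as 8 lanes; a fixed <4 x i32> is always 4.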
1274 return (LT.first - 1) + 3;
1280 return (LT.first - 1) + BaseCost;
1282 unsigned VL = getEstimatedVLFor(Ty);
1288 std::optional<FastMathFlags> FMF,
1298 assert(ISD && "Invalid opcode");
1307 return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
1313 return (LT.first - 1) + BaseCost;
1315 unsigned VL = getEstimatedVLFor(Ty);
1317 return (LT.first - 1) + BaseCost + VL;
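// The reduction estimates above share one shape: every extra register after
// the first pays a combining step (LT.first - 1), BaseCost covers the
// reduction instruction itself, and some reductions (for example strictly
// ordered floating-point ones) additionally pay one step per lane. A hedged
// sketch of that shape rather than the exact LLVM code:

#include <cstdint>

uint64_t reductionCost(uint64_t NumRegs, uint64_t BaseCost, bool PerLaneTerm,
                       uint64_t EstimatedVL) {
  uint64_t Cost = (NumRegs - 1) + BaseCost;
  if (PerLaneTerm)
    Cost += EstimatedVL; // mirrors (LT.first - 1) + BaseCost + VL
  return Cost;
}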
1333 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
1343 return (LT.first - 1) +
1351 if (!isa<VectorType>(Ty))
1363 return getConstantPoolLoadCost(Ty, CostKind);
1375 if (VT == MVT::Other)
1389 LT.second.isVector())
1391 return Cost + BaseCost;
1420 return LT.first * 3;
1423 return LT.first * 1;
1432 return LT.first * 5;
1438 return LT.first * 3;
1441 if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
1447 return LT.first * 1;
1466 return LT.first * 1;
1482 return Opcode == Instruction::PHI ? 0 : 1;
1493 if (Opcode != Instruction::ExtractElement &&
1494 Opcode != Instruction::InsertElement)
1501 if (!LT.second.isVector()) {
1502 auto *FixedVecTy = cast<FixedVectorType>(Val);
1510 Type *ElemTy = FixedVecTy->getElementType();
1511 auto NumElems = FixedVecTy->getNumElements();
1517 return Opcode == Instruction::ExtractElement
1518 ? StoreCost * NumElems + LoadCost
1519 : (StoreCost + LoadCost) * NumElems + StoreCost;
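// When the element access has to go through memory (the !LT.second.isVector()
// path above), the formulas at lines 1517-1519 charge: extract = store every
// lane, reload one; insert = store and reload every lane, then store the new
// lane. A standalone rendering with a worked example:

#include <cstdint>

uint64_t extractViaStackCost(uint64_t NumElems, uint64_t StoreCost,
                             uint64_t LoadCost) {
  return StoreCost * NumElems + LoadCost;
}

uint64_t insertViaStackCost(uint64_t NumElems, uint64_t StoreCost,
                            uint64_t LoadCost) {
  return (StoreCost + LoadCost) * NumElems + StoreCost;
}

// Example with 4 lanes and unit load/store costs: extract = 4 * 1 + 1 = 5,
// insert = (1 + 1) * 4 + 1 = 9.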
1523 if (LT.second.isScalableVector() && !LT.first.isValid())
1533 cast<VectorType>(Val)->getElementCount());
1534 if (Opcode == Instruction::ExtractElement) {
1540 return ExtendCost + ExtractCost;
1550 return ExtendCost + InsertCost + TruncCost;
1556 unsigned BaseCost = 1;
1558 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
1563 if (LT.second.isFixedLengthVector()) {
1564 unsigned Width = LT.second.getVectorNumElements();
1571 else if (Opcode == Instruction::InsertElement)
1596 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
1598 return BaseCost + SlideCost;
1624 if (!LT.second.isVector())
1629 auto getConstantMatCost =
1639 return getConstantPoolLoadCost(Ty, CostKind);
1645 ConstantMatCost += getConstantMatCost(0, Op1Info);
1647 ConstantMatCost += getConstantMatCost(1, Op2Info);
1665 return ConstantMatCost + TLI->getLMULCost(LT.second) * LT.first * 1;
1668 return ConstantMatCost +
1691 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
1694 if (Info.isSameBase() && V != Base) {
1695 if (GEP->hasAllConstantIndices())
1702 if (Info.isUnitStride() &&
1708 GEP->getType()->getPointerAddressSpace()))
1711 {TTI::OK_AnyValue, TTI::OP_None},
1712 {TTI::OK_AnyValue, TTI::OP_None},
1730 if (ST->enableDefaultUnroll())
1740 if (L->getHeader()->getParent()->hasOptSize())
1744 L->getExitingBlocks(ExitingBlocks);
1746 << "Blocks: " << L->getNumBlocks() << "\n"
1747 << "Exit blocks: " << ExitingBlocks.size() << "\n");
1751 if (ExitingBlocks.size() > 2)
1756 if (L->getNumBlocks() > 4)
1766 for (auto *BB : L->getBlocks()) {
1767 for (auto &I : *BB) {
1770 if (I.getType()->isVectorTy())
1773 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1832 return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
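// The SLP maximum VF above is just the number of ElemWidth-bit lanes that fit
// in one vector register, clamped to at least 1 (and overridable through the
// SLP option described near the top of this file). A standalone version with
// a worked example:

#include <algorithm>

unsigned maximumVF(unsigned RegWidthBits, unsigned ElemWidthBits) {
  return std::max(1u, RegWidthBits / ElemWidthBits); // mirrors line 1832
}

// Example: a 128-bit register with 32-bit elements gives VF = 4; an element
// wider than the register clamps the result to 1.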