63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
164 bool Vector = (ClassID == 1);
183 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
185 if (ST->
hasAVX() && PreferVectorWidth >= 256)
187 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
228 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
245 assert(ISD &&
"Invalid opcode");
247 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
248 (LT.second.getScalarType() == MVT::i32 ||
249 LT.second.getScalarType() == MVT::i64)) {
251 bool Op1Signed =
false, Op2Signed =
false;
254 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
255 bool SignedMode = Op1Signed || Op2Signed;
260 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
261 LT.second.getScalarType() == MVT::i32) {
263 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
265 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
266 bool Op1Sext = isa<SExtInst>(Args[0]) &&
267 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
268 bool Op2Sext = isa<SExtInst>(Args[1]) &&
269 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
271 bool IsZeroExtended = !Op1Signed || !Op2Signed;
272 bool IsConstant = Op1Constant || Op2Constant;
273 bool IsSext = Op1Sext || Op2Sext;
274 if (IsConstant || IsZeroExtended || IsSext)
282 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
285 if (!SignedMode && OpMinSize <= 8)
289 if (!SignedMode && OpMinSize <= 16)
296 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
349 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
350 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
351 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
352 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
353 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
354 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
355 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
356 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
357 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
361 if (
const auto *Entry =
363 if (
auto KindCost = Entry->Cost[
CostKind])
364 return LT.first * *KindCost;
367 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
368 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
369 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
370 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
371 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
372 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
373 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
374 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
375 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
377 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
378 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
379 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
380 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
381 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
382 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
386 if (
const auto *Entry =
388 if (
auto KindCost = Entry->Cost[
CostKind])
389 return LT.first * *KindCost;
392 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
393 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
394 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
396 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
397 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
398 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
400 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
401 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
402 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
403 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
404 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
405 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
407 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
408 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
409 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
410 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
411 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
412 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
413 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
422 if (
const auto *Entry =
424 if (
auto KindCost = Entry->Cost[
CostKind])
425 return LT.first * *KindCost;
428 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
429 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
430 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
431 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
432 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
433 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
435 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
436 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
437 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
438 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
439 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
440 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
442 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
443 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
444 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
445 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
446 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
447 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
449 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
450 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
451 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
452 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
453 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
454 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
463 if (
const auto *Entry =
465 if (
auto KindCost = Entry->Cost[
CostKind])
466 return LT.first * *KindCost;
469 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
470 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
471 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
472 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
473 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
474 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
476 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
477 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
478 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
479 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
480 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
481 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
483 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
484 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
485 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
486 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
487 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
488 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
490 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
491 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
492 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
493 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
494 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
495 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
505 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
506 if (
const auto *Entry =
508 if (
auto KindCost = Entry->Cost[
CostKind])
509 return LT.first * *KindCost;
512 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
513 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
514 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
516 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
517 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
518 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
520 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
521 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
522 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
524 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
525 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
526 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
536 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
537 if (
const auto *Entry =
539 if (
auto KindCost = Entry->Cost[
CostKind])
540 return LT.first * *KindCost;
555 if (
const auto *Entry =
557 if (
auto KindCost = Entry->Cost[
CostKind])
558 return LT.first * *KindCost;
578 if (
const auto *Entry =
580 if (
auto KindCost = Entry->Cost[
CostKind])
581 return LT.first * *KindCost;
601 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
602 if (
auto KindCost = Entry->Cost[
CostKind])
603 return LT.first * *KindCost;
623 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
624 if (
auto KindCost = Entry->Cost[
CostKind])
625 return LT.first * *KindCost;
633 if (
const auto *Entry =
635 if (
auto KindCost = Entry->Cost[
CostKind])
636 return LT.first * *KindCost;
656 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
657 if (
auto KindCost = Entry->Cost[
CostKind])
658 return LT.first * *KindCost;
661 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
662 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
663 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
664 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
665 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
666 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
667 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
668 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
669 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
671 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
672 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
673 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
677 if (
const auto *Entry =
679 if (
auto KindCost = Entry->Cost[
CostKind])
680 return LT.first * *KindCost;
683 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
684 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
685 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
687 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
688 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
689 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
691 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
692 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
693 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
694 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
695 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
696 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
697 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
701 if (
const auto *Entry =
703 if (
auto KindCost = Entry->Cost[
CostKind])
704 return LT.first * *KindCost;
708 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
709 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
710 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
711 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
712 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
713 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
715 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
716 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
717 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
718 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
719 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
720 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
722 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
723 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
724 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
725 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
726 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
727 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
729 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
730 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
731 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
732 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
733 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
734 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
738 if (
const auto *Entry =
740 if (
auto KindCost = Entry->Cost[
CostKind])
741 return LT.first * *KindCost;
744 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
745 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
746 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
747 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
748 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
749 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
751 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
752 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
753 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
754 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
755 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
756 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
758 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
759 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
760 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
761 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
762 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
763 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
765 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
766 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
767 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
768 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
769 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
770 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
775 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
776 if (
const auto *Entry =
778 if (
auto KindCost = Entry->Cost[
CostKind])
779 return LT.first * *KindCost;
783 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
784 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
785 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
787 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
788 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
789 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
791 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
792 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
793 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
795 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
796 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
797 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
801 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
802 if (
const auto *Entry =
804 if (
auto KindCost = Entry->Cost[
CostKind])
805 return LT.first * *KindCost;
808 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
809 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
810 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
815 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
816 if (
auto KindCost = Entry->Cost[
CostKind])
817 return LT.first * *KindCost;
820 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
821 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
822 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
823 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
824 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
825 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
826 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
827 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
828 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
830 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
831 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
832 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
833 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
834 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
835 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
836 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
837 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
838 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
840 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
841 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
843 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
844 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
845 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
846 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
848 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
849 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
851 {
ISD::MUL, MVT::v64i8, { 5, 10,10,11 } },
852 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
854 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
855 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
856 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
857 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
862 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
863 if (
auto KindCost = Entry->Cost[
CostKind])
864 return LT.first * *KindCost;
867 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
868 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
869 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
871 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
872 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
873 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
875 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
876 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
877 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
878 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
879 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
880 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
881 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
882 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
883 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
885 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
886 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
887 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
888 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
889 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
890 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
891 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
892 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
893 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
895 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
896 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
898 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
899 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
901 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
902 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
903 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
904 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
906 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
907 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
908 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
909 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
911 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
912 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
913 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
914 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
916 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
917 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
918 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
919 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
924 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
925 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
926 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
927 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
928 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
929 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
930 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
931 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
934 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
935 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
936 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
937 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
939 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
940 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
941 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
942 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
943 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
944 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
945 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
946 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
949 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
950 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
951 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
952 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
956 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
957 if (
auto KindCost = Entry->Cost[
CostKind])
958 return LT.first * *KindCost;
963 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
964 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
965 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
966 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
967 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
968 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
969 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
970 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
971 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
972 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
984 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
985 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
992 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
993 if (
auto KindCost = Entry->Cost[
CostKind])
994 return LT.first * *KindCost;
999 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1000 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1001 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1002 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1003 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1004 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1005 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1006 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1007 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1008 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1009 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1010 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1012 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1013 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1014 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1015 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1016 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1017 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1018 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1019 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1020 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1021 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1022 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1023 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1033 if (
const auto *Entry =
1035 if (
auto KindCost = Entry->Cost[
CostKind])
1036 return LT.first * *KindCost;
1043 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1044 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1049 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1050 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1051 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1052 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1055 if (ST->useGLMDivSqrtCosts())
1056 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1057 if (
auto KindCost = Entry->Cost[
CostKind])
1058 return LT.first * *KindCost;
1061 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1062 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1063 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1064 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1065 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1066 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1067 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1068 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1069 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1070 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1071 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1072 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1078 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1080 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1081 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1084 if (ST->useSLMArithCosts())
1085 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1086 if (
auto KindCost = Entry->Cost[
CostKind])
1087 return LT.first * *KindCost;
1090 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1091 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1092 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1093 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1095 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1096 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1097 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1098 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1100 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1101 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1102 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1103 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1104 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1105 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1107 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1108 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1109 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1110 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1111 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1112 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1113 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1114 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1116 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1117 {
ISD::MUL, MVT::v32i8, { 6, 11,10,19 } },
1118 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1119 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1120 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1121 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1122 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1126 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1127 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1129 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1130 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1131 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1132 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1133 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1134 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1136 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1137 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1138 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1139 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1140 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1141 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1143 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1144 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1145 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1146 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1147 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1148 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1150 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1151 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1152 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1153 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1154 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1155 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1160 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1161 if (
auto KindCost = Entry->Cost[
CostKind])
1162 return LT.first * *KindCost;
1168 {
ISD::MUL, MVT::v32i8, { 12, 13, 22, 23 } },
1169 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1170 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1171 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1172 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1174 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1175 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1176 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1177 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1179 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1180 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1181 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1182 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1184 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1185 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1186 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1187 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1189 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1190 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1191 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1192 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1193 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1194 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1195 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1196 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1197 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1198 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1200 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1201 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1202 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1203 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1204 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1205 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1206 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1207 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1209 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1210 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1211 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1212 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1213 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1214 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1215 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1216 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1218 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1219 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1220 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1221 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1222 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1223 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1224 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1225 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1227 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1228 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1230 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1231 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1232 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1233 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1234 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1235 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1237 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1238 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1239 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1240 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1241 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1242 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1244 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1245 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1246 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1247 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1248 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1249 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1251 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1252 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1253 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1254 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1255 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1256 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1260 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1261 if (
auto KindCost = Entry->Cost[
CostKind])
1262 return LT.first * *KindCost;
1265 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1266 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1267 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1268 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1270 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1271 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1272 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1273 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1275 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1276 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1277 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1278 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1280 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1281 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1282 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1283 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1285 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1289 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1290 if (
auto KindCost = Entry->Cost[
CostKind])
1291 return LT.first * *KindCost;
1294 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1295 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1296 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1298 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1299 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1300 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1301 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1303 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1304 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1305 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1306 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1308 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1309 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1313 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1314 if (
auto KindCost = Entry->Cost[
CostKind])
1315 return LT.first * *KindCost;
1320 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1321 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1322 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1323 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1325 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1326 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1327 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1328 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1330 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1331 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1332 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1333 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1335 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1336 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1337 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1338 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1340 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1341 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1342 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1343 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1345 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1346 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1347 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1348 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1350 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1351 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1353 {
ISD::MUL, MVT::v16i8, { 5, 18,12,12 } },
1354 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1355 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1356 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1360 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1361 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1362 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1363 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1365 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1366 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1367 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1368 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1370 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1371 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1372 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1374 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1375 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1376 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1378 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1379 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1383 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1384 if (
auto KindCost = Entry->Cost[
CostKind])
1385 return LT.first * *KindCost;
1388 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1389 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1391 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1392 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1394 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1395 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1397 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1398 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1400 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1401 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1405 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1406 if (
auto KindCost = Entry->Cost[
CostKind])
1407 return LT.first * *KindCost;
1412 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1417 if (
auto KindCost = Entry->Cost[
CostKind])
1418 return LT.first * *KindCost;
1429 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1430 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1431 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1433 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1434 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1435 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1436 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1437 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1441 if (
auto KindCost = Entry->Cost[
CostKind])
1442 return LT.first * *KindCost;
1456 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1505 CostKind, Mask.size() / 2, BaseTp);
1518 using namespace PatternMatch;
1521 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1526 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1527 LT.second = LT.second.changeVectorElementType(MVT::f16);
1532 int NumElts = LT.second.getVectorNumElements();
1533 if ((
Index % NumElts) == 0)
1536 if (SubLT.second.isVector()) {
1537 int NumSubElts = SubLT.second.getVectorNumElements();
1538 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1546 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1547 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
1548 (NumSubElts % OrigSubElts) == 0 &&
1549 LT.second.getVectorElementType() ==
1550 SubLT.second.getVectorElementType() &&
1551 LT.second.getVectorElementType().getSizeInBits() ==
1553 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1554 "Unexpected number of elements!");
1556 LT.second.getVectorNumElements());
1558 SubLT.second.getVectorNumElements());
1567 return ExtractCost + 1;
1570 "Unexpected vector size");
1572 return ExtractCost + 2;
1583 int NumElts = LT.second.getVectorNumElements();
1585 if (SubLT.second.isVector()) {
1586 int NumSubElts = SubLT.second.getVectorNumElements();
1587 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1600 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1631 if (
const auto *Entry =
1640 MVT LegalVT = LT.second;
1645 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1649 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1656 if (!Mask.empty() && NumOfDests.
isValid()) {
1674 unsigned E = *NumOfDests.
getValue();
1675 unsigned NormalizedVF =
1681 unsigned PrevSrcReg = 0;
1685 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1686 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1691 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1692 PrevRegMask != RegMask)
1700 if (SrcReg != DestReg &&
1705 PrevSrcReg = SrcReg;
1706 PrevRegMask = RegMask;
1719 std::nullopt,
CostKind, 0,
nullptr);
1730 LT.first = NumOfDests * NumOfShufflesPerDest;
1746 if (
const auto *Entry =
1748 return LT.first * Entry->Cost;
1781 if (
const auto *Entry =
1783 return LT.first * Entry->Cost;
1860 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1861 if (
auto KindCost = Entry->Cost[
CostKind])
1862 return LT.first * *KindCost;
1915 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1916 return LT.first * Entry->Cost;
1937 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
1938 return LT.first * Entry->Cost;
2000 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2001 return LT.first * Entry->Cost;
2014 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2015 return LT.first * Entry->Cost;
2046 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2047 return LT.first * Entry->Cost;
2103 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2105 if (
const auto *Entry =
2108 LT.second.getVectorElementCount()) &&
2109 "Table entry missing from isLegalBroadcastLoad()");
2110 return LT.first * Entry->Cost;
2113 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2114 return LT.first * Entry->Cost;
2127 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2128 return LT.first * Entry->Cost;
2139 assert(ISD &&
"Invalid opcode");
2145 return Cost == 0 ? 0 :
N;
2960 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2961 return AdjustCost(Entry->Cost);
2965 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2966 return AdjustCost(Entry->Cost);
2970 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2971 return AdjustCost(Entry->Cost);
2976 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2977 return AdjustCost(Entry->Cost);
2981 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2982 return AdjustCost(Entry->Cost);
2986 SimpleDstTy, SimpleSrcTy))
2987 return AdjustCost(Entry->Cost);
2991 SimpleDstTy, SimpleSrcTy))
2992 return AdjustCost(Entry->Cost);
2997 SimpleDstTy, SimpleSrcTy))
2998 return AdjustCost(Entry->Cost);
3003 SimpleDstTy, SimpleSrcTy))
3004 return AdjustCost(Entry->Cost);
3009 SimpleDstTy, SimpleSrcTy))
3010 return AdjustCost(Entry->Cost);
3025 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3026 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3030 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3031 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3035 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3036 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3041 LTDest.second, LTSrc.second))
3042 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3046 LTDest.second, LTSrc.second))
3047 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3051 LTDest.second, LTSrc.second))
3052 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3056 LTDest.second, LTSrc.second))
3057 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3061 LTDest.second, LTSrc.second))
3062 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3066 LTDest.second, LTSrc.second))
3067 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3071 LTDest.second, LTSrc.second))
3072 return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first));
3077 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3078 Type *ExtSrc = Src->getWithNewBitWidth(32);
3084 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3094 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3095 Type *TruncDst = Dst->getWithNewBitWidth(32);
3118 MVT MTy = LT.second;
3121 assert(ISD &&
"Invalid opcode");
3124 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3137 Pred = cast<CmpInst>(
I)->getPredicate();
3139 bool CmpWithConstant =
false;
3140 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3141 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3146 ExtraCost = CmpWithConstant ? 0 : 1;
3151 ExtraCost = CmpWithConstant ? 0 : 1;
3157 ExtraCost = CmpWithConstant ? 1 : 2;
3168 ExtraCost = CmpWithConstant ? 2 : 3;
3175 if (CondTy && !ST->
hasAVX())
3344 if (ST->useSLMArithCosts())
3346 if (
auto KindCost = Entry->Cost[
CostKind])
3347 return LT.first * (ExtraCost + *KindCost);
3351 if (
auto KindCost = Entry->Cost[
CostKind])
3352 return LT.first * (ExtraCost + *KindCost);
3356 if (
auto KindCost = Entry->Cost[
CostKind])
3357 return LT.first * (ExtraCost + *KindCost);
3361 if (
auto KindCost = Entry->Cost[
CostKind])
3362 return LT.first * (ExtraCost + *KindCost);
3366 if (
auto KindCost = Entry->Cost[
CostKind])
3367 return LT.first * (ExtraCost + *KindCost);
3371 if (
auto KindCost = Entry->Cost[
CostKind])
3372 return LT.first * (ExtraCost + *KindCost);
3376 if (
auto KindCost = Entry->Cost[
CostKind])
3377 return LT.first * (ExtraCost + *KindCost);
3381 if (
auto KindCost = Entry->Cost[
CostKind])
3382 return LT.first * (ExtraCost + *KindCost);
3386 if (
auto KindCost = Entry->Cost[
CostKind])
3387 return LT.first * (ExtraCost + *KindCost);
3391 if (
auto KindCost = Entry->Cost[
CostKind])
3392 return LT.first * (ExtraCost + *KindCost);
3417 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3418 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3419 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3420 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3421 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3422 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3423 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3424 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3425 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3426 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3427 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3428 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3429 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3430 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3431 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3453 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3454 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3455 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3456 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3457 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3458 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3459 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3460 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3461 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3462 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3463 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3464 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3466 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3467 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3468 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3469 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3470 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3471 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3474 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3475 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3497 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3498 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3499 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3500 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3501 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3502 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3503 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3504 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3505 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3506 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3507 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3508 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3509 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3513 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3514 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3515 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3516 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3517 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3518 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3519 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3520 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3521 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3522 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3523 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3524 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3525 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3526 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3527 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3528 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3529 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3530 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3539 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3540 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3541 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3542 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3547 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3548 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3549 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3550 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3555 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3556 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3557 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3558 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3559 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3560 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3561 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3562 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3563 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3571 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3572 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3573 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3574 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3575 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3576 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3577 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3578 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3579 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3580 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3581 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3582 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3583 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3584 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3585 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3586 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3587 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3588 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3589 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3590 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3591 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3592 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3593 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3594 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3601 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3602 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3603 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3604 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3605 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3606 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3607 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3608 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3609 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3610 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3611 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3612 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3613 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3614 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3615 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3616 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3617 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3618 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3619 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3620 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3621 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3622 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3623 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3624 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3652 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3655 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3656 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3672 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3673 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3674 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3675 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3676 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3677 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3678 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3679 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3680 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3681 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3682 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3683 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3684 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3685 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3686 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3687 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3698 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3699 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3700 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3701 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3702 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3703 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3704 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3705 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3720 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3721 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3722 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3723 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3724 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3725 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3726 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3727 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3728 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3729 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3730 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3731 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3732 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3733 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3736 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3737 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3738 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3739 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3740 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3741 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3742 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3743 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3746 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3747 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3748 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3749 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3750 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3751 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3752 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3753 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3754 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3755 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3761 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3762 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3763 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3764 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3765 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3766 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3767 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3768 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3769 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3770 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3782 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
3784 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
3785 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
3788 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
3789 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
3790 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
3791 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
3804 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
3806 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
3807 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
3808 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
3809 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
3810 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
3811 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
3812 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
3813 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
3814 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
3815 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
3816 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
3817 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
3818 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
3819 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
3820 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
3821 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
3822 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
3823 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
3824 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
3825 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
3826 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
3827 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
3828 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
3829 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
3832 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
3833 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
3834 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3835 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3836 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3837 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
3838 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3839 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3840 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3841 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3847 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
3848 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
3849 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3850 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3851 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3852 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
3853 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
3854 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3855 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3856 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3867 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
3868 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
3870 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
3871 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
3896 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
3898 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
3905 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
3907 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
3917 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
3920 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
3921 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
3922 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3923 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3924 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3925 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3926 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3927 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
3928 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3929 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3930 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
3931 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3932 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3935 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
3936 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
3937 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
3945 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
3946 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
3947 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
3948 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
3949 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
3950 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
3951 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
3952 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
3953 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
3954 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
3955 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
3956 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
3959 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
3960 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
3961 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
3962 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
3967 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
3970 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
3971 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
3972 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
3973 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
3974 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
3975 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
3976 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
3977 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
3978 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
3979 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
3980 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
3981 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
3984 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
3985 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
3986 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3987 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
3988 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
3989 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
3990 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3991 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
3996 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
3997 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
3998 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
3999 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4000 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4001 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4002 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4003 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4009 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4015 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4042 {
ISD::ABS, MVT::i64, { 1, 2, 3, 4 } },
4050 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4051 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4053 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4054 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4055 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4056 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4057 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4063 {
ISD::ABS, MVT::i32, { 1, 2, 3, 4 } },
4064 {
ISD::ABS, MVT::i16, { 2, 2, 3, 4 } },
4065 {
ISD::ABS, MVT::i8, { 2, 4, 4, 4 } },
4086 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4087 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4089 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4090 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4095 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4096 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4098 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4099 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4101 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4102 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4104 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4105 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4107 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4108 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4128 case Intrinsic::abs:
4131 case Intrinsic::bitreverse:
4134 case Intrinsic::bswap:
4137 case Intrinsic::ctlz:
4140 case Intrinsic::ctpop:
4143 case Intrinsic::cttz:
4146 case Intrinsic::fshl:
4150 if (Args[0] == Args[1]) {
4161 case Intrinsic::fshr:
4166 if (Args[0] == Args[1]) {
4177 case Intrinsic::lrint:
4178 case Intrinsic::llrint:
4187 case Intrinsic::maxnum:
4188 case Intrinsic::minnum:
4192 case Intrinsic::sadd_sat:
4195 case Intrinsic::smax:
4198 case Intrinsic::smin:
4201 case Intrinsic::ssub_sat:
4204 case Intrinsic::uadd_sat:
4207 case Intrinsic::umax:
4210 case Intrinsic::umin:
4213 case Intrinsic::usub_sat:
4216 case Intrinsic::sqrt:
4219 case Intrinsic::sadd_with_overflow:
4220 case Intrinsic::ssub_with_overflow:
4223 OpTy =
RetTy->getContainedType(0);
4225 case Intrinsic::uadd_with_overflow:
4226 case Intrinsic::usub_with_overflow:
4229 OpTy =
RetTy->getContainedType(0);
4231 case Intrinsic::umul_with_overflow:
4232 case Intrinsic::smul_with_overflow:
4235 OpTy =
RetTy->getContainedType(0);
4242 MVT MTy = LT.second;
4245 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4246 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4249 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4250 if (Cst->isAllOnesValue())
4258 auto adjustTableCost = [](
int ISD,
unsigned Cost,
4266 return LegalizationCost * 1;
4268 return LegalizationCost * (int)
Cost;
4271 if (ST->useGLMDivSqrtCosts())
4273 if (
auto KindCost = Entry->Cost[
CostKind])
4274 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4277 if (ST->useSLMArithCosts())
4279 if (
auto KindCost = Entry->Cost[
CostKind])
4280 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4284 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4285 if (
auto KindCost = Entry->Cost[
CostKind])
4286 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4289 if (ST->hasBITALG())
4290 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4291 if (
auto KindCost = Entry->Cost[
CostKind])
4292 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4295 if (ST->hasVPOPCNTDQ())
4296 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4297 if (
auto KindCost = Entry->Cost[
CostKind])
4298 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4303 if (
auto KindCost = Entry->Cost[
CostKind])
4304 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4309 if (
auto KindCost = Entry->Cost[
CostKind])
4310 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4315 if (
auto KindCost = Entry->Cost[
CostKind])
4316 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4321 if (
auto KindCost = Entry->Cost[
CostKind])
4322 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4327 if (
auto KindCost = Entry->Cost[
CostKind])
4328 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4333 if (
auto KindCost = Entry->Cost[
CostKind])
4334 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4339 if (
auto KindCost = Entry->Cost[
CostKind])
4340 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4345 if (
auto KindCost = Entry->Cost[
CostKind])
4346 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4351 if (
auto KindCost = Entry->Cost[
CostKind])
4352 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4357 if (
auto KindCost = Entry->Cost[
CostKind])
4358 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4363 if (
auto KindCost = Entry->Cost[
CostKind])
4364 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4369 if (
auto KindCost = Entry->Cost[
CostKind])
4370 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4376 if (
auto KindCost = Entry->Cost[
CostKind])
4377 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4381 if (
auto KindCost = Entry->Cost[
CostKind])
4382 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4386 if (ST->hasLZCNT()) {
4389 if (
auto KindCost = Entry->Cost[
CostKind])
4390 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4394 if (
auto KindCost = Entry->Cost[
CostKind])
4395 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4399 if (ST->hasPOPCNT()) {
4402 if (
auto KindCost = Entry->Cost[
CostKind])
4403 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4407 if (
auto KindCost = Entry->Cost[
CostKind])
4408 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4412 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4414 if (II->hasOneUse() && isa<StoreInst>(II->user_back()))
4416 if (
auto *LI = dyn_cast<LoadInst>(II->getOperand(0))) {
4417 if (LI->hasOneUse())
4425 if (
auto KindCost = Entry->Cost[
CostKind])
4426 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4430 if (
auto KindCost = Entry->Cost[
CostKind])
4431 return adjustTableCost(Entry->ISD, *KindCost, LT.first, ICA.
getFlags());
4454 if (
Index == -1U && (Opcode == Instruction::ExtractElement ||
4455 Opcode == Instruction::InsertElement)) {
4460 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4465 if (Opcode == Instruction::ExtractElement) {
4471 if (Opcode == Instruction::InsertElement) {
4479 if (
Index != -1U && (Opcode == Instruction::ExtractElement ||
4480 Opcode == Instruction::InsertElement)) {
4482 if (Opcode == Instruction::ExtractElement &&
4484 cast<FixedVectorType>(Val)->getNumElements() > 1)
4491 if (!LT.second.isVector())
4495 unsigned SizeInBits = LT.second.getSizeInBits();
4496 unsigned NumElts = LT.second.getVectorNumElements();
4497 unsigned SubNumElts = NumElts;
4502 if (SizeInBits > 128) {
4503 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4504 unsigned NumSubVecs = SizeInBits / 128;
4505 SubNumElts = NumElts / NumSubVecs;
4506 if (SubNumElts <=
Index) {
4507 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4508 Index %= SubNumElts;
4512 MVT MScalarTy = LT.second.getScalarType();
4513 auto IsCheapPInsrPExtrInsertPS = [&]() {
4516 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4518 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4519 Opcode == Instruction::InsertElement);
4527 (Opcode != Instruction::InsertElement || !Op0 ||
4528 isa<UndefValue>(Op0)))
4529 return RegisterFileMoveCost;
4531 if (Opcode == Instruction::InsertElement &&
4532 isa_and_nonnull<UndefValue>(Op0)) {
4534 if (isa_and_nonnull<LoadInst>(Op1))
4535 return RegisterFileMoveCost;
4536 if (!IsCheapPInsrPExtrInsertPS()) {
4539 return 2 + RegisterFileMoveCost;
4541 return 1 + RegisterFileMoveCost;
4546 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4547 return 1 + RegisterFileMoveCost;
4551 assert(ISD &&
"Unexpected vector opcode");
4552 if (ST->useSLMArithCosts())
4554 return Entry->Cost + RegisterFileMoveCost;
4557 if (IsCheapPInsrPExtrInsertPS())
4558 return 1 + RegisterFileMoveCost;
4567 if (Opcode == Instruction::InsertElement) {
4568 auto *SubTy = cast<VectorType>(Val);
4576 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4580 RegisterFileMoveCost;
4585 bool Insert,
bool Extract,
4588 cast<FixedVectorType>(Ty)->getNumElements() &&
4589 "Vector size mismatch");
4592 MVT MScalarTy = LT.second.getScalarType();
4593 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4596 constexpr unsigned LaneBitWidth = 128;
4597 assert((LegalVectorBitWidth < LaneBitWidth ||
4598 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4601 const int NumLegalVectors = *LT.first.getValue();
4602 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4607 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4609 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4612 if (LegalVectorBitWidth <= LaneBitWidth) {
4628 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4629 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4630 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4631 unsigned NumLegalElts =
4632 LT.second.getVectorNumElements() * NumLegalVectors;
4634 "Vector has been legalized to smaller element count");
4635 assert((NumLegalElts % NumLanesTotal) == 0 &&
4636 "Unexpected elts per lane");
4637 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4639 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4643 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4645 NumEltsPerLane, NumEltsPerLane *
I);
4646 if (LaneEltMask.
isZero())
4657 APInt AffectedLanes =
4660 AffectedLanes, NumLegalVectors,
true);
4661 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4662 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4663 unsigned I = NumLegalLanes * LegalVec + Lane;
4666 if (!AffectedLanes[
I] ||
4667 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4674 }
else if (LT.second.isVector()) {
4685 unsigned NumElts = LT.second.getVectorNumElements();
4687 PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
4688 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4697 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4698 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4699 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4703 if (LT.second.isVector()) {
4704 unsigned NumLegalElts =
4705 LT.second.getVectorNumElements() * NumLegalVectors;
4707 "Vector has been legalized to smaller element count");
4711 if (LegalVectorBitWidth > LaneBitWidth) {
4712 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4713 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4714 assert((NumLegalElts % NumLanesTotal) == 0 &&
4715 "Unexpected elts per lane");
4716 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4720 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4724 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4726 NumEltsPerLane,
I * NumEltsPerLane);
4727 if (LaneEltMask.
isZero())
4732 LaneTy, LaneEltMask,
false, Extract,
CostKind);
4749 int VF,
const APInt &DemandedDstElts,
4755 auto bailout = [&]() {
4765 unsigned PromEltTyBits = EltTyBits;
4766 switch (EltTyBits) {
4797 int NumDstElements = VF * ReplicationFactor;
4811 if (PromEltTyBits != EltTyBits) {
4817 Instruction::SExt, PromSrcVecTy, SrcVecTy,
4824 ReplicationFactor, VF,
4830 "We expect that the legalization doesn't affect the element width, "
4831 "doesn't coalesce/split elements.");
4834 unsigned NumDstVectors =
4835 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
4844 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
4845 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
4850 return NumDstVectorsDemanded * SingleShuffleCost;
4861 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
4864 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
4865 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
4872 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4882 auto *VTy = dyn_cast<FixedVectorType>(Src);
4887 if (Opcode == Instruction::Store && OpInfo.
isConstant())
4893 if (!VTy || !LT.second.isVector()) {
4895 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
4898 bool IsLoad = Opcode == Instruction::Load;
4900 Type *EltTy = VTy->getElementType();
4905 const unsigned SrcNumElt = VTy->getNumElements();
4908 int NumEltRemaining = SrcNumElt;
4910 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
4912 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
4915 const unsigned XMMBits = 128;
4916 if (XMMBits % EltTyBits != 0)
4920 const int NumEltPerXMM = XMMBits / EltTyBits;
4924 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
4925 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
4927 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
4931 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
4933 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
4934 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
4935 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
4936 "Unless we haven't halved the op size yet, "
4937 "we have less than two op's sized units of work left.");
4939 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
4943 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
4944 "After halving sizes, the vector elt count is no longer a multiple "
4945 "of number of elements per operation?");
4946 auto *CoalescedVecTy =
4947 CurrNumEltPerOp == 1
4951 EltTyBits * CurrNumEltPerOp),
4952 CurrVecTy->getNumElements() / CurrNumEltPerOp);
4955 "coalesciing elements doesn't change vector width.");
4957 while (NumEltRemaining > 0) {
4958 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
4962 if (NumEltRemaining < CurrNumEltPerOp &&
4963 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
4964 CurrOpSizeBytes != 1)
4967 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
4970 if (SubVecEltsLeft == 0) {
4971 SubVecEltsLeft += CurrVecTy->getNumElements();
4976 VTy, std::nullopt,
CostKind, NumEltDone(),
4984 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
4985 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
4986 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
4987 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
4988 APInt DemandedElts =
4990 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
4991 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5001 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5003 else if (CurrOpSizeBytes < 4)
5008 SubVecEltsLeft -= CurrNumEltPerOp;
5009 NumEltRemaining -= CurrNumEltPerOp;
5014 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5023 bool IsLoad = (Instruction::Load == Opcode);
5024 bool IsStore = (Instruction::Store == Opcode);
5026 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5031 unsigned NumElem = SrcVTy->getNumElements();
5039 MaskTy, DemandedElts,
false,
true,
CostKind);
5044 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5046 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5050 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5057 if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
5058 LT.second.getVectorNumElements() == NumElem)
5065 else if (LT.first * LT.second.getVectorNumElements() > NumElem) {
5067 LT.second.getVectorNumElements());
5075 return Cost + LT.first * (IsLoad ? 2 : 8);
5078 return Cost + LT.first;
5086 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5090 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5092 return getGEPCost(BaseGEP->getSourceElementType(),
5093 BaseGEP->getPointerOperand(), Indices,
nullptr,
5108 const unsigned NumVectorInstToHideOverhead = 10;
5121 return NumVectorInstToHideOverhead;
5131 std::optional<FastMathFlags> FMF,
5172 assert(ISD &&
"Invalid opcode");
5180 if (ST->useSLMArithCosts())
5195 MVT MTy = LT.second;
5197 auto *ValVTy = cast<FixedVectorType>(ValTy);
5210 if (LT.first != 1 && MTy.
isVector() &&
5216 ArithmeticCost *= LT.first - 1;
5219 if (ST->useSLMArithCosts())
5221 return ArithmeticCost + Entry->Cost;
5225 return ArithmeticCost + Entry->Cost;
5229 return ArithmeticCost + Entry->Cost;
5278 if (ValVTy->getElementType()->isIntegerTy(1)) {
5280 if (LT.first != 1 && MTy.
isVector() &&
5286 ArithmeticCost *= LT.first - 1;
5290 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5291 return ArithmeticCost + Entry->Cost;
5294 return ArithmeticCost + Entry->Cost;
5297 return ArithmeticCost + Entry->Cost;
5300 return ArithmeticCost + Entry->Cost;
5305 unsigned NumVecElts = ValVTy->getNumElements();
5306 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5316 if (LT.first != 1 && MTy.
isVector() &&
5322 ReductionCost *= LT.first - 1;
5328 while (NumVecElts > 1) {
5330 unsigned Size = NumVecElts * ScalarSize;
5339 }
else if (
Size == 128) {
5342 if (ValVTy->isFloatingPointTy())
5349 std::nullopt,
CostKind, 0,
nullptr);
5350 }
else if (
Size == 64) {
5353 if (ValVTy->isFloatingPointTy())
5360 std::nullopt,
CostKind, 0,
nullptr);
5366 Instruction::LShr, ShiftTy,
CostKind,
5393 MVT MTy = LT.second;
5397 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5401 "Expected float point or integer vector type.");
5402 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5470 auto *ValVTy = cast<FixedVectorType>(ValTy);
5471 unsigned NumVecElts = ValVTy->getNumElements();
5475 if (LT.first != 1 && MTy.
isVector() &&
5481 MinMaxCost *= LT.first - 1;
5487 return MinMaxCost + Entry->Cost;
5491 return MinMaxCost + Entry->Cost;
5495 return MinMaxCost + Entry->Cost;
5499 return MinMaxCost + Entry->Cost;
5511 while (NumVecElts > 1) {
5513 unsigned Size = NumVecElts * ScalarSize;
5521 }
else if (
Size == 128) {
5530 std::nullopt,
CostKind, 0,
nullptr);
5531 }
else if (
Size == 64) {
5539 std::nullopt,
CostKind, 0,
nullptr);
5592 if (BitSize % 64 != 0)
5593 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5598 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5604 return std::max<InstructionCost>(1,
Cost);
5619 unsigned ImmIdx = ~0U;
5623 case Instruction::GetElementPtr:
5630 case Instruction::Store:
5633 case Instruction::ICmp:
5639 if (
Idx == 1 && Imm.getBitWidth() == 64) {
5640 uint64_t ImmVal = Imm.getZExtValue();
5641 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5646 case Instruction::And:
5650 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.isIntN(32))
5654 case Instruction::Add:
5655 case Instruction::Sub:
5657 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
5661 case Instruction::UDiv:
5662 case Instruction::SDiv:
5663 case Instruction::URem:
5664 case Instruction::SRem:
5669 case Instruction::Mul:
5670 case Instruction::Or:
5671 case Instruction::Xor:
5675 case Instruction::Shl:
5676 case Instruction::LShr:
5677 case Instruction::AShr:
5681 case Instruction::Trunc:
5682 case Instruction::ZExt:
5683 case Instruction::SExt:
5684 case Instruction::IntToPtr:
5685 case Instruction::PtrToInt:
5686 case Instruction::BitCast:
5687 case Instruction::PHI:
5688 case Instruction::Call:
5689 case Instruction::Select:
5690 case Instruction::Ret:
5691 case Instruction::Load:
5695 if (
Idx == ImmIdx) {
5720 case Intrinsic::sadd_with_overflow:
5721 case Intrinsic::uadd_with_overflow:
5722 case Intrinsic::ssub_with_overflow:
5723 case Intrinsic::usub_with_overflow:
5724 case Intrinsic::smul_with_overflow:
5725 case Intrinsic::umul_with_overflow:
5726 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
5729 case Intrinsic::experimental_stackmap:
5730 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5733 case Intrinsic::experimental_patchpoint_void:
5734 case Intrinsic::experimental_patchpoint:
5735 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5746 return Opcode == Instruction::PHI ? 0 : 1;
5751int X86TTIImpl::getGatherOverhead()
const {
5764int X86TTIImpl::getScatterOverhead()
const {
5779 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
5780 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5790 if (IndexSize < 64 || !
GEP)
5793 unsigned NumOfVarIndices = 0;
5794 const Value *Ptrs =
GEP->getPointerOperand();
5797 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
5798 if (isa<Constant>(
GEP->getOperand(
I)))
5800 Type *IndxTy =
GEP->getOperand(
I)->getType();
5801 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
5802 IndxTy = IndexVTy->getElementType();
5804 !isa<SExtInst>(
GEP->getOperand(
I))) ||
5805 ++NumOfVarIndices > 1)
5808 return (
unsigned)32;
5813 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
5814 ? getIndexSizeInBits(
Ptr,
DL)
5822 *std::max(IdxsLT.first, SrcLT.first).getValue();
5823 if (SplitFactor > 1) {
5827 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
5833 const int GSOverhead = (Opcode == Instruction::Load)
5834 ? getGatherOverhead()
5835 : getScatterOverhead();
5851 Type *SrcVTy,
bool VariableMask,
5855 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5863 MaskTy, DemandedElts,
false,
true,
CostKind);
5868 MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
5873 DemandedElts,
false,
true,
CostKind);
5883 cast<FixedVectorType>(SrcVTy), DemandedElts,
5884 Opcode == Instruction::Load,
5885 Opcode == Instruction::Store,
CostKind);
5887 return AddressUnpackCost + MemoryOpCost + MaskUnpackCost + InsertExtractCost;
5892 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
5896 if ((Opcode == Instruction::Load &&
5899 Align(Alignment))) ||
5900 (Opcode == Instruction::Store &&
5911 if (!PtrTy &&
Ptr->getType()->isVectorTy())
5912 PtrTy = dyn_cast<PointerType>(
5913 cast<VectorType>(
Ptr->getType())->getElementType());
5914 assert(PtrTy &&
"Unexpected type for Ptr argument");
5917 if ((Opcode == Instruction::Load &&
5920 Align(Alignment)))) ||
5921 (Opcode == Instruction::Store &&
5924 Align(Alignment)))))
5925 return getGSScalarCost(Opcode,
CostKind, SrcVTy, VariableMask, Alignment,
5928 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
5944 return ST->hasMacroFusion() || ST->hasBranchFusion();
5952 if (isa<VectorType>(DataTy) &&
5953 cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5963 if (ScalarTy->
isHalfTy() && ST->hasBWI())
5973 return IntWidth == 32 || IntWidth == 64 ||
5974 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
5986 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
6003 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6025 if (!isa<VectorType>(DataTy))
6032 if (cast<FixedVectorType>(DataTy)->getNumElements() == 1)
6035 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6044 return IntWidth == 32 || IntWidth == 64 ||
6045 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6052bool X86TTIImpl::supportsGather()
const {
6066 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6067 return NumElts == 1 ||
6068 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6083 return IntWidth == 32 || IntWidth == 64;
6087 if (!supportsGather() || !ST->preferGather())
6102 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6103 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6108 for (
int Lane : seq<int>(0, NumElements)) {
6109 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6111 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6113 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6117 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6119 return ST->
hasSSE3() && NumElements % 4 == 0;
6121 return ST->
hasSSE3() && NumElements % 2 == 0;
6127 if (!ST->
hasAVX512() || !ST->preferScatter())
6140 if (
I->getOpcode() == Instruction::FDiv)
6156 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6158 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6161 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6162 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6163 if (RealCallerBits == RealCalleeBits)
6168 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6172 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6174 if (CB->isInlineAsm())
6178 for (
Value *Arg : CB->args())
6179 Types.push_back(Arg->getType());
6180 if (!CB->getType()->isVoidTy())
6181 Types.push_back(CB->getType());
6184 auto IsSimpleTy = [](
Type *Ty) {
6185 return !Ty->isVectorTy() && !Ty->isAggregateType();
6187 if (
all_of(Types, IsSimpleTy))
6190 if (
Function *NestedCallee = CB->getCalledFunction()) {
6192 if (NestedCallee->isIntrinsic())
6227 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6236 Options.AllowOverlappingLoads =
true;
6241 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6242 Options.LoadSizes.push_back(64);
6243 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6244 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6246 if (ST->is64Bit()) {
6247 Options.LoadSizes.push_back(8);
6249 Options.LoadSizes.push_back(4);
6250 Options.LoadSizes.push_back(2);
6251 Options.LoadSizes.push_back(1);
6256 return supportsGather();
6267 return !(ST->isAtom());
6287 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6293 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6305 if (UseMaskedMemOp) {
6307 for (
unsigned Index : Indices) {
6308 assert(
Index < Factor &&
"Invalid index for interleaved memory op");
6309 for (
unsigned Elm = 0; Elm < VF; Elm++)
6310 DemandedLoadStoreElts.
setBit(
Index + Elm * Factor);
6317 UseMaskForGaps ? DemandedLoadStoreElts
6326 if (UseMaskForGaps) {
6332 if (Opcode == Instruction::Load) {
6339 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6340 {3, MVT::v16i8, 12},
6341 {3, MVT::v32i8, 14},
6342 {3, MVT::v64i8, 22},
6345 if (
const auto *Entry =
6347 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6357 ShuffleKind, SingleMemOpTy, std::nullopt,
CostKind, 0,
nullptr);
6359 unsigned NumOfLoadsInInterleaveGrp =
6360 Indices.
size() ? Indices.
size() : Factor;
6369 unsigned NumOfUnfoldedLoads =
6370 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6373 unsigned NumOfShufflesPerResult =
6374 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6381 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6384 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6391 assert(Opcode == Instruction::Store &&
6392 "Expected Store Instruction at this point");
6394 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6395 {3, MVT::v16i8, 12},
6396 {3, MVT::v32i8, 14},
6397 {3, MVT::v64i8, 26},
6400 {4, MVT::v16i8, 11},
6401 {4, MVT::v32i8, 14},
6405 if (
const auto *Entry =
6407 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6412 unsigned NumOfSources = Factor;
6415 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6419 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6422 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6430 bool UseMaskForCond,
bool UseMaskForGaps) {
6431 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6433 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6434 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6439 return ST->hasBWI();
6441 return ST->hasBF16();
6444 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6446 Opcode, VecTy, Factor, Indices, Alignment,
6449 if (UseMaskForCond || UseMaskForGaps)
6452 UseMaskForCond, UseMaskForGaps);
6472 unsigned VF = VecTy->getNumElements() / Factor;
6473 Type *ScalarTy = VecTy->getElementType();
6505 {2, MVT::v16i16, 9},
6506 {2, MVT::v32i16, 18},
6509 {2, MVT::v16i32, 8},
6510 {2, MVT::v32i32, 16},
6514 {2, MVT::v16i64, 16},
6515 {2, MVT::v32i64, 32},
6520 {3, MVT::v16i8, 11},
6521 {3, MVT::v32i8, 14},
6526 {3, MVT::v16i16, 28},
6527 {3, MVT::v32i16, 56},
6532 {3, MVT::v16i32, 14},
6533 {3, MVT::v32i32, 32},
6537 {3, MVT::v8i64, 10},
6538 {3, MVT::v16i64, 20},
6543 {4, MVT::v16i8, 24},
6544 {4, MVT::v32i8, 56},
6547 {4, MVT::v4i16, 17},
6548 {4, MVT::v8i16, 33},
6549 {4, MVT::v16i16, 75},
6550 {4, MVT::v32i16, 150},
6554 {4, MVT::v8i32, 16},
6555 {4, MVT::v16i32, 32},
6556 {4, MVT::v32i32, 68},
6560 {4, MVT::v8i64, 20},
6561 {4, MVT::v16i64, 40},
6566 {6, MVT::v16i8, 43},
6567 {6, MVT::v32i8, 82},
6569 {6, MVT::v2i16, 13},
6571 {6, MVT::v8i16, 39},
6572 {6, MVT::v16i16, 106},
6573 {6, MVT::v32i16, 212},
6576 {6, MVT::v4i32, 15},
6577 {6, MVT::v8i32, 31},
6578 {6, MVT::v16i32, 64},
6581 {6, MVT::v4i64, 18},
6582 {6, MVT::v8i64, 36},
6587 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6601 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6606 {2, MVT::v16i16, 4},
6607 {2, MVT::v32i16, 8},
6611 {2, MVT::v16i32, 8},
6612 {2, MVT::v32i32, 16},
6617 {2, MVT::v16i64, 16},
6618 {2, MVT::v32i64, 32},
6623 {3, MVT::v16i8, 11},
6624 {3, MVT::v32i8, 13},
6628 {3, MVT::v8i16, 12},
6629 {3, MVT::v16i16, 27},
6630 {3, MVT::v32i16, 54},
6634 {3, MVT::v8i32, 11},
6635 {3, MVT::v16i32, 22},
6636 {3, MVT::v32i32, 48},
6640 {3, MVT::v8i64, 12},
6641 {3, MVT::v16i64, 24},
6647 {4, MVT::v32i8, 12},
6651 {4, MVT::v8i16, 10},
6652 {4, MVT::v16i16, 32},
6653 {4, MVT::v32i16, 64},
6657 {4, MVT::v8i32, 16},
6658 {4, MVT::v16i32, 32},
6659 {4, MVT::v32i32, 64},
6663 {4, MVT::v8i64, 20},
6664 {4, MVT::v16i64, 40},
6669 {6, MVT::v16i8, 27},
6670 {6, MVT::v32i8, 90},
6672 {6, MVT::v2i16, 10},
6673 {6, MVT::v4i16, 15},
6674 {6, MVT::v8i16, 21},
6675 {6, MVT::v16i16, 58},
6676 {6, MVT::v32i16, 90},
6679 {6, MVT::v4i32, 12},
6680 {6, MVT::v8i32, 33},
6681 {6, MVT::v16i32, 66},
6684 {6, MVT::v4i64, 15},
6685 {6, MVT::v8i64, 30},
6688 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6699 if (Opcode == Instruction::Load) {
6700 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6704 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6708 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6710 return GetDiscountedCost(Entry);
6713 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6715 return GetDiscountedCost(Entry);
6718 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6720 return GetDiscountedCost(Entry);
6722 assert(Opcode == Instruction::Store &&
6723 "Expected Store Instruction at this point");
6725 "Interleaved store only supports fully-interleaved groups.");
6727 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6729 return MemOpCosts + Entry->Cost;
6732 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6734 return MemOpCosts + Entry->Cost;
6739 UseMaskForCond, UseMaskForGaps);
6744 bool HasBaseReg, int64_t Scale,
6745 unsigned AddrSpace)
const {
6772 return AM.
Scale != 0;
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the bit whose position is given by "BitPosition" to 1.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
A parsed version of the target data layout string, and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same element type.
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0.0.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and is congruent to Skew mod Align.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.