63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
165 bool Vector = (ClassID == 1);
172 if (!
Vector && ST->hasEGPR())
207 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
209 if (ST->
hasAVX() && PreferVectorWidth >= 256)
211 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
252 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
269 assert(ISD &&
"Invalid opcode");
271 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
272 (LT.second.getScalarType() == MVT::i32 ||
273 LT.second.getScalarType() == MVT::i64)) {
275 bool Op1Signed =
false, Op2Signed =
false;
278 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
279 bool SignedMode = Op1Signed || Op2Signed;
284 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
285 LT.second.getScalarType() == MVT::i32) {
287 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
289 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
290 bool Op1Sext = isa<SExtInst>(Args[0]) &&
291 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
292 bool Op2Sext = isa<SExtInst>(Args[1]) &&
293 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
295 bool IsZeroExtended = !Op1Signed || !Op2Signed;
296 bool IsConstant = Op1Constant || Op2Constant;
297 bool IsSext = Op1Sext || Op2Sext;
298 if (IsConstant || IsZeroExtended || IsSext)
306 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
309 if (!SignedMode && OpMinSize <= 8)
313 if (!SignedMode && OpMinSize <= 16)
320 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
373 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
374 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
375 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
376 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
377 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
378 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
379 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
380 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
381 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
385 if (
const auto *Entry =
387 if (
auto KindCost = Entry->Cost[
CostKind])
388 return LT.first * *KindCost;
391 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
392 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
393 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
394 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
395 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
396 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
397 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
398 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
399 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
401 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
402 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
403 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
404 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
405 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
406 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
410 if (
const auto *Entry =
412 if (
auto KindCost = Entry->Cost[
CostKind])
413 return LT.first * *KindCost;
416 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
417 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
418 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
420 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
421 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
422 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
424 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
425 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
426 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
427 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
428 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
429 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
431 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
432 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
433 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
434 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
435 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
436 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
437 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
446 if (
const auto *Entry =
448 if (
auto KindCost = Entry->Cost[
CostKind])
449 return LT.first * *KindCost;
452 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
453 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
454 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
455 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
456 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
457 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
459 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
460 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
461 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
462 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
463 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
464 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
466 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
467 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
468 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
469 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
470 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
471 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
473 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
474 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
475 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
476 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
477 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
478 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
487 if (
const auto *Entry =
489 if (
auto KindCost = Entry->Cost[
CostKind])
490 return LT.first * *KindCost;
493 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
494 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
495 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
496 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
497 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
498 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
500 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
501 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
502 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
503 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
504 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
505 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
507 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
508 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
509 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
510 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
511 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
512 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
514 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
515 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
516 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
517 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
518 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
519 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
529 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
530 if (
const auto *Entry =
532 if (
auto KindCost = Entry->Cost[
CostKind])
533 return LT.first * *KindCost;
536 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
537 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
538 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
540 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
541 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
542 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
544 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
545 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
546 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
548 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
549 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
550 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
560 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
561 if (
const auto *Entry =
563 if (
auto KindCost = Entry->Cost[
CostKind])
564 return LT.first * *KindCost;
579 if (
const auto *Entry =
581 if (
auto KindCost = Entry->Cost[
CostKind])
582 return LT.first * *KindCost;
602 if (
const auto *Entry =
604 if (
auto KindCost = Entry->Cost[
CostKind])
605 return LT.first * *KindCost;
625 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
626 if (
auto KindCost = Entry->Cost[
CostKind])
627 return LT.first * *KindCost;
647 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
648 if (
auto KindCost = Entry->Cost[
CostKind])
649 return LT.first * *KindCost;
657 if (
const auto *Entry =
659 if (
auto KindCost = Entry->Cost[
CostKind])
660 return LT.first * *KindCost;
680 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
681 if (
auto KindCost = Entry->Cost[
CostKind])
682 return LT.first * *KindCost;
685 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
686 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
687 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
688 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
689 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
690 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
691 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
692 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
693 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
695 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
696 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
697 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
701 if (
const auto *Entry =
703 if (
auto KindCost = Entry->Cost[
CostKind])
704 return LT.first * *KindCost;
707 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
708 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
709 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
711 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
712 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
713 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
715 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
716 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
717 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
718 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
719 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
720 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
721 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
725 if (
const auto *Entry =
727 if (
auto KindCost = Entry->Cost[
CostKind])
728 return LT.first * *KindCost;
732 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
733 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
734 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
735 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
736 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
737 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
739 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
740 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
741 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
742 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
743 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
744 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
746 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
747 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
748 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
749 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
750 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
751 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
753 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
754 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
755 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
756 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
757 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
758 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
762 if (
const auto *Entry =
764 if (
auto KindCost = Entry->Cost[
CostKind])
765 return LT.first * *KindCost;
768 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
769 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
770 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
771 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
772 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
773 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
775 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
776 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
777 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
778 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
779 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
780 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
782 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
783 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
784 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
785 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
786 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
787 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
789 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
790 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
791 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
792 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
793 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
794 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
799 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
800 if (
const auto *Entry =
802 if (
auto KindCost = Entry->Cost[
CostKind])
803 return LT.first * *KindCost;
807 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
808 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
809 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
811 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
812 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
813 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
815 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
816 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
817 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
819 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
820 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
821 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
825 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
826 if (
const auto *Entry =
828 if (
auto KindCost = Entry->Cost[
CostKind])
829 return LT.first * *KindCost;
832 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
833 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
834 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
839 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
840 if (
auto KindCost = Entry->Cost[
CostKind])
841 return LT.first * *KindCost;
844 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
845 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
846 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
847 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
848 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
849 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
850 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
851 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
852 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
854 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
855 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
856 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
857 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
858 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
859 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
860 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
861 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
862 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
864 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
865 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
867 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
868 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
869 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
870 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
872 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
873 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
875 {
ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } },
876 {
ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } },
877 {
ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } },
878 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
880 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
881 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
882 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
883 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
888 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
889 if (
auto KindCost = Entry->Cost[
CostKind])
890 return LT.first * *KindCost;
893 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
894 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
895 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
897 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
898 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
899 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
901 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
902 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
903 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
904 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
905 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
906 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
907 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
908 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
909 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
911 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
912 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
913 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
914 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
915 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
916 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
917 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
918 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
919 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
921 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
922 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
924 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
925 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
927 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
928 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
929 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
930 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
932 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
933 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
934 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
935 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
937 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
938 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
939 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
940 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
942 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
943 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
944 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
945 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
950 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
951 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
952 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
953 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
954 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
955 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
956 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
957 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
960 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
961 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
962 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
963 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
965 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
966 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
967 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
968 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
969 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
970 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
971 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
972 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
975 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
976 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
977 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
978 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
982 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
983 if (
auto KindCost = Entry->Cost[
CostKind])
984 return LT.first * *KindCost;
989 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
990 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
991 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
992 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
993 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
994 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
995 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
996 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
997 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
998 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
1010 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
1011 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
1018 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
1019 if (
auto KindCost = Entry->Cost[
CostKind])
1020 return LT.first * *KindCost;
1025 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1026 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1027 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1028 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1029 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1030 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1031 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1032 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1033 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1034 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1035 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1036 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1038 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1039 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1040 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1041 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1042 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1043 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1044 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1045 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1046 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1047 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1048 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1049 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1059 if (
const auto *Entry =
1061 if (
auto KindCost = Entry->Cost[
CostKind])
1062 return LT.first * *KindCost;
1069 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1070 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1075 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1076 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1077 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1078 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1081 if (ST->useGLMDivSqrtCosts())
1082 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1083 if (
auto KindCost = Entry->Cost[
CostKind])
1084 return LT.first * *KindCost;
1087 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1088 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1089 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1090 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1091 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1092 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1093 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1094 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1095 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1096 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1097 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1098 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1104 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1106 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1107 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1110 if (ST->useSLMArithCosts())
1111 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1112 if (
auto KindCost = Entry->Cost[
CostKind])
1113 return LT.first * *KindCost;
1116 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1117 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1118 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1119 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1121 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1122 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1123 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1124 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1126 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1127 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1128 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1129 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1130 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1131 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1133 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1134 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1135 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1136 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1137 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1138 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1139 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1140 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1142 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1143 {
ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } },
1144 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1145 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1146 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1147 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1148 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1152 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1153 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1155 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1156 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1157 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1158 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1159 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1160 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1162 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1163 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1164 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1165 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1166 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1167 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1169 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1170 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1171 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1172 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1173 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1174 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1176 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1177 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1178 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1179 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1180 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1181 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1186 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1187 if (
auto KindCost = Entry->Cost[
CostKind])
1188 return LT.first * *KindCost;
1194 {
ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } },
1195 {
ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } },
1196 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1197 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1198 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1199 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1201 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1202 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1203 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1204 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1206 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1207 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1208 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1209 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1211 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1212 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1213 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1214 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1216 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1217 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1218 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1219 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1220 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1221 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1222 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1223 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1224 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1225 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1227 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1228 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1229 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1230 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1231 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1232 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1233 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1234 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1236 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1237 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1238 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1239 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1240 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1241 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1242 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1243 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1245 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1246 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1247 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1248 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1249 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1250 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1251 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1252 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1254 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1255 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1257 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1258 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1259 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1260 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1261 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1262 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1264 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1265 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1266 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1267 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1268 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1269 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1271 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1272 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1273 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1274 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1275 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1276 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1278 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1279 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1280 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1281 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1282 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1283 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1287 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1288 if (
auto KindCost = Entry->Cost[
CostKind])
1289 return LT.first * *KindCost;
1292 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1293 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1294 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1295 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1297 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1298 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1299 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1300 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1302 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1303 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1304 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1305 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1307 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1308 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1309 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1310 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1312 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1316 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1317 if (
auto KindCost = Entry->Cost[
CostKind])
1318 return LT.first * *KindCost;
1321 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1322 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1323 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1325 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1326 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1327 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1328 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1330 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1331 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1332 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1333 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1335 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1339 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1340 if (
auto KindCost = Entry->Cost[
CostKind])
1341 return LT.first * *KindCost;
1344 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1348 if (
const auto *Entry =
CostTableLookup(SSSE3CostTable, ISD, LT.second))
1349 if (
auto KindCost = Entry->Cost[
CostKind])
1350 return LT.first * *KindCost;
1355 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1356 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1357 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1358 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1360 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1361 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1362 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1363 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1365 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1366 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1367 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1368 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1370 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1371 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1372 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1373 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1375 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1376 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1377 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1378 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1380 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1381 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1382 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1383 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1385 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1386 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1388 {
ISD::MUL, MVT::v16i8, { 6, 18,12,12 } },
1389 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1390 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1391 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1395 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1396 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1397 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1398 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1400 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1401 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1402 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1403 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1405 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1406 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1407 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1409 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1410 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1411 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1413 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1414 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1418 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1419 if (
auto KindCost = Entry->Cost[
CostKind])
1420 return LT.first * *KindCost;
1423 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1424 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1426 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1427 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1429 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1430 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1432 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1433 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1435 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1436 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1440 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1441 if (
auto KindCost = Entry->Cost[
CostKind])
1442 return LT.first * *KindCost;
1447 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1452 if (
auto KindCost = Entry->Cost[
CostKind])
1453 return LT.first * *KindCost;
1464 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1465 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1466 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1468 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1469 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1470 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1471 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1472 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1476 if (
auto KindCost = Entry->Cost[
CostKind])
1477 return LT.first * *KindCost;
1491 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1540 CostKind, Mask.size() / 2, BaseTp);
1553 using namespace PatternMatch;
1556 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1561 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1562 LT.second = LT.second.changeVectorElementType(MVT::f16);
1567 int NumElts = LT.second.getVectorNumElements();
1568 if ((
Index % NumElts) == 0)
1571 if (SubLT.second.isVector()) {
1572 int NumSubElts = SubLT.second.getVectorNumElements();
1573 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1581 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1582 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
1583 (NumSubElts % OrigSubElts) == 0 &&
1584 LT.second.getVectorElementType() ==
1585 SubLT.second.getVectorElementType() &&
1586 LT.second.getVectorElementType().getSizeInBits() ==
1588 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1589 "Unexpected number of elements!");
1591 LT.second.getVectorNumElements());
1593 SubLT.second.getVectorNumElements());
1602 return ExtractCost + 1;
1605 "Unexpected vector size");
1607 return ExtractCost + 2;
1618 int NumElts = LT.second.getVectorNumElements();
1620 if (SubLT.second.isVector()) {
1621 int NumSubElts = SubLT.second.getVectorNumElements();
1622 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1635 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1666 if (
const auto *Entry =
1675 MVT LegalVT = LT.second;
1680 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1684 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1691 if (!Mask.empty() && NumOfDests.
isValid()) {
1709 unsigned E = *NumOfDests.
getValue();
1710 unsigned NormalizedVF =
1716 unsigned PrevSrcReg = 0;
1720 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1721 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1726 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1727 PrevRegMask != RegMask)
1735 if (SrcReg != DestReg &&
1740 PrevSrcReg = SrcReg;
1741 PrevRegMask = RegMask;
1754 std::nullopt,
CostKind, 0,
nullptr);
1765 LT.first = NumOfDests * NumOfShufflesPerDest;
1781 if (
const auto *Entry =
1783 return LT.first * Entry->Cost;
1816 if (
const auto *Entry =
1818 return LT.first * Entry->Cost;
1895 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1896 if (
auto KindCost = Entry->Cost[
CostKind])
1897 return LT.first * *KindCost;
1950 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1951 return LT.first * Entry->Cost;
1972 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
1973 return LT.first * Entry->Cost;
2035 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2036 return LT.first * Entry->Cost;
2049 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2050 return LT.first * Entry->Cost;
2081 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2082 return LT.first * Entry->Cost;
2138 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2140 if (
const auto *Entry =
2143 LT.second.getVectorElementCount()) &&
2144 "Table entry missing from isLegalBroadcastLoad()");
2145 return LT.first * Entry->Cost;
2148 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2149 return LT.first * Entry->Cost;
2162 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2163 return LT.first * Entry->Cost;
2174 assert(ISD &&
"Invalid opcode");
2320 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2321 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2668 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2745 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
2969 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
2987 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2988 if (
auto KindCost = Entry->Cost[
CostKind])
2993 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2994 if (
auto KindCost = Entry->Cost[
CostKind])
2999 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3000 if (
auto KindCost = Entry->Cost[
CostKind])
3006 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3007 if (
auto KindCost = Entry->Cost[
CostKind])
3012 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3013 if (
auto KindCost = Entry->Cost[
CostKind])
3018 SimpleDstTy, SimpleSrcTy))
3019 if (
auto KindCost = Entry->Cost[
CostKind])
3024 SimpleDstTy, SimpleSrcTy))
3025 if (
auto KindCost = Entry->Cost[
CostKind])
3031 SimpleDstTy, SimpleSrcTy))
3032 if (
auto KindCost = Entry->Cost[
CostKind])
3038 SimpleDstTy, SimpleSrcTy))
3039 if (
auto KindCost = Entry->Cost[
CostKind])
3045 SimpleDstTy, SimpleSrcTy))
3046 if (
auto KindCost = Entry->Cost[
CostKind])
3062 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3063 if (
auto KindCost = Entry->Cost[
CostKind])
3064 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3068 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3069 if (
auto KindCost = Entry->Cost[
CostKind])
3070 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3074 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3075 if (
auto KindCost = Entry->Cost[
CostKind])
3076 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3081 LTDest.second, LTSrc.second))
3082 if (
auto KindCost = Entry->Cost[
CostKind])
3083 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3087 LTDest.second, LTSrc.second))
3088 if (
auto KindCost = Entry->Cost[
CostKind])
3089 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3093 LTDest.second, LTSrc.second))
3094 if (
auto KindCost = Entry->Cost[
CostKind])
3095 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3099 LTDest.second, LTSrc.second))
3100 if (
auto KindCost = Entry->Cost[
CostKind])
3101 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3105 LTDest.second, LTSrc.second))
3106 if (
auto KindCost = Entry->Cost[
CostKind])
3107 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3111 LTDest.second, LTSrc.second))
3112 if (
auto KindCost = Entry->Cost[
CostKind])
3113 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3117 LTDest.second, LTSrc.second))
3118 if (
auto KindCost = Entry->Cost[
CostKind])
3119 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3124 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3125 Type *ExtSrc = Src->getWithNewBitWidth(32);
3131 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3141 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3142 Type *TruncDst = Dst->getWithNewBitWidth(32);
3152 return Cost == 0 ? 0 :
N;
3172 MVT MTy = LT.second;
3175 assert(ISD &&
"Invalid opcode");
3178 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3191 Pred = cast<CmpInst>(
I)->getPredicate();
3193 bool CmpWithConstant =
false;
3194 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3195 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3200 ExtraCost = CmpWithConstant ? 0 : 1;
3205 ExtraCost = CmpWithConstant ? 0 : 1;
3211 ExtraCost = CmpWithConstant ? 1 : 2;
3222 ExtraCost = CmpWithConstant ? 2 : 3;
3229 if (CondTy && !ST->
hasAVX())
3398 if (ST->useSLMArithCosts())
3400 if (
auto KindCost = Entry->Cost[
CostKind])
3401 return LT.first * (ExtraCost + *KindCost);
3405 if (
auto KindCost = Entry->Cost[
CostKind])
3406 return LT.first * (ExtraCost + *KindCost);
3410 if (
auto KindCost = Entry->Cost[
CostKind])
3411 return LT.first * (ExtraCost + *KindCost);
3415 if (
auto KindCost = Entry->Cost[
CostKind])
3416 return LT.first * (ExtraCost + *KindCost);
3420 if (
auto KindCost = Entry->Cost[
CostKind])
3421 return LT.first * (ExtraCost + *KindCost);
3425 if (
auto KindCost = Entry->Cost[
CostKind])
3426 return LT.first * (ExtraCost + *KindCost);
3430 if (
auto KindCost = Entry->Cost[
CostKind])
3431 return LT.first * (ExtraCost + *KindCost);
3435 if (
auto KindCost = Entry->Cost[
CostKind])
3436 return LT.first * (ExtraCost + *KindCost);
3440 if (
auto KindCost = Entry->Cost[
CostKind])
3441 return LT.first * (ExtraCost + *KindCost);
3445 if (
auto KindCost = Entry->Cost[
CostKind])
3446 return LT.first * (ExtraCost + *KindCost);
3471 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3472 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3473 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3474 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3475 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3476 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3477 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3478 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3479 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3480 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3481 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3482 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3483 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3484 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3485 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3507 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3508 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3509 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3510 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3511 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3512 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3513 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3514 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3515 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3516 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3517 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3518 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3520 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3521 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3522 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3523 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3524 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3525 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3528 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3529 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3551 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3552 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3553 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3554 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3555 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3556 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3557 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3558 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3559 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3560 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3561 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3562 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3563 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3567 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3568 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3569 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3570 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3571 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3572 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3573 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3574 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3575 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3576 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3577 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3578 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3579 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3580 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3581 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3582 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3583 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3584 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3593 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3594 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3595 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3596 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3601 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3602 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3603 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3604 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3609 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3610 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3611 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3612 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3613 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3614 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3615 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3616 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3617 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3625 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3626 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3627 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3628 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3629 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3630 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3631 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3632 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3633 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3634 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3635 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3636 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3637 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3638 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3639 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3640 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3641 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3642 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3643 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3644 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3645 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3646 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3647 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3648 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3655 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3656 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3657 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3658 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3659 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3660 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3661 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3662 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3663 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3664 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3665 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3666 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3667 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3668 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3669 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3670 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3671 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3672 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3673 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3674 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3675 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3676 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3677 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3678 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3706 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3709 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3710 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3726 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3727 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3728 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3729 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3730 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3731 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3732 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3733 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3734 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3735 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3736 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3737 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3738 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3739 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3740 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3741 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3752 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3753 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3754 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3755 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3756 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3757 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3758 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3759 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3774 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3775 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3776 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3777 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3778 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3779 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3780 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3781 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3782 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3783 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3784 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3785 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3786 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3787 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3790 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3791 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3792 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3793 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3794 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3795 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3796 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3797 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3800 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3801 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3802 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3803 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3804 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3805 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3806 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3807 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3808 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3809 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3815 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3816 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3817 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3818 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3819 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3820 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3821 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3822 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3823 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3824 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3836 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
3838 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
3839 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
3842 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
3843 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
3844 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
3845 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
3858 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
3860 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
3861 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
3862 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
3863 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
3864 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
3865 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
3866 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
3867 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
3868 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
3869 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
3870 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
3871 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
3872 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
3873 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
3874 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
3875 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
3876 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
3877 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
3878 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
3879 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
3880 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
3881 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
3882 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
3883 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
3886 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
3887 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
3888 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3889 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3890 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3891 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
3892 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3893 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3894 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3895 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3901 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
3902 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
3903 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3904 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3905 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3906 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
3907 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
3908 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3909 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3910 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3921 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
3922 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
3924 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
3925 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
3950 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
3952 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
3959 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
3961 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
3971 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
3974 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
3975 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
3976 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3977 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3978 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3979 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3980 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3981 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
3982 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3983 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3984 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
3985 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3986 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3989 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
3990 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
3991 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
3999 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
4000 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
4001 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
4002 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
4003 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
4004 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
4005 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
4006 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
4007 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
4008 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
4009 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
4010 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
4013 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
4014 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
4015 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
4016 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
4021 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
4024 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
4025 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
4026 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
4027 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
4028 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
4029 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
4030 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
4031 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
4032 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
4033 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4034 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4035 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4038 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4039 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4040 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4041 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4042 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4043 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4044 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4045 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4050 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4051 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4052 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4053 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4054 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4055 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4056 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4057 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4063 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4069 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4096 {
ISD::ABS, MVT::i64, { 1, 2, 3, 3 } },
4104 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4105 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4107 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4108 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4109 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4110 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4111 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4117 {
ISD::ABS, MVT::i32, { 1, 2, 3, 3 } },
4118 {
ISD::ABS, MVT::i16, { 2, 2, 3, 3 } },
4119 {
ISD::ABS, MVT::i8, { 2, 4, 4, 3 } },
4140 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4141 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4143 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4144 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4149 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4150 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4152 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4153 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4155 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4156 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4158 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4159 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4161 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4162 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4182 case Intrinsic::abs:
4185 case Intrinsic::bitreverse:
4188 case Intrinsic::bswap:
4191 case Intrinsic::ctlz:
4194 case Intrinsic::ctpop:
4197 case Intrinsic::cttz:
4200 case Intrinsic::fshl:
4204 if (Args[0] == Args[1]) {
4215 case Intrinsic::fshr:
4220 if (Args[0] == Args[1]) {
4231 case Intrinsic::lrint:
4232 case Intrinsic::llrint:
4241 case Intrinsic::maxnum:
4242 case Intrinsic::minnum:
4246 case Intrinsic::sadd_sat:
4249 case Intrinsic::smax:
4252 case Intrinsic::smin:
4255 case Intrinsic::ssub_sat:
4258 case Intrinsic::uadd_sat:
4261 case Intrinsic::umax:
4264 case Intrinsic::umin:
4267 case Intrinsic::usub_sat:
4270 case Intrinsic::sqrt:
4273 case Intrinsic::sadd_with_overflow:
4274 case Intrinsic::ssub_with_overflow:
4277 OpTy =
RetTy->getContainedType(0);
4279 case Intrinsic::uadd_with_overflow:
4280 case Intrinsic::usub_with_overflow:
4283 OpTy =
RetTy->getContainedType(0);
4285 case Intrinsic::umul_with_overflow:
4286 case Intrinsic::smul_with_overflow:
4289 OpTy =
RetTy->getContainedType(0);
4294 auto adjustTableCost = [&](
int ISD,
unsigned Cost,
4295 std::pair<InstructionCost, MVT> LT,
4298 MVT MTy = LT.second;
4305 return LegalizationCost * 1;
4310 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4312 if (
II->hasOneUse() && isa<StoreInst>(
II->user_back()))
4314 if (
auto *LI = dyn_cast<LoadInst>(
II->getOperand(0))) {
4315 if (LI->hasOneUse())
4322 return LegalizationCost * (int)
Cost;
4327 MVT MTy = LT.second;
4330 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4331 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4334 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4335 if (Cst->isAllOnesValue())
4343 if (ST->useGLMDivSqrtCosts())
4345 if (
auto KindCost = Entry->Cost[
CostKind])
4346 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4348 if (ST->useSLMArithCosts())
4350 if (
auto KindCost = Entry->Cost[
CostKind])
4351 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4354 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4355 if (
auto KindCost = Entry->Cost[
CostKind])
4356 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4358 if (ST->hasBITALG())
4359 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4360 if (
auto KindCost = Entry->Cost[
CostKind])
4361 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4363 if (ST->hasVPOPCNTDQ())
4364 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4365 if (
auto KindCost = Entry->Cost[
CostKind])
4366 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4370 if (
auto KindCost = Entry->Cost[
CostKind])
4371 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4375 if (
auto KindCost = Entry->Cost[
CostKind])
4376 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4380 if (
auto KindCost = Entry->Cost[
CostKind])
4381 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4385 if (
auto KindCost = Entry->Cost[
CostKind])
4386 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4390 if (
auto KindCost = Entry->Cost[
CostKind])
4391 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4395 if (
auto KindCost = Entry->Cost[
CostKind])
4396 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4400 if (
auto KindCost = Entry->Cost[
CostKind])
4401 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4405 if (
auto KindCost = Entry->Cost[
CostKind])
4406 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4410 if (
auto KindCost = Entry->Cost[
CostKind])
4411 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4415 if (
auto KindCost = Entry->Cost[
CostKind])
4416 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4420 if (
auto KindCost = Entry->Cost[
CostKind])
4421 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4425 if (
auto KindCost = Entry->Cost[
CostKind])
4426 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4431 if (
auto KindCost = Entry->Cost[
CostKind])
4432 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4435 if (
auto KindCost = Entry->Cost[
CostKind])
4436 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4439 if (ST->hasLZCNT()) {
4442 if (
auto KindCost = Entry->Cost[
CostKind])
4443 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4446 if (
auto KindCost = Entry->Cost[
CostKind])
4447 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4450 if (ST->hasPOPCNT()) {
4453 if (
auto KindCost = Entry->Cost[
CostKind])
4454 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4457 if (
auto KindCost = Entry->Cost[
CostKind])
4458 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4463 if (
auto KindCost = Entry->Cost[
CostKind])
4464 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4467 if (
auto KindCost = Entry->Cost[
CostKind])
4468 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4491 if (
Index == -1U && (Opcode == Instruction::ExtractElement ||
4492 Opcode == Instruction::InsertElement)) {
4497 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4502 if (Opcode == Instruction::ExtractElement) {
4508 if (Opcode == Instruction::InsertElement) {
4516 if (
Index != -1U && (Opcode == Instruction::ExtractElement ||
4517 Opcode == Instruction::InsertElement)) {
4519 if (Opcode == Instruction::ExtractElement &&
4521 cast<FixedVectorType>(Val)->getNumElements() > 1)
4528 if (!LT.second.isVector())
4532 unsigned SizeInBits = LT.second.getSizeInBits();
4533 unsigned NumElts = LT.second.getVectorNumElements();
4534 unsigned SubNumElts = NumElts;
4539 if (SizeInBits > 128) {
4540 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4541 unsigned NumSubVecs = SizeInBits / 128;
4542 SubNumElts = NumElts / NumSubVecs;
4543 if (SubNumElts <=
Index) {
4544 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4545 Index %= SubNumElts;
4549 MVT MScalarTy = LT.second.getScalarType();
4550 auto IsCheapPInsrPExtrInsertPS = [&]() {
4553 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4555 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4556 Opcode == Instruction::InsertElement);
4564 (Opcode != Instruction::InsertElement || !Op0 ||
4565 isa<UndefValue>(Op0)))
4566 return RegisterFileMoveCost;
4568 if (Opcode == Instruction::InsertElement &&
4569 isa_and_nonnull<UndefValue>(Op0)) {
4571 if (isa_and_nonnull<LoadInst>(Op1))
4572 return RegisterFileMoveCost;
4573 if (!IsCheapPInsrPExtrInsertPS()) {
4576 return 2 + RegisterFileMoveCost;
4578 return 1 + RegisterFileMoveCost;
4583 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4584 return 1 + RegisterFileMoveCost;
4588 assert(ISD &&
"Unexpected vector opcode");
4589 if (ST->useSLMArithCosts())
4591 return Entry->Cost + RegisterFileMoveCost;
4594 if (IsCheapPInsrPExtrInsertPS())
4595 return 1 + RegisterFileMoveCost;
4604 if (Opcode == Instruction::InsertElement) {
4605 auto *SubTy = cast<VectorType>(Val);
4613 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4617 RegisterFileMoveCost;
4622 bool Insert,
bool Extract,
4625 cast<FixedVectorType>(Ty)->getNumElements() &&
4626 "Vector size mismatch");
4629 MVT MScalarTy = LT.second.getScalarType();
4630 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4633 constexpr unsigned LaneBitWidth = 128;
4634 assert((LegalVectorBitWidth < LaneBitWidth ||
4635 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4638 const int NumLegalVectors = *LT.first.getValue();
4639 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4644 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4646 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4649 if (LegalVectorBitWidth <= LaneBitWidth) {
4665 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4666 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4667 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4668 unsigned NumLegalElts =
4669 LT.second.getVectorNumElements() * NumLegalVectors;
4671 "Vector has been legalized to smaller element count");
4672 assert((NumLegalElts % NumLanesTotal) == 0 &&
4673 "Unexpected elts per lane");
4674 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4676 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4680 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4682 NumEltsPerLane, NumEltsPerLane *
I);
4683 if (LaneEltMask.
isZero())
4694 APInt AffectedLanes =
4697 AffectedLanes, NumLegalVectors,
true);
4698 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4699 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4700 unsigned I = NumLegalLanes * LegalVec + Lane;
4703 if (!AffectedLanes[
I] ||
4704 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4711 }
else if (LT.second.isVector()) {
4722 unsigned NumElts = LT.second.getVectorNumElements();
4724 PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
4725 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4734 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4735 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4736 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4740 if (LT.second.isVector()) {
4741 unsigned NumLegalElts =
4742 LT.second.getVectorNumElements() * NumLegalVectors;
4744 "Vector has been legalized to smaller element count");
4748 if (LegalVectorBitWidth > LaneBitWidth) {
4749 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4750 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4751 assert((NumLegalElts % NumLanesTotal) == 0 &&
4752 "Unexpected elts per lane");
4753 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4757 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4761 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4763 NumEltsPerLane,
I * NumEltsPerLane);
4764 if (LaneEltMask.
isZero())
4769 LaneTy, LaneEltMask,
false, Extract,
CostKind);
4786 int VF,
const APInt &DemandedDstElts,
4792 auto bailout = [&]() {
4802 unsigned PromEltTyBits = EltTyBits;
4803 switch (EltTyBits) {
4834 int NumDstElements = VF * ReplicationFactor;
4848 if (PromEltTyBits != EltTyBits) {
4854 Instruction::SExt, PromSrcVecTy, SrcVecTy,
4861 ReplicationFactor, VF,
4867 "We expect that the legalization doesn't affect the element width, "
4868 "doesn't coalesce/split elements.");
4871 unsigned NumDstVectors =
4872 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
4881 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
4882 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
4887 return NumDstVectorsDemanded * SingleShuffleCost;
4898 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
4901 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
4902 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
4909 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4919 auto *VTy = dyn_cast<FixedVectorType>(Src);
4924 if (Opcode == Instruction::Store && OpInfo.
isConstant())
4930 if (!VTy || !LT.second.isVector()) {
4932 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
4935 bool IsLoad = Opcode == Instruction::Load;
4937 Type *EltTy = VTy->getElementType();
4942 const unsigned SrcNumElt = VTy->getNumElements();
4945 int NumEltRemaining = SrcNumElt;
4947 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
4949 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
4952 const unsigned XMMBits = 128;
4953 if (XMMBits % EltTyBits != 0)
4957 const int NumEltPerXMM = XMMBits / EltTyBits;
4961 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
4962 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
4964 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
4968 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
4970 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
4971 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
4972 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
4973 "Unless we haven't halved the op size yet, "
4974 "we have less than two op's sized units of work left.");
4976 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
4980 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
4981 "After halving sizes, the vector elt count is no longer a multiple "
4982 "of number of elements per operation?");
4983 auto *CoalescedVecTy =
4984 CurrNumEltPerOp == 1
4988 EltTyBits * CurrNumEltPerOp),
4989 CurrVecTy->getNumElements() / CurrNumEltPerOp);
4992 "coalesciing elements doesn't change vector width.");
4994 while (NumEltRemaining > 0) {
4995 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
4999 if (NumEltRemaining < CurrNumEltPerOp &&
5000 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
5001 CurrOpSizeBytes != 1)
5004 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
5007 if (SubVecEltsLeft == 0) {
5008 SubVecEltsLeft += CurrVecTy->getNumElements();
5013 VTy, std::nullopt,
CostKind, NumEltDone(),
5021 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5022 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
5023 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
5024 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5025 APInt DemandedElts =
5027 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
5028 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5038 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5040 else if (CurrOpSizeBytes < 4)
5045 SubVecEltsLeft -= CurrNumEltPerOp;
5046 NumEltRemaining -= CurrNumEltPerOp;
5051 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5060 bool IsLoad = (Instruction::Load == Opcode);
5061 bool IsStore = (Instruction::Store == Opcode);
5063 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5068 unsigned NumElem = SrcVTy->getNumElements();
5076 MaskTy, DemandedElts,
false,
true,
CostKind);
5081 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5083 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5087 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5095 if (Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64)
5097 return Cost + LT.first;
5099 if (VT.isSimple() && Ty != VT.getSimpleVT() &&
5100 LT.second.getVectorNumElements() == NumElem)
5117 return Cost + LT.first * (IsLoad ? 2 : 8);
5120 return Cost + LT.first;
5128 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5132 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5134 return getGEPCost(BaseGEP->getSourceElementType(),
5135 BaseGEP->getPointerOperand(), Indices,
nullptr,
5150 const unsigned NumVectorInstToHideOverhead = 10;
5163 return NumVectorInstToHideOverhead;
5173 std::optional<FastMathFlags> FMF,
5214 assert(ISD &&
"Invalid opcode");
5222 if (ST->useSLMArithCosts())
5237 MVT MTy = LT.second;
5239 auto *ValVTy = cast<FixedVectorType>(ValTy);
5252 if (LT.first != 1 && MTy.
isVector() &&
5258 ArithmeticCost *= LT.first - 1;
5261 if (ST->useSLMArithCosts())
5263 return ArithmeticCost + Entry->Cost;
5267 return ArithmeticCost + Entry->Cost;
5271 return ArithmeticCost + Entry->Cost;
5320 if (ValVTy->getElementType()->isIntegerTy(1)) {
5322 if (LT.first != 1 && MTy.
isVector() &&
5328 ArithmeticCost *= LT.first - 1;
5332 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5333 return ArithmeticCost + Entry->Cost;
5336 return ArithmeticCost + Entry->Cost;
5339 return ArithmeticCost + Entry->Cost;
5342 return ArithmeticCost + Entry->Cost;
5347 unsigned NumVecElts = ValVTy->getNumElements();
5348 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5358 if (LT.first != 1 && MTy.
isVector() &&
5364 ReductionCost *= LT.first - 1;
5370 while (NumVecElts > 1) {
5372 unsigned Size = NumVecElts * ScalarSize;
5381 }
else if (
Size == 128) {
5384 if (ValVTy->isFloatingPointTy())
5391 std::nullopt,
CostKind, 0,
nullptr);
5392 }
else if (
Size == 64) {
5395 if (ValVTy->isFloatingPointTy())
5402 std::nullopt,
CostKind, 0,
nullptr);
5408 Instruction::LShr, ShiftTy,
CostKind,
5435 MVT MTy = LT.second;
5439 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5443 "Expected float point or integer vector type.");
5444 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5512 auto *ValVTy = cast<FixedVectorType>(ValTy);
5513 unsigned NumVecElts = ValVTy->getNumElements();
5517 if (LT.first != 1 && MTy.
isVector() &&
5523 MinMaxCost *= LT.first - 1;
5529 return MinMaxCost + Entry->Cost;
5533 return MinMaxCost + Entry->Cost;
5537 return MinMaxCost + Entry->Cost;
5541 return MinMaxCost + Entry->Cost;
5553 while (NumVecElts > 1) {
5555 unsigned Size = NumVecElts * ScalarSize;
5563 }
else if (
Size == 128) {
5572 std::nullopt,
CostKind, 0,
nullptr);
5573 }
else if (
Size == 64) {
5581 std::nullopt,
CostKind, 0,
nullptr);
5634 if (BitSize % 64 != 0)
5635 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5640 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5646 return std::max<InstructionCost>(1,
Cost);
5661 unsigned ImmIdx = ~0U;
5665 case Instruction::GetElementPtr:
5672 case Instruction::Store:
5675 case Instruction::ICmp:
5681 if (
Idx == 1 && Imm.getBitWidth() == 64) {
5682 uint64_t ImmVal = Imm.getZExtValue();
5683 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5688 case Instruction::And:
5692 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.isIntN(32))
5696 case Instruction::Add:
5697 case Instruction::Sub:
5699 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
5703 case Instruction::UDiv:
5704 case Instruction::SDiv:
5705 case Instruction::URem:
5706 case Instruction::SRem:
5711 case Instruction::Mul:
5712 case Instruction::Or:
5713 case Instruction::Xor:
5717 case Instruction::Shl:
5718 case Instruction::LShr:
5719 case Instruction::AShr:
5723 case Instruction::Trunc:
5724 case Instruction::ZExt:
5725 case Instruction::SExt:
5726 case Instruction::IntToPtr:
5727 case Instruction::PtrToInt:
5728 case Instruction::BitCast:
5729 case Instruction::PHI:
5730 case Instruction::Call:
5731 case Instruction::Select:
5732 case Instruction::Ret:
5733 case Instruction::Load:
5737 if (
Idx == ImmIdx) {
5762 case Intrinsic::sadd_with_overflow:
5763 case Intrinsic::uadd_with_overflow:
5764 case Intrinsic::ssub_with_overflow:
5765 case Intrinsic::usub_with_overflow:
5766 case Intrinsic::smul_with_overflow:
5767 case Intrinsic::umul_with_overflow:
5768 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
5771 case Intrinsic::experimental_stackmap:
5772 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5775 case Intrinsic::experimental_patchpoint_void:
5776 case Intrinsic::experimental_patchpoint:
5777 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5788 return Opcode == Instruction::PHI ? 0 : 1;
5793int X86TTIImpl::getGatherOverhead()
const {
5806int X86TTIImpl::getScatterOverhead()
const {
5820 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
5821 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5831 if (IndexSize < 64 || !
GEP)
5834 unsigned NumOfVarIndices = 0;
5835 const Value *Ptrs =
GEP->getPointerOperand();
5838 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
5839 if (isa<Constant>(
GEP->getOperand(
I)))
5841 Type *IndxTy =
GEP->getOperand(
I)->getType();
5842 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
5843 IndxTy = IndexVTy->getElementType();
5845 !isa<SExtInst>(
GEP->getOperand(
I))) ||
5846 ++NumOfVarIndices > 1)
5849 return (
unsigned)32;
5854 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
5855 ? getIndexSizeInBits(
Ptr,
DL)
5863 *std::max(IdxsLT.first, SrcLT.first).getValue();
5864 if (SplitFactor > 1) {
5868 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
5878 const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
5879 : getScatterOverhead();
5887 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
5890 if ((Opcode == Instruction::Load &&
5893 Align(Alignment)))) ||
5894 (Opcode == Instruction::Store &&
5897 Align(Alignment)))))
5903 if (!PtrTy &&
Ptr->getType()->isVectorTy())
5904 PtrTy = dyn_cast<PointerType>(
5905 cast<VectorType>(
Ptr->getType())->getElementType());
5906 assert(PtrTy &&
"Unexpected type for Ptr argument");
5908 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
5924 return ST->hasMacroFusion() || ST->hasBranchFusion();
5931 if (isa<VectorType>(DataTy) && cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5943 if (ScalarTy->
isHalfTy() && ST->hasBWI())
5953 return IntWidth == 32 || IntWidth == 64 ||
5954 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
5966 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
5983 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6005 if (!isa<VectorType>(DataTy))
6012 if (cast<FixedVectorType>(DataTy)->getNumElements() == 1)
6015 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6024 return IntWidth == 32 || IntWidth == 64 ||
6025 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6032bool X86TTIImpl::supportsGather()
const {
6046 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6047 return NumElts == 1 ||
6048 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6063 return IntWidth == 32 || IntWidth == 64;
6067 if (!supportsGather() || !ST->preferGather())
6082 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6083 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6088 for (
int Lane : seq<int>(0, NumElements)) {
6089 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6091 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6093 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6097 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6099 return ST->
hasSSE3() && NumElements % 4 == 0;
6101 return ST->
hasSSE3() && NumElements % 2 == 0;
6107 if (!ST->
hasAVX512() || !ST->preferScatter())
6120 if (
I->getOpcode() == Instruction::FDiv)
6136 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6138 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6141 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6142 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6143 if (RealCallerBits == RealCalleeBits)
6148 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6152 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6154 if (CB->isInlineAsm())
6158 for (
Value *Arg : CB->args())
6159 Types.push_back(Arg->getType());
6160 if (!CB->getType()->isVoidTy())
6161 Types.push_back(CB->getType());
6164 auto IsSimpleTy = [](
Type *Ty) {
6165 return !Ty->isVectorTy() && !Ty->isAggregateType();
6167 if (
all_of(Types, IsSimpleTy))
6170 if (
Function *NestedCallee = CB->getCalledFunction()) {
6172 if (NestedCallee->isIntrinsic())
6207 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6216 Options.AllowOverlappingLoads =
true;
6221 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6222 Options.LoadSizes.push_back(64);
6223 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6224 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6226 if (ST->is64Bit()) {
6227 Options.LoadSizes.push_back(8);
6229 Options.LoadSizes.push_back(4);
6230 Options.LoadSizes.push_back(2);
6231 Options.LoadSizes.push_back(1);
6236 return supportsGather();
6247 return !(ST->isAtom());
6267 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6273 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6286 if (UseMaskedMemOp) {
6288 for (
unsigned Index : Indices) {
6289 assert(
Index < Factor &&
"Invalid index for interleaved memory op");
6290 for (
unsigned Elm = 0; Elm < VF; Elm++)
6291 DemandedLoadStoreElts.
setBit(
Index + Elm * Factor);
6298 UseMaskForGaps ? DemandedLoadStoreElts
6307 if (UseMaskForGaps) {
6313 if (Opcode == Instruction::Load) {
6320 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6321 {3, MVT::v16i8, 12},
6322 {3, MVT::v32i8, 14},
6323 {3, MVT::v64i8, 22},
6326 if (
const auto *Entry =
6328 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6338 ShuffleKind, SingleMemOpTy, std::nullopt,
CostKind, 0,
nullptr);
6340 unsigned NumOfLoadsInInterleaveGrp =
6341 Indices.
size() ? Indices.
size() : Factor;
6350 unsigned NumOfUnfoldedLoads =
6351 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6354 unsigned NumOfShufflesPerResult =
6355 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6362 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6365 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6372 assert(Opcode == Instruction::Store &&
6373 "Expected Store Instruction at this point");
6375 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6376 {3, MVT::v16i8, 12},
6377 {3, MVT::v32i8, 14},
6378 {3, MVT::v64i8, 26},
6381 {4, MVT::v16i8, 11},
6382 {4, MVT::v32i8, 14},
6386 if (
const auto *Entry =
6388 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6393 unsigned NumOfSources = Factor;
6396 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6400 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6403 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6411 bool UseMaskForCond,
bool UseMaskForGaps) {
6412 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6414 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6415 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6420 return ST->hasBWI();
6422 return ST->hasBF16();
6425 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6427 Opcode, VecTy, Factor, Indices, Alignment,
6430 if (UseMaskForCond || UseMaskForGaps)
6433 UseMaskForCond, UseMaskForGaps);
6453 unsigned VF = VecTy->getNumElements() / Factor;
6454 Type *ScalarTy = VecTy->getElementType();
6486 {2, MVT::v16i16, 9},
6487 {2, MVT::v32i16, 18},
6490 {2, MVT::v16i32, 8},
6491 {2, MVT::v32i32, 16},
6495 {2, MVT::v16i64, 16},
6496 {2, MVT::v32i64, 32},
6501 {3, MVT::v16i8, 11},
6502 {3, MVT::v32i8, 14},
6507 {3, MVT::v16i16, 28},
6508 {3, MVT::v32i16, 56},
6513 {3, MVT::v16i32, 14},
6514 {3, MVT::v32i32, 32},
6518 {3, MVT::v8i64, 10},
6519 {3, MVT::v16i64, 20},
6524 {4, MVT::v16i8, 24},
6525 {4, MVT::v32i8, 56},
6528 {4, MVT::v4i16, 17},
6529 {4, MVT::v8i16, 33},
6530 {4, MVT::v16i16, 75},
6531 {4, MVT::v32i16, 150},
6535 {4, MVT::v8i32, 16},
6536 {4, MVT::v16i32, 32},
6537 {4, MVT::v32i32, 68},
6541 {4, MVT::v8i64, 20},
6542 {4, MVT::v16i64, 40},
6547 {6, MVT::v16i8, 43},
6548 {6, MVT::v32i8, 82},
6550 {6, MVT::v2i16, 13},
6552 {6, MVT::v8i16, 39},
6553 {6, MVT::v16i16, 106},
6554 {6, MVT::v32i16, 212},
6557 {6, MVT::v4i32, 15},
6558 {6, MVT::v8i32, 31},
6559 {6, MVT::v16i32, 64},
6562 {6, MVT::v4i64, 18},
6563 {6, MVT::v8i64, 36},
6568 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6582 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6587 {2, MVT::v16i16, 4},
6588 {2, MVT::v32i16, 8},
6592 {2, MVT::v16i32, 8},
6593 {2, MVT::v32i32, 16},
6598 {2, MVT::v16i64, 16},
6599 {2, MVT::v32i64, 32},
6604 {3, MVT::v16i8, 11},
6605 {3, MVT::v32i8, 13},
6609 {3, MVT::v8i16, 12},
6610 {3, MVT::v16i16, 27},
6611 {3, MVT::v32i16, 54},
6615 {3, MVT::v8i32, 11},
6616 {3, MVT::v16i32, 22},
6617 {3, MVT::v32i32, 48},
6621 {3, MVT::v8i64, 12},
6622 {3, MVT::v16i64, 24},
6628 {4, MVT::v32i8, 12},
6632 {4, MVT::v8i16, 10},
6633 {4, MVT::v16i16, 32},
6634 {4, MVT::v32i16, 64},
6638 {4, MVT::v8i32, 16},
6639 {4, MVT::v16i32, 32},
6640 {4, MVT::v32i32, 64},
6644 {4, MVT::v8i64, 20},
6645 {4, MVT::v16i64, 40},
6650 {6, MVT::v16i8, 27},
6651 {6, MVT::v32i8, 90},
6653 {6, MVT::v2i16, 10},
6654 {6, MVT::v4i16, 15},
6655 {6, MVT::v8i16, 21},
6656 {6, MVT::v16i16, 58},
6657 {6, MVT::v32i16, 90},
6660 {6, MVT::v4i32, 12},
6661 {6, MVT::v8i32, 33},
6662 {6, MVT::v16i32, 66},
6665 {6, MVT::v4i64, 15},
6666 {6, MVT::v8i64, 30},
6669 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6680 if (Opcode == Instruction::Load) {
6681 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6685 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6689 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6691 return GetDiscountedCost(Entry);
6694 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6696 return GetDiscountedCost(Entry);
6699 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6701 return GetDiscountedCost(Entry);
6703 assert(Opcode == Instruction::Store &&
6704 "Expected Store Instruction at this point");
6706 "Interleaved store only supports fully-interleaved groups.");
6708 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6710 return MemOpCosts + Entry->Cost;
6713 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6715 return MemOpCosts + Entry->Cost;
6720 UseMaskForCond, UseMaskForGaps);
6725 bool HasBaseReg, int64_t Scale,
6726 unsigned AddrSpace)
const {
6754 return AM.
Scale != 0;
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t IntrinsicInst * II
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and is congruent to Skew modulo Align.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.