63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
165 bool Vector = (ClassID == 1);
184 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
186 if (ST->
hasAVX() && PreferVectorWidth >= 256)
188 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
229 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
246 assert(ISD &&
"Invalid opcode");
248 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
249 (LT.second.getScalarType() == MVT::i32 ||
250 LT.second.getScalarType() == MVT::i64)) {
252 bool Op1Signed =
false, Op2Signed =
false;
255 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
256 bool SignedMode = Op1Signed || Op2Signed;
261 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
262 LT.second.getScalarType() == MVT::i32) {
264 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
266 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
267 bool Op1Sext = isa<SExtInst>(Args[0]) &&
268 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
269 bool Op2Sext = isa<SExtInst>(Args[1]) &&
270 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
272 bool IsZeroExtended = !Op1Signed || !Op2Signed;
273 bool IsConstant = Op1Constant || Op2Constant;
274 bool IsSext = Op1Sext || Op2Sext;
275 if (IsConstant || IsZeroExtended || IsSext)
283 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
286 if (!SignedMode && OpMinSize <= 8)
290 if (!SignedMode && OpMinSize <= 16)
297 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
350 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
351 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
352 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
353 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
354 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
355 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
356 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
357 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
358 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
362 if (
const auto *Entry =
364 if (
auto KindCost = Entry->Cost[
CostKind])
365 return LT.first * *KindCost;
368 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
369 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
370 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
371 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
372 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
373 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
374 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
375 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
376 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
378 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
379 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
380 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
381 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
382 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
383 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
387 if (
const auto *Entry =
389 if (
auto KindCost = Entry->Cost[
CostKind])
390 return LT.first * *KindCost;
393 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
394 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
395 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
397 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
398 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
399 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
401 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
402 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
403 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
404 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
405 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
406 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
408 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
409 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
410 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
411 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
412 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
413 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
414 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
423 if (
const auto *Entry =
425 if (
auto KindCost = Entry->Cost[
CostKind])
426 return LT.first * *KindCost;
429 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
430 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
431 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
432 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
433 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
434 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
436 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
437 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
438 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
439 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
440 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
441 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
443 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
444 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
445 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
446 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
447 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
448 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
450 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
451 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
452 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
453 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
454 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
455 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
464 if (
const auto *Entry =
466 if (
auto KindCost = Entry->Cost[
CostKind])
467 return LT.first * *KindCost;
470 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
471 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
472 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
473 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
474 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
475 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
477 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
478 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
479 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
480 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
481 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
482 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
484 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
485 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
486 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
487 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
488 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
489 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
491 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
492 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
493 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
494 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
495 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
496 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
506 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
507 if (
const auto *Entry =
509 if (
auto KindCost = Entry->Cost[
CostKind])
510 return LT.first * *KindCost;
513 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
514 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
515 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
517 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
518 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
519 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
521 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
522 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
523 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
525 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
526 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
527 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
537 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
538 if (
const auto *Entry =
540 if (
auto KindCost = Entry->Cost[
CostKind])
541 return LT.first * *KindCost;
556 if (
const auto *Entry =
558 if (
auto KindCost = Entry->Cost[
CostKind])
559 return LT.first * *KindCost;
579 if (
const auto *Entry =
581 if (
auto KindCost = Entry->Cost[
CostKind])
582 return LT.first * *KindCost;
602 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
603 if (
auto KindCost = Entry->Cost[
CostKind])
604 return LT.first * *KindCost;
624 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
625 if (
auto KindCost = Entry->Cost[
CostKind])
626 return LT.first * *KindCost;
634 if (
const auto *Entry =
636 if (
auto KindCost = Entry->Cost[
CostKind])
637 return LT.first * *KindCost;
657 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
658 if (
auto KindCost = Entry->Cost[
CostKind])
659 return LT.first * *KindCost;
662 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
663 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
664 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
665 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
666 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
667 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
668 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
669 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
670 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
672 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
673 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
674 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
678 if (
const auto *Entry =
680 if (
auto KindCost = Entry->Cost[
CostKind])
681 return LT.first * *KindCost;
684 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
685 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
686 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
688 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
689 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
690 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
692 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
693 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
694 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
695 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
696 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
697 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
698 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
702 if (
const auto *Entry =
704 if (
auto KindCost = Entry->Cost[
CostKind])
705 return LT.first * *KindCost;
709 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
710 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
711 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
712 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
713 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
714 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
716 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
717 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
718 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
719 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
720 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
721 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
723 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
724 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
725 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
726 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
727 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
728 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
730 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
731 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
732 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
733 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
734 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
735 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
739 if (
const auto *Entry =
741 if (
auto KindCost = Entry->Cost[
CostKind])
742 return LT.first * *KindCost;
745 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
746 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
747 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
748 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
749 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
750 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
752 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
753 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
754 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
755 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
756 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
757 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
759 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
760 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
761 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
762 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
763 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
764 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
766 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
767 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
768 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
769 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
770 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
771 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
776 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
777 if (
const auto *Entry =
779 if (
auto KindCost = Entry->Cost[
CostKind])
780 return LT.first * *KindCost;
784 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
785 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
786 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
788 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
789 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
790 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
792 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
793 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
794 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
796 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
797 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
798 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
802 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
803 if (
const auto *Entry =
805 if (
auto KindCost = Entry->Cost[
CostKind])
806 return LT.first * *KindCost;
809 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
810 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
811 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
816 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
817 if (
auto KindCost = Entry->Cost[
CostKind])
818 return LT.first * *KindCost;
821 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
822 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
823 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
824 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
825 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
826 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
827 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
828 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
829 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
831 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
832 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
833 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
834 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
835 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
836 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
837 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
838 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
839 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
841 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
842 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
844 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
845 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
846 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
847 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
849 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
850 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
852 {
ISD::MUL, MVT::v64i8, { 5, 10,10,11 } },
853 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
855 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
856 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
857 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
858 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
863 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
864 if (
auto KindCost = Entry->Cost[
CostKind])
865 return LT.first * *KindCost;
868 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
869 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
870 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
872 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
873 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
874 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
876 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
877 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
878 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
879 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
880 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
881 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
882 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
883 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
884 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
886 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
887 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
888 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
889 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
890 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
891 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
892 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
893 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
894 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
896 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
897 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
899 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
900 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
902 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
903 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
904 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
905 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
907 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
908 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
909 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
910 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
912 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
913 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
914 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
915 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
917 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
918 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
919 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
920 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
925 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
926 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
927 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
928 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
929 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
930 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
931 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
932 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
935 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
936 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
937 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
938 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
940 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
941 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
942 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
943 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
944 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
945 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
946 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
947 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
950 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
951 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
952 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
953 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
957 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
958 if (
auto KindCost = Entry->Cost[
CostKind])
959 return LT.first * *KindCost;
964 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
965 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
966 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
967 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
968 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
969 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
970 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
971 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
972 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
973 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
985 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
986 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
993 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
994 if (
auto KindCost = Entry->Cost[
CostKind])
995 return LT.first * *KindCost;
1000 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1001 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1002 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1003 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1004 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1005 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1006 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1007 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1008 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1009 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1010 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1011 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1013 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1014 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1015 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1016 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1017 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1018 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1019 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1020 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1021 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1022 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1023 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1024 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1034 if (
const auto *Entry =
1036 if (
auto KindCost = Entry->Cost[
CostKind])
1037 return LT.first * *KindCost;
1044 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1045 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1050 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1051 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1052 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1053 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1056 if (ST->useGLMDivSqrtCosts())
1057 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1058 if (
auto KindCost = Entry->Cost[
CostKind])
1059 return LT.first * *KindCost;
1062 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1063 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1064 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1065 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1066 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1067 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1068 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1069 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1070 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1071 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1072 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1073 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1079 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1081 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1082 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1085 if (ST->useSLMArithCosts())
1086 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1087 if (
auto KindCost = Entry->Cost[
CostKind])
1088 return LT.first * *KindCost;
1091 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1092 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1093 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1094 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1096 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1097 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1098 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1099 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1101 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1102 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1103 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1104 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1105 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1106 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1108 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1109 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1110 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1111 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1112 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1113 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1114 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1115 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1117 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1118 {
ISD::MUL, MVT::v32i8, { 6, 11,10,19 } },
1119 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1120 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1121 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1122 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1123 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1127 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1128 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1130 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1131 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1132 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1133 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1134 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1135 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1137 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1138 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1139 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1140 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1141 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1142 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1144 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1145 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1146 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1147 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1148 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1149 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1151 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1152 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1153 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1154 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1155 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1156 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1161 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1162 if (
auto KindCost = Entry->Cost[
CostKind])
1163 return LT.first * *KindCost;
1169 {
ISD::MUL, MVT::v32i8, { 12, 13, 22, 23 } },
1170 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1171 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1172 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1173 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1175 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1176 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1177 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1178 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1180 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1181 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1182 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1183 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1185 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1186 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1187 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1188 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1190 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1191 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1192 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1193 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1194 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1195 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1196 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1197 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1198 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1199 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1201 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1202 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1203 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1204 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1205 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1206 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1207 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1208 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1210 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1211 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1212 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1213 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1214 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1215 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1216 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1217 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1219 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1220 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1221 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1222 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1223 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1224 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1225 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1226 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1228 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1229 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1231 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1232 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1233 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1234 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1235 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1236 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1238 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1239 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1240 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1241 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1242 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1243 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1245 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1246 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1247 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1248 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1249 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1250 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1252 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1253 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1254 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1255 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1256 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1257 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1261 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1262 if (
auto KindCost = Entry->Cost[
CostKind])
1263 return LT.first * *KindCost;
1266 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1267 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1268 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1269 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1271 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1272 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1273 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1274 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1276 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1277 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1278 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1279 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1281 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1282 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1283 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1284 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1286 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1290 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1291 if (
auto KindCost = Entry->Cost[
CostKind])
1292 return LT.first * *KindCost;
1295 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1296 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1297 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1299 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1300 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1301 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1302 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1304 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1305 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1306 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1307 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1309 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1310 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1314 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1315 if (
auto KindCost = Entry->Cost[
CostKind])
1316 return LT.first * *KindCost;
1321 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1322 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1323 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1324 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1326 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1327 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1328 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1329 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1331 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1332 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1333 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1334 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1336 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1337 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1338 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1339 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1341 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1342 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1343 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1344 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1346 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1347 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1348 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1349 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1351 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1352 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1354 {
ISD::MUL, MVT::v16i8, { 5, 18,12,12 } },
1355 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1356 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1357 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1361 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1362 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1363 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1364 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1366 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1367 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1368 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1369 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1371 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1372 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1373 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1375 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1376 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1377 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1379 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1380 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1384 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1385 if (
auto KindCost = Entry->Cost[
CostKind])
1386 return LT.first * *KindCost;
1389 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1390 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1392 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1393 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1395 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1396 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1398 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1399 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1401 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1402 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1406 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1407 if (
auto KindCost = Entry->Cost[
CostKind])
1408 return LT.first * *KindCost;
1413 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1418 if (
auto KindCost = Entry->Cost[
CostKind])
1419 return LT.first * *KindCost;
1430 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1431 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1432 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1434 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1435 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1436 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1437 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1438 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1442 if (
auto KindCost = Entry->Cost[
CostKind])
1443 return LT.first * *KindCost;
1457 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1506 CostKind, Mask.size() / 2, BaseTp);
1519 using namespace PatternMatch;
1522 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1527 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1528 LT.second = LT.second.changeVectorElementType(MVT::f16);
1533 int NumElts = LT.second.getVectorNumElements();
1534 if ((
Index % NumElts) == 0)
1537 if (SubLT.second.isVector()) {
1538 int NumSubElts = SubLT.second.getVectorNumElements();
1539 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1547 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1548 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
1549 (NumSubElts % OrigSubElts) == 0 &&
1550 LT.second.getVectorElementType() ==
1551 SubLT.second.getVectorElementType() &&
1552 LT.second.getVectorElementType().getSizeInBits() ==
1554 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1555 "Unexpected number of elements!");
1557 LT.second.getVectorNumElements());
1559 SubLT.second.getVectorNumElements());
1568 return ExtractCost + 1;
1571 "Unexpected vector size");
1573 return ExtractCost + 2;
1584 int NumElts = LT.second.getVectorNumElements();
1586 if (SubLT.second.isVector()) {
1587 int NumSubElts = SubLT.second.getVectorNumElements();
1588 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1601 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1632 if (
const auto *Entry =
1641 MVT LegalVT = LT.second;
1646 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1650 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1657 if (!Mask.empty() && NumOfDests.
isValid()) {
1675 unsigned E = *NumOfDests.
getValue();
1676 unsigned NormalizedVF =
1682 unsigned PrevSrcReg = 0;
1686 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1687 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1692 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1693 PrevRegMask != RegMask)
1701 if (SrcReg != DestReg &&
1706 PrevSrcReg = SrcReg;
1707 PrevRegMask = RegMask;
1720 std::nullopt,
CostKind, 0,
nullptr);
1731 LT.first = NumOfDests * NumOfShufflesPerDest;
1747 if (
const auto *Entry =
1749 return LT.first * Entry->Cost;
1782 if (
const auto *Entry =
1784 return LT.first * Entry->Cost;
1861 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1862 if (
auto KindCost = Entry->Cost[
CostKind])
1863 return LT.first * *KindCost;
1916 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1917 return LT.first * Entry->Cost;
1938 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
1939 return LT.first * Entry->Cost;
2001 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2002 return LT.first * Entry->Cost;
2015 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2016 return LT.first * Entry->Cost;
2047 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2048 return LT.first * Entry->Cost;
2104 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2106 if (
const auto *Entry =
2109 LT.second.getVectorElementCount()) &&
2110 "Table entry missing from isLegalBroadcastLoad()");
2111 return LT.first * Entry->Cost;
2114 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2115 return LT.first * Entry->Cost;
2128 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2129 return LT.first * Entry->Cost;
2140 assert(ISD &&
"Invalid opcode");
2286 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2287 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2634 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2711 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
2935 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
2953 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2954 if (
auto KindCost = Entry->Cost[
CostKind])
2959 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2960 if (
auto KindCost = Entry->Cost[
CostKind])
2965 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2966 if (
auto KindCost = Entry->Cost[
CostKind])
2972 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2973 if (
auto KindCost = Entry->Cost[
CostKind])
2978 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2979 if (
auto KindCost = Entry->Cost[
CostKind])
2984 SimpleDstTy, SimpleSrcTy))
2985 if (
auto KindCost = Entry->Cost[
CostKind])
2990 SimpleDstTy, SimpleSrcTy))
2991 if (
auto KindCost = Entry->Cost[
CostKind])
2997 SimpleDstTy, SimpleSrcTy))
2998 if (
auto KindCost = Entry->Cost[
CostKind])
3004 SimpleDstTy, SimpleSrcTy))
3005 if (
auto KindCost = Entry->Cost[
CostKind])
3011 SimpleDstTy, SimpleSrcTy))
3012 if (
auto KindCost = Entry->Cost[
CostKind])
3028 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3029 if (
auto KindCost = Entry->Cost[
CostKind])
3030 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3034 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3035 if (
auto KindCost = Entry->Cost[
CostKind])
3036 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3040 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3041 if (
auto KindCost = Entry->Cost[
CostKind])
3042 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3047 LTDest.second, LTSrc.second))
3048 if (
auto KindCost = Entry->Cost[
CostKind])
3049 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3053 LTDest.second, LTSrc.second))
3054 if (
auto KindCost = Entry->Cost[
CostKind])
3055 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3059 LTDest.second, LTSrc.second))
3060 if (
auto KindCost = Entry->Cost[
CostKind])
3061 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3065 LTDest.second, LTSrc.second))
3066 if (
auto KindCost = Entry->Cost[
CostKind])
3067 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3071 LTDest.second, LTSrc.second))
3072 if (
auto KindCost = Entry->Cost[
CostKind])
3073 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3077 LTDest.second, LTSrc.second))
3078 if (
auto KindCost = Entry->Cost[
CostKind])
3079 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3083 LTDest.second, LTSrc.second))
3084 if (
auto KindCost = Entry->Cost[
CostKind])
3085 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3090 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3091 Type *ExtSrc = Src->getWithNewBitWidth(32);
3097 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3107 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3108 Type *TruncDst = Dst->getWithNewBitWidth(32);
3118 return Cost == 0 ? 0 :
N;
3138 MVT MTy = LT.second;
3141 assert(ISD &&
"Invalid opcode");
3144 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3157 Pred = cast<CmpInst>(
I)->getPredicate();
3159 bool CmpWithConstant =
false;
3160 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3161 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3166 ExtraCost = CmpWithConstant ? 0 : 1;
3171 ExtraCost = CmpWithConstant ? 0 : 1;
3177 ExtraCost = CmpWithConstant ? 1 : 2;
3188 ExtraCost = CmpWithConstant ? 2 : 3;
3195 if (CondTy && !ST->
hasAVX())
3364 if (ST->useSLMArithCosts())
3366 if (
auto KindCost = Entry->Cost[
CostKind])
3367 return LT.first * (ExtraCost + *KindCost);
3371 if (
auto KindCost = Entry->Cost[
CostKind])
3372 return LT.first * (ExtraCost + *KindCost);
3376 if (
auto KindCost = Entry->Cost[
CostKind])
3377 return LT.first * (ExtraCost + *KindCost);
3381 if (
auto KindCost = Entry->Cost[
CostKind])
3382 return LT.first * (ExtraCost + *KindCost);
3386 if (
auto KindCost = Entry->Cost[
CostKind])
3387 return LT.first * (ExtraCost + *KindCost);
3391 if (
auto KindCost = Entry->Cost[
CostKind])
3392 return LT.first * (ExtraCost + *KindCost);
3396 if (
auto KindCost = Entry->Cost[
CostKind])
3397 return LT.first * (ExtraCost + *KindCost);
3401 if (
auto KindCost = Entry->Cost[
CostKind])
3402 return LT.first * (ExtraCost + *KindCost);
3406 if (
auto KindCost = Entry->Cost[
CostKind])
3407 return LT.first * (ExtraCost + *KindCost);
3411 if (
auto KindCost = Entry->Cost[
CostKind])
3412 return LT.first * (ExtraCost + *KindCost);
3437 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3438 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3439 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3440 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3441 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3442 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3443 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3444 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3445 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3446 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3447 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3448 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3449 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3450 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3451 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3473 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3474 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3475 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3476 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3477 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3478 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3479 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3480 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3481 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3482 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3483 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3484 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3486 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3487 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3488 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3489 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3490 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3491 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3494 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3495 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3517 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3518 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3519 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3520 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3521 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3522 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3523 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3524 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3525 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3526 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3527 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3528 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3529 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3533 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3534 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3535 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3536 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3537 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3538 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3539 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3540 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3541 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3542 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3543 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3544 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3545 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3546 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3547 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3548 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3549 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3550 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3559 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3560 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3561 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3562 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3567 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3568 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3569 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3570 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3575 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3576 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3577 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3578 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3579 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3580 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3581 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3582 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3583 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3591 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3592 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3593 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3594 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3595 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3596 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3597 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3598 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3599 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3600 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3601 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3602 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3603 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3604 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3605 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3606 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3607 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3608 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3609 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3610 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3611 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3612 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3613 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3614 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3621 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3622 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3623 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3624 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3625 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3626 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3627 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3628 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3629 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3630 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3631 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3632 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3633 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3634 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3635 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3636 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3637 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3638 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3639 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3640 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3641 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3642 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3643 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3644 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3672 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3675 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3676 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3692 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3693 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3694 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3695 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3696 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3697 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3698 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3699 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3700 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3701 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3702 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3703 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3704 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3705 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3706 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3707 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3718 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3719 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3720 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3721 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3722 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3723 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3724 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3725 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3740 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3741 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3742 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3743 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3744 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3745 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3746 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3747 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3748 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3749 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3750 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3751 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3752 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3753 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3756 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3757 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3758 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3759 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3760 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3761 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3762 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3763 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3766 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3767 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3768 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3769 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3770 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3771 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3772 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3773 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3774 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3775 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3781 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3782 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3783 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3784 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3785 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3786 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3787 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3788 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3789 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3790 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3802 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
3804 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
3805 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
3808 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
3809 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
3810 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
3811 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
3824 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
3826 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
3827 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
3828 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
3829 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
3830 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
3831 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
3832 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
3833 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
3834 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
3835 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
3836 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
3837 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
3838 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
3839 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
3840 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
3841 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
3842 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
3843 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
3844 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
3845 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
3846 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
3847 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
3848 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
3849 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
3852 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
3853 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
3854 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3855 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3856 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3857 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
3858 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3859 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3860 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3861 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3867 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
3868 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
3869 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3870 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3871 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3872 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
3873 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
3874 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3875 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3876 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3887 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
3888 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
3890 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
3891 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
3916 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
3918 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
3925 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
3927 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
3937 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
3940 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
3941 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
3942 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3943 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
3944 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3945 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3946 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
3947 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
3948 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
3949 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
3950 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
3951 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
3952 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
3955 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
3956 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
3957 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
3965 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
3966 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
3967 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
3968 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
3969 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
3970 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
3971 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
3972 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
3973 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
3974 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
3975 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
3976 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
3979 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
3980 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
3981 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
3982 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
3987 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
3990 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
3991 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
3992 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
3993 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
3994 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
3995 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
3996 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
3997 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
3998 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
3999 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4000 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4001 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4004 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4005 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4006 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4007 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4008 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4009 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4010 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4011 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4016 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4017 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4018 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4019 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4020 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4021 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4022 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4023 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4029 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4035 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4062 {
ISD::ABS, MVT::i64, { 1, 2, 3, 4 } },
4070 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4071 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4073 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4074 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4075 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4076 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4077 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4083 {
ISD::ABS, MVT::i32, { 1, 2, 3, 4 } },
4084 {
ISD::ABS, MVT::i16, { 2, 2, 3, 4 } },
4085 {
ISD::ABS, MVT::i8, { 2, 4, 4, 4 } },
4106 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4107 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4109 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4110 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4115 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4116 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4118 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4119 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4121 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4122 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4124 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4125 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4127 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4128 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4148 case Intrinsic::abs:
4151 case Intrinsic::bitreverse:
4154 case Intrinsic::bswap:
4157 case Intrinsic::ctlz:
4160 case Intrinsic::ctpop:
4163 case Intrinsic::cttz:
4166 case Intrinsic::fshl:
4170 if (Args[0] == Args[1]) {
4181 case Intrinsic::fshr:
4186 if (Args[0] == Args[1]) {
4197 case Intrinsic::lrint:
4198 case Intrinsic::llrint:
4207 case Intrinsic::maxnum:
4208 case Intrinsic::minnum:
4212 case Intrinsic::sadd_sat:
4215 case Intrinsic::smax:
4218 case Intrinsic::smin:
4221 case Intrinsic::ssub_sat:
4224 case Intrinsic::uadd_sat:
4227 case Intrinsic::umax:
4230 case Intrinsic::umin:
4233 case Intrinsic::usub_sat:
4236 case Intrinsic::sqrt:
4239 case Intrinsic::sadd_with_overflow:
4240 case Intrinsic::ssub_with_overflow:
4243 OpTy =
RetTy->getContainedType(0);
4245 case Intrinsic::uadd_with_overflow:
4246 case Intrinsic::usub_with_overflow:
4249 OpTy =
RetTy->getContainedType(0);
4251 case Intrinsic::umul_with_overflow:
4252 case Intrinsic::smul_with_overflow:
4255 OpTy =
RetTy->getContainedType(0);
4262 MVT MTy = LT.second;
4265 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4266 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4269 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4270 if (Cst->isAllOnesValue())
4278 auto adjustTableCost = [](
int ISD,
unsigned Cost,
4286 return LegalizationCost * 1;
4288 return LegalizationCost * (int)
Cost;
4291 if (ST->useGLMDivSqrtCosts())
4293 if (
auto KindCost = Entry->Cost[
CostKind])
4294 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4297 if (ST->useSLMArithCosts())
4299 if (
auto KindCost = Entry->Cost[
CostKind])
4300 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4304 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4305 if (
auto KindCost = Entry->Cost[
CostKind])
4306 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4309 if (ST->hasBITALG())
4310 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4311 if (
auto KindCost = Entry->Cost[
CostKind])
4312 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4315 if (ST->hasVPOPCNTDQ())
4316 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4317 if (
auto KindCost = Entry->Cost[
CostKind])
4318 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4323 if (
auto KindCost = Entry->Cost[
CostKind])
4324 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4329 if (
auto KindCost = Entry->Cost[
CostKind])
4330 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4335 if (
auto KindCost = Entry->Cost[
CostKind])
4336 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4341 if (
auto KindCost = Entry->Cost[
CostKind])
4342 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4347 if (
auto KindCost = Entry->Cost[
CostKind])
4348 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4353 if (
auto KindCost = Entry->Cost[
CostKind])
4354 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4359 if (
auto KindCost = Entry->Cost[
CostKind])
4360 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4365 if (
auto KindCost = Entry->Cost[
CostKind])
4366 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4371 if (
auto KindCost = Entry->Cost[
CostKind])
4372 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4377 if (
auto KindCost = Entry->Cost[
CostKind])
4378 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4383 if (
auto KindCost = Entry->Cost[
CostKind])
4384 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4389 if (
auto KindCost = Entry->Cost[
CostKind])
4390 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4396 if (
auto KindCost = Entry->Cost[
CostKind])
4397 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4401 if (
auto KindCost = Entry->Cost[
CostKind])
4402 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4406 if (ST->hasLZCNT()) {
4409 if (
auto KindCost = Entry->Cost[
CostKind])
4410 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4414 if (
auto KindCost = Entry->Cost[
CostKind])
4415 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4419 if (ST->hasPOPCNT()) {
4422 if (
auto KindCost = Entry->Cost[
CostKind])
4423 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4427 if (
auto KindCost = Entry->Cost[
CostKind])
4428 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4432 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4434 if (II->hasOneUse() && isa<StoreInst>(II->user_back()))
4436 if (
auto *LI = dyn_cast<LoadInst>(II->getOperand(0))) {
4437 if (LI->hasOneUse())
4445 if (
auto KindCost = Entry->Cost[
CostKind])
4446 return adjustTableCost(Entry->ISD, *KindCost, LT.first,
4450 if (
auto KindCost = Entry->Cost[
CostKind])
4451 return adjustTableCost(Entry->ISD, *KindCost, LT.first, ICA.
getFlags());
4474 if (
Index == -1U && (Opcode == Instruction::ExtractElement ||
4475 Opcode == Instruction::InsertElement)) {
4480 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4485 if (Opcode == Instruction::ExtractElement) {
4491 if (Opcode == Instruction::InsertElement) {
4499 if (
Index != -1U && (Opcode == Instruction::ExtractElement ||
4500 Opcode == Instruction::InsertElement)) {
4502 if (Opcode == Instruction::ExtractElement &&
4504 cast<FixedVectorType>(Val)->getNumElements() > 1)
4511 if (!LT.second.isVector())
4515 unsigned SizeInBits = LT.second.getSizeInBits();
4516 unsigned NumElts = LT.second.getVectorNumElements();
4517 unsigned SubNumElts = NumElts;
4522 if (SizeInBits > 128) {
4523 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4524 unsigned NumSubVecs = SizeInBits / 128;
4525 SubNumElts = NumElts / NumSubVecs;
4526 if (SubNumElts <=
Index) {
4527 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4528 Index %= SubNumElts;
4532 MVT MScalarTy = LT.second.getScalarType();
4533 auto IsCheapPInsrPExtrInsertPS = [&]() {
4536 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4538 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4539 Opcode == Instruction::InsertElement);
4547 (Opcode != Instruction::InsertElement || !Op0 ||
4548 isa<UndefValue>(Op0)))
4549 return RegisterFileMoveCost;
4551 if (Opcode == Instruction::InsertElement &&
4552 isa_and_nonnull<UndefValue>(Op0)) {
4554 if (isa_and_nonnull<LoadInst>(Op1))
4555 return RegisterFileMoveCost;
4556 if (!IsCheapPInsrPExtrInsertPS()) {
4559 return 2 + RegisterFileMoveCost;
4561 return 1 + RegisterFileMoveCost;
4566 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4567 return 1 + RegisterFileMoveCost;
4571 assert(ISD &&
"Unexpected vector opcode");
4572 if (ST->useSLMArithCosts())
4574 return Entry->Cost + RegisterFileMoveCost;
4577 if (IsCheapPInsrPExtrInsertPS())
4578 return 1 + RegisterFileMoveCost;
4587 if (Opcode == Instruction::InsertElement) {
4588 auto *SubTy = cast<VectorType>(Val);
4596 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4600 RegisterFileMoveCost;
4605 bool Insert,
bool Extract,
4608 cast<FixedVectorType>(Ty)->getNumElements() &&
4609 "Vector size mismatch");
4612 MVT MScalarTy = LT.second.getScalarType();
4613 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4616 constexpr unsigned LaneBitWidth = 128;
4617 assert((LegalVectorBitWidth < LaneBitWidth ||
4618 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4621 const int NumLegalVectors = *LT.first.getValue();
4622 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4627 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4629 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4632 if (LegalVectorBitWidth <= LaneBitWidth) {
4648 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4649 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4650 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4651 unsigned NumLegalElts =
4652 LT.second.getVectorNumElements() * NumLegalVectors;
4654 "Vector has been legalized to smaller element count");
4655 assert((NumLegalElts % NumLanesTotal) == 0 &&
4656 "Unexpected elts per lane");
4657 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4659 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4663 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4665 NumEltsPerLane, NumEltsPerLane *
I);
4666 if (LaneEltMask.
isZero())
4677 APInt AffectedLanes =
4680 AffectedLanes, NumLegalVectors,
true);
4681 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4682 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4683 unsigned I = NumLegalLanes * LegalVec + Lane;
4686 if (!AffectedLanes[
I] ||
4687 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4694 }
else if (LT.second.isVector()) {
4705 unsigned NumElts = LT.second.getVectorNumElements();
4707 PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
4708 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4717 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4718 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4719 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4723 if (LT.second.isVector()) {
4724 unsigned NumLegalElts =
4725 LT.second.getVectorNumElements() * NumLegalVectors;
4727 "Vector has been legalized to smaller element count");
4731 if (LegalVectorBitWidth > LaneBitWidth) {
4732 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4733 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4734 assert((NumLegalElts % NumLanesTotal) == 0 &&
4735 "Unexpected elts per lane");
4736 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4740 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4744 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4746 NumEltsPerLane,
I * NumEltsPerLane);
4747 if (LaneEltMask.
isZero())
4752 LaneTy, LaneEltMask,
false, Extract,
CostKind);
4769 int VF,
const APInt &DemandedDstElts,
4775 auto bailout = [&]() {
4785 unsigned PromEltTyBits = EltTyBits;
4786 switch (EltTyBits) {
4817 int NumDstElements = VF * ReplicationFactor;
4831 if (PromEltTyBits != EltTyBits) {
4837 Instruction::SExt, PromSrcVecTy, SrcVecTy,
4844 ReplicationFactor, VF,
4850 "We expect that the legalization doesn't affect the element width, "
4851 "doesn't coalesce/split elements.");
4854 unsigned NumDstVectors =
4855 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
4864 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
4865 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
4870 return NumDstVectorsDemanded * SingleShuffleCost;
4881 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
4884 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
4885 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
4892 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4902 auto *VTy = dyn_cast<FixedVectorType>(Src);
4907 if (Opcode == Instruction::Store && OpInfo.
isConstant())
4913 if (!VTy || !LT.second.isVector()) {
4915 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
4918 bool IsLoad = Opcode == Instruction::Load;
4920 Type *EltTy = VTy->getElementType();
4925 const unsigned SrcNumElt = VTy->getNumElements();
4928 int NumEltRemaining = SrcNumElt;
4930 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
4932 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
4935 const unsigned XMMBits = 128;
4936 if (XMMBits % EltTyBits != 0)
4940 const int NumEltPerXMM = XMMBits / EltTyBits;
4944 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
4945 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
4947 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
4951 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
4953 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
4954 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
4955 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
4956 "Unless we haven't halved the op size yet, "
4957 "we have less than two op's sized units of work left.");
4959 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
4963 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
4964 "After halving sizes, the vector elt count is no longer a multiple "
4965 "of number of elements per operation?");
4966 auto *CoalescedVecTy =
4967 CurrNumEltPerOp == 1
4971 EltTyBits * CurrNumEltPerOp),
4972 CurrVecTy->getNumElements() / CurrNumEltPerOp);
4975 "coalesciing elements doesn't change vector width.");
4977 while (NumEltRemaining > 0) {
4978 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
4982 if (NumEltRemaining < CurrNumEltPerOp &&
4983 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
4984 CurrOpSizeBytes != 1)
4987 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
4990 if (SubVecEltsLeft == 0) {
4991 SubVecEltsLeft += CurrVecTy->getNumElements();
4996 VTy, std::nullopt,
CostKind, NumEltDone(),
5004 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5005 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
5006 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
5007 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5008 APInt DemandedElts =
5010 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
5011 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5021 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5023 else if (CurrOpSizeBytes < 4)
5028 SubVecEltsLeft -= CurrNumEltPerOp;
5029 NumEltRemaining -= CurrNumEltPerOp;
5034 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5043 bool IsLoad = (Instruction::Load == Opcode);
5044 bool IsStore = (Instruction::Store == Opcode);
5046 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5051 unsigned NumElem = SrcVTy->getNumElements();
5059 MaskTy, DemandedElts,
false,
true,
CostKind);
5064 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5066 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5070 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5077 if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
5078 LT.second.getVectorNumElements() == NumElem)
5085 else if (LT.first * LT.second.getVectorNumElements() > NumElem) {
5087 LT.second.getVectorNumElements());
5095 return Cost + LT.first * (IsLoad ? 2 : 8);
5098 return Cost + LT.first;
5106 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5110 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5112 return getGEPCost(BaseGEP->getSourceElementType(),
5113 BaseGEP->getPointerOperand(), Indices,
nullptr,
5128 const unsigned NumVectorInstToHideOverhead = 10;
5141 return NumVectorInstToHideOverhead;
5151 std::optional<FastMathFlags> FMF,
5192 assert(ISD &&
"Invalid opcode");
5200 if (ST->useSLMArithCosts())
5215 MVT MTy = LT.second;
5217 auto *ValVTy = cast<FixedVectorType>(ValTy);
5230 if (LT.first != 1 && MTy.
isVector() &&
5236 ArithmeticCost *= LT.first - 1;
5239 if (ST->useSLMArithCosts())
5241 return ArithmeticCost + Entry->Cost;
5245 return ArithmeticCost + Entry->Cost;
5249 return ArithmeticCost + Entry->Cost;
5298 if (ValVTy->getElementType()->isIntegerTy(1)) {
5300 if (LT.first != 1 && MTy.
isVector() &&
5306 ArithmeticCost *= LT.first - 1;
5310 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5311 return ArithmeticCost + Entry->Cost;
5314 return ArithmeticCost + Entry->Cost;
5317 return ArithmeticCost + Entry->Cost;
5320 return ArithmeticCost + Entry->Cost;
5325 unsigned NumVecElts = ValVTy->getNumElements();
5326 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5336 if (LT.first != 1 && MTy.
isVector() &&
5342 ReductionCost *= LT.first - 1;
5348 while (NumVecElts > 1) {
5350 unsigned Size = NumVecElts * ScalarSize;
5359 }
else if (
Size == 128) {
5362 if (ValVTy->isFloatingPointTy())
5369 std::nullopt,
CostKind, 0,
nullptr);
5370 }
else if (
Size == 64) {
5373 if (ValVTy->isFloatingPointTy())
5380 std::nullopt,
CostKind, 0,
nullptr);
5386 Instruction::LShr, ShiftTy,
CostKind,
5413 MVT MTy = LT.second;
5417 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5421 "Expected float point or integer vector type.");
5422 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5490 auto *ValVTy = cast<FixedVectorType>(ValTy);
5491 unsigned NumVecElts = ValVTy->getNumElements();
5495 if (LT.first != 1 && MTy.
isVector() &&
5501 MinMaxCost *= LT.first - 1;
5507 return MinMaxCost + Entry->Cost;
5511 return MinMaxCost + Entry->Cost;
5515 return MinMaxCost + Entry->Cost;
5519 return MinMaxCost + Entry->Cost;
5531 while (NumVecElts > 1) {
5533 unsigned Size = NumVecElts * ScalarSize;
5541 }
else if (
Size == 128) {
5550 std::nullopt,
CostKind, 0,
nullptr);
5551 }
else if (
Size == 64) {
5559 std::nullopt,
CostKind, 0,
nullptr);
5612 if (BitSize % 64 != 0)
5613 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5618 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5624 return std::max<InstructionCost>(1,
Cost);
5639 unsigned ImmIdx = ~0U;
5643 case Instruction::GetElementPtr:
5650 case Instruction::Store:
5653 case Instruction::ICmp:
5659 if (
Idx == 1 && Imm.getBitWidth() == 64) {
5660 uint64_t ImmVal = Imm.getZExtValue();
5661 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5666 case Instruction::And:
5670 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.isIntN(32))
5674 case Instruction::Add:
5675 case Instruction::Sub:
5677 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
5681 case Instruction::UDiv:
5682 case Instruction::SDiv:
5683 case Instruction::URem:
5684 case Instruction::SRem:
5689 case Instruction::Mul:
5690 case Instruction::Or:
5691 case Instruction::Xor:
5695 case Instruction::Shl:
5696 case Instruction::LShr:
5697 case Instruction::AShr:
5701 case Instruction::Trunc:
5702 case Instruction::ZExt:
5703 case Instruction::SExt:
5704 case Instruction::IntToPtr:
5705 case Instruction::PtrToInt:
5706 case Instruction::BitCast:
5707 case Instruction::PHI:
5708 case Instruction::Call:
5709 case Instruction::Select:
5710 case Instruction::Ret:
5711 case Instruction::Load:
5715 if (
Idx == ImmIdx) {
5740 case Intrinsic::sadd_with_overflow:
5741 case Intrinsic::uadd_with_overflow:
5742 case Intrinsic::ssub_with_overflow:
5743 case Intrinsic::usub_with_overflow:
5744 case Intrinsic::smul_with_overflow:
5745 case Intrinsic::umul_with_overflow:
5746 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
5749 case Intrinsic::experimental_stackmap:
5750 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5753 case Intrinsic::experimental_patchpoint_void:
5754 case Intrinsic::experimental_patchpoint:
5755 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5766 return Opcode == Instruction::PHI ? 0 : 1;
5771int X86TTIImpl::getGatherOverhead()
const {
5784int X86TTIImpl::getScatterOverhead()
const {
5798 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
5799 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5809 if (IndexSize < 64 || !
GEP)
5812 unsigned NumOfVarIndices = 0;
5813 const Value *Ptrs =
GEP->getPointerOperand();
5816 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
5817 if (isa<Constant>(
GEP->getOperand(
I)))
5819 Type *IndxTy =
GEP->getOperand(
I)->getType();
5820 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
5821 IndxTy = IndexVTy->getElementType();
5823 !isa<SExtInst>(
GEP->getOperand(
I))) ||
5824 ++NumOfVarIndices > 1)
5827 return (
unsigned)32;
5832 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
5833 ? getIndexSizeInBits(
Ptr,
DL)
5841 *std::max(IdxsLT.first, SrcLT.first).getValue();
5842 if (SplitFactor > 1) {
5846 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
5856 const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
5857 : getScatterOverhead();
5865 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
5868 if (((Opcode == Instruction::Load &&
5871 Align(Alignment)))) ||
5872 (Opcode == Instruction::Store &&
5875 Align(Alignment))))))
5881 if (!PtrTy &&
Ptr->getType()->isVectorTy())
5882 PtrTy = dyn_cast<PointerType>(
5883 cast<VectorType>(
Ptr->getType())->getElementType());
5884 assert(PtrTy &&
"Unexpected type for Ptr argument");
5886 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
5902 return ST->hasMacroFusion() || ST->hasBranchFusion();
5910 if (isa<VectorType>(DataTy) &&
5911 cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5921 if (ScalarTy->
isHalfTy() && ST->hasBWI())
5931 return IntWidth == 32 || IntWidth == 64 ||
5932 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
5944 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
5961 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
5983 if (!isa<VectorType>(DataTy))
5990 if (cast<FixedVectorType>(DataTy)->getNumElements() == 1)
5993 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6002 return IntWidth == 32 || IntWidth == 64 ||
6003 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6010bool X86TTIImpl::supportsGather()
const {
6024 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6025 return NumElts == 1 ||
6026 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6041 return IntWidth == 32 || IntWidth == 64;
6045 if (!supportsGather() || !ST->preferGather())
6060 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6061 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6066 for (
int Lane : seq<int>(0, NumElements)) {
6067 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6069 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6071 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6075 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6077 return ST->
hasSSE3() && NumElements % 4 == 0;
6079 return ST->
hasSSE3() && NumElements % 2 == 0;
6085 if (!ST->
hasAVX512() || !ST->preferScatter())
6098 if (
I->getOpcode() == Instruction::FDiv)
6114 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6116 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6119 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6120 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6121 if (RealCallerBits == RealCalleeBits)
6126 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6130 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6132 if (CB->isInlineAsm())
6136 for (
Value *Arg : CB->args())
6137 Types.push_back(Arg->getType());
6138 if (!CB->getType()->isVoidTy())
6139 Types.push_back(CB->getType());
6142 auto IsSimpleTy = [](
Type *Ty) {
6143 return !Ty->isVectorTy() && !Ty->isAggregateType();
6145 if (
all_of(Types, IsSimpleTy))
6148 if (
Function *NestedCallee = CB->getCalledFunction()) {
6150 if (NestedCallee->isIntrinsic())
6185 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6194 Options.AllowOverlappingLoads =
true;
6199 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6200 Options.LoadSizes.push_back(64);
6201 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6202 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6204 if (ST->is64Bit()) {
6205 Options.LoadSizes.push_back(8);
6207 Options.LoadSizes.push_back(4);
6208 Options.LoadSizes.push_back(2);
6209 Options.LoadSizes.push_back(1);
6214 return supportsGather();
6225 return !(ST->isAtom());
6245 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6251 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6263 if (UseMaskedMemOp) {
6265 for (
unsigned Index : Indices) {
6266 assert(
Index < Factor &&
"Invalid index for interleaved memory op");
6267 for (
unsigned Elm = 0; Elm < VF; Elm++)
6268 DemandedLoadStoreElts.
setBit(
Index + Elm * Factor);
6275 UseMaskForGaps ? DemandedLoadStoreElts
6284 if (UseMaskForGaps) {
6290 if (Opcode == Instruction::Load) {
6297 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6298 {3, MVT::v16i8, 12},
6299 {3, MVT::v32i8, 14},
6300 {3, MVT::v64i8, 22},
6303 if (
const auto *Entry =
6305 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6315 ShuffleKind, SingleMemOpTy, std::nullopt,
CostKind, 0,
nullptr);
6317 unsigned NumOfLoadsInInterleaveGrp =
6318 Indices.
size() ? Indices.
size() : Factor;
6327 unsigned NumOfUnfoldedLoads =
6328 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6331 unsigned NumOfShufflesPerResult =
6332 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6339 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6342 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6349 assert(Opcode == Instruction::Store &&
6350 "Expected Store Instruction at this point");
6352 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6353 {3, MVT::v16i8, 12},
6354 {3, MVT::v32i8, 14},
6355 {3, MVT::v64i8, 26},
6358 {4, MVT::v16i8, 11},
6359 {4, MVT::v32i8, 14},
6363 if (
const auto *Entry =
6365 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6370 unsigned NumOfSources = Factor;
6373 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6377 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6380 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6388 bool UseMaskForCond,
bool UseMaskForGaps) {
6389 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6391 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6392 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6397 return ST->hasBWI();
6399 return ST->hasBF16();
6402 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6404 Opcode, VecTy, Factor, Indices, Alignment,
6407 if (UseMaskForCond || UseMaskForGaps)
6410 UseMaskForCond, UseMaskForGaps);
6430 unsigned VF = VecTy->getNumElements() / Factor;
6431 Type *ScalarTy = VecTy->getElementType();
6463 {2, MVT::v16i16, 9},
6464 {2, MVT::v32i16, 18},
6467 {2, MVT::v16i32, 8},
6468 {2, MVT::v32i32, 16},
6472 {2, MVT::v16i64, 16},
6473 {2, MVT::v32i64, 32},
6478 {3, MVT::v16i8, 11},
6479 {3, MVT::v32i8, 14},
6484 {3, MVT::v16i16, 28},
6485 {3, MVT::v32i16, 56},
6490 {3, MVT::v16i32, 14},
6491 {3, MVT::v32i32, 32},
6495 {3, MVT::v8i64, 10},
6496 {3, MVT::v16i64, 20},
6501 {4, MVT::v16i8, 24},
6502 {4, MVT::v32i8, 56},
6505 {4, MVT::v4i16, 17},
6506 {4, MVT::v8i16, 33},
6507 {4, MVT::v16i16, 75},
6508 {4, MVT::v32i16, 150},
6512 {4, MVT::v8i32, 16},
6513 {4, MVT::v16i32, 32},
6514 {4, MVT::v32i32, 68},
6518 {4, MVT::v8i64, 20},
6519 {4, MVT::v16i64, 40},
6524 {6, MVT::v16i8, 43},
6525 {6, MVT::v32i8, 82},
6527 {6, MVT::v2i16, 13},
6529 {6, MVT::v8i16, 39},
6530 {6, MVT::v16i16, 106},
6531 {6, MVT::v32i16, 212},
6534 {6, MVT::v4i32, 15},
6535 {6, MVT::v8i32, 31},
6536 {6, MVT::v16i32, 64},
6539 {6, MVT::v4i64, 18},
6540 {6, MVT::v8i64, 36},
6545 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6559 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6564 {2, MVT::v16i16, 4},
6565 {2, MVT::v32i16, 8},
6569 {2, MVT::v16i32, 8},
6570 {2, MVT::v32i32, 16},
6575 {2, MVT::v16i64, 16},
6576 {2, MVT::v32i64, 32},
6581 {3, MVT::v16i8, 11},
6582 {3, MVT::v32i8, 13},
6586 {3, MVT::v8i16, 12},
6587 {3, MVT::v16i16, 27},
6588 {3, MVT::v32i16, 54},
6592 {3, MVT::v8i32, 11},
6593 {3, MVT::v16i32, 22},
6594 {3, MVT::v32i32, 48},
6598 {3, MVT::v8i64, 12},
6599 {3, MVT::v16i64, 24},
6605 {4, MVT::v32i8, 12},
6609 {4, MVT::v8i16, 10},
6610 {4, MVT::v16i16, 32},
6611 {4, MVT::v32i16, 64},
6615 {4, MVT::v8i32, 16},
6616 {4, MVT::v16i32, 32},
6617 {4, MVT::v32i32, 64},
6621 {4, MVT::v8i64, 20},
6622 {4, MVT::v16i64, 40},
6627 {6, MVT::v16i8, 27},
6628 {6, MVT::v32i8, 90},
6630 {6, MVT::v2i16, 10},
6631 {6, MVT::v4i16, 15},
6632 {6, MVT::v8i16, 21},
6633 {6, MVT::v16i16, 58},
6634 {6, MVT::v32i16, 90},
6637 {6, MVT::v4i32, 12},
6638 {6, MVT::v8i32, 33},
6639 {6, MVT::v16i32, 66},
6642 {6, MVT::v4i64, 15},
6643 {6, MVT::v8i64, 30},
6646 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6657 if (Opcode == Instruction::Load) {
6658 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6662 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6666 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6668 return GetDiscountedCost(Entry);
6671 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6673 return GetDiscountedCost(Entry);
6676 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6678 return GetDiscountedCost(Entry);
6680 assert(Opcode == Instruction::Store &&
6681 "Expected Store Instruction at this point");
6683 "Interleaved store only supports fully-interleaved groups.");
6685 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6687 return MemOpCosts + Entry->Cost;
6690 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6692 return MemOpCosts + Entry->Cost;
6697 UseMaskForCond, UseMaskForGaps);
6702 bool HasBaseReg, int64_t Scale,
6703 unsigned AddrSpace)
const {
6731 return AM.
Scale != 0;
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is Skew mod Align.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.