62#define DEBUG_TYPE "x86tti"
78 std::optional<unsigned>
164 bool Vector = (ClassID == 1);
171 if (!
Vector && ST->hasEGPR())
187 auto *VTy = dyn_cast<FixedVectorType>(Ty);
188 if (!Ty->
isIntegerTy() && (!VTy || VTy->getNumElements() != 1))
191 switch (cast<IntegerType>(ScalarTy)->
getBitWidth()) {
208 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
210 if (ST->
hasAVX() && PreferVectorWidth >= 256)
212 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
253 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
270 assert(ISD &&
"Invalid opcode");
272 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
273 (LT.second.getScalarType() == MVT::i32 ||
274 LT.second.getScalarType() == MVT::i64)) {
276 bool Op1Signed =
false, Op2Signed =
false;
279 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
280 bool SignedMode = Op1Signed || Op2Signed;
285 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
286 LT.second.getScalarType() == MVT::i32) {
288 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
290 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
291 bool Op1Sext = isa<SExtInst>(Args[0]) &&
292 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
293 bool Op2Sext = isa<SExtInst>(Args[1]) &&
294 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
296 bool IsZeroExtended = !Op1Signed || !Op2Signed;
297 bool IsConstant = Op1Constant || Op2Constant;
298 bool IsSext = Op1Sext || Op2Sext;
299 if (IsConstant || IsZeroExtended || IsSext)
307 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
310 if (!SignedMode && OpMinSize <= 8)
314 if (!SignedMode && OpMinSize <= 16)
321 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
374 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
375 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
376 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
377 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
378 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
379 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
380 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
381 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
382 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
386 if (
const auto *Entry =
388 if (
auto KindCost = Entry->Cost[
CostKind])
389 return LT.first * *KindCost;
392 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
393 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
394 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
395 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
396 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
397 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
398 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
399 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
400 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
402 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
403 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
404 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
405 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
406 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
407 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
411 if (
const auto *Entry =
413 if (
auto KindCost = Entry->Cost[
CostKind])
414 return LT.first * *KindCost;
417 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
418 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
419 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
421 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
422 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
423 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
425 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
426 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
427 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
428 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
429 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
430 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
432 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
433 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
434 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
435 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
436 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
437 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
438 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
447 if (
const auto *Entry =
449 if (
auto KindCost = Entry->Cost[
CostKind])
450 return LT.first * *KindCost;
453 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
454 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
455 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
456 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
457 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
458 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
460 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
461 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
462 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
463 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
464 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
465 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
467 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
468 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
469 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
470 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
471 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
472 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
474 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
475 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
476 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
477 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
478 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
479 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
488 if (
const auto *Entry =
490 if (
auto KindCost = Entry->Cost[
CostKind])
491 return LT.first * *KindCost;
494 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
495 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
496 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
497 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
498 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
499 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
501 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
502 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
503 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
504 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
505 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
506 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
508 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
509 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
510 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
511 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
512 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
513 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
515 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
516 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
517 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
518 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
519 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
520 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
530 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
531 if (
const auto *Entry =
533 if (
auto KindCost = Entry->Cost[
CostKind])
534 return LT.first * *KindCost;
537 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
538 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
539 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
541 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
542 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
543 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
545 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
546 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
547 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
549 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
550 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
551 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
561 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
562 if (
const auto *Entry =
564 if (
auto KindCost = Entry->Cost[
CostKind])
565 return LT.first * *KindCost;
580 if (
const auto *Entry =
582 if (
auto KindCost = Entry->Cost[
CostKind])
583 return LT.first * *KindCost;
603 if (
const auto *Entry =
605 if (
auto KindCost = Entry->Cost[
CostKind])
606 return LT.first * *KindCost;
626 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
627 if (
auto KindCost = Entry->Cost[
CostKind])
628 return LT.first * *KindCost;
648 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
649 if (
auto KindCost = Entry->Cost[
CostKind])
650 return LT.first * *KindCost;
658 if (
const auto *Entry =
660 if (
auto KindCost = Entry->Cost[
CostKind])
661 return LT.first * *KindCost;
681 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
682 if (
auto KindCost = Entry->Cost[
CostKind])
683 return LT.first * *KindCost;
686 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
687 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
688 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
689 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
690 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
691 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
692 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
693 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
694 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
696 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
697 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
698 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
702 if (
const auto *Entry =
704 if (
auto KindCost = Entry->Cost[
CostKind])
705 return LT.first * *KindCost;
708 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
709 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
710 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
712 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
713 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
714 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
716 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
717 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
718 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
719 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
720 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
721 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
722 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
726 if (
const auto *Entry =
728 if (
auto KindCost = Entry->Cost[
CostKind])
729 return LT.first * *KindCost;
733 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
734 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
735 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
736 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
737 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
738 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
740 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
741 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
742 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
743 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
744 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
745 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
747 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
748 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
749 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
750 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
751 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
752 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
754 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
755 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
756 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
757 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
758 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
759 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
763 if (
const auto *Entry =
765 if (
auto KindCost = Entry->Cost[
CostKind])
766 return LT.first * *KindCost;
769 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
770 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
771 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
772 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
773 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
774 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
776 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
777 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
778 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
779 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
780 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
781 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
783 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
784 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
785 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
786 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
787 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
788 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
790 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
791 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
792 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
793 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
794 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
795 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
800 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
801 if (
const auto *Entry =
803 if (
auto KindCost = Entry->Cost[
CostKind])
804 return LT.first * *KindCost;
808 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
809 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
810 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
812 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
813 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
814 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
816 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
817 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
818 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
820 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
821 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
822 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
826 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
827 if (
const auto *Entry =
829 if (
auto KindCost = Entry->Cost[
CostKind])
830 return LT.first * *KindCost;
833 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
834 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
835 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
840 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
841 if (
auto KindCost = Entry->Cost[
CostKind])
842 return LT.first * *KindCost;
845 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
846 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
847 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
848 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
849 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
850 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
851 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
852 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
853 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
855 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
856 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
857 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
858 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
859 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
860 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
861 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
862 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
863 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
865 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
866 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
868 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
869 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
870 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
871 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
873 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
874 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
876 {
ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } },
877 {
ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } },
878 {
ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } },
879 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
881 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
882 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
883 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
884 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
889 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
890 if (
auto KindCost = Entry->Cost[
CostKind])
891 return LT.first * *KindCost;
894 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
895 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
896 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
898 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
899 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
900 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
902 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
903 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
904 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
905 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
906 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
907 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
908 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
909 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
910 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
912 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
913 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
914 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
915 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
916 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
917 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
918 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
919 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
920 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
922 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
923 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
925 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
926 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
928 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
929 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
930 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
931 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
933 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
934 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
935 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
936 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
938 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
939 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
940 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
941 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
943 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
944 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
945 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
946 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
951 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
952 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
953 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
954 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
955 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
956 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
957 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
958 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
961 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
962 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
963 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
964 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
966 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
967 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
968 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
969 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
970 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
971 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
972 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
973 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
976 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
977 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
978 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
979 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
983 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
984 if (
auto KindCost = Entry->Cost[
CostKind])
985 return LT.first * *KindCost;
990 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
991 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
992 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
993 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
994 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
995 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
996 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
997 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
998 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
999 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
1011 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
1012 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
1019 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
1020 if (
auto KindCost = Entry->Cost[
CostKind])
1021 return LT.first * *KindCost;
1026 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1027 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1028 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1029 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1030 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1031 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1032 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1033 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1034 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1035 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1036 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1037 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1039 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1040 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1041 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1042 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1043 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1044 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1045 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1046 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1047 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1048 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1049 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1050 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1060 if (
const auto *Entry =
1062 if (
auto KindCost = Entry->Cost[
CostKind])
1063 return LT.first * *KindCost;
1070 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1071 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1076 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1077 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1078 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1079 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1082 if (ST->useGLMDivSqrtCosts())
1083 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1084 if (
auto KindCost = Entry->Cost[
CostKind])
1085 return LT.first * *KindCost;
1088 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1089 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1090 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1091 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1092 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1093 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1094 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1095 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1096 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1097 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1098 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1099 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1105 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1107 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1108 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1111 if (ST->useSLMArithCosts())
1112 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1113 if (
auto KindCost = Entry->Cost[
CostKind])
1114 return LT.first * *KindCost;
1117 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1118 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1119 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1120 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1122 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1123 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1124 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1125 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1127 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1128 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1129 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1130 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1131 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1132 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1134 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1135 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1136 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1137 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1138 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1139 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1140 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1141 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1143 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1144 {
ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } },
1145 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1146 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1147 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1148 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1149 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1153 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1154 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1156 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1157 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1158 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1159 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1160 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1161 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1163 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1164 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1165 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1166 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1167 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1168 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1170 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1171 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1172 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1173 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1174 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1175 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1177 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1178 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1179 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1180 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1181 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1182 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1187 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1188 if (
auto KindCost = Entry->Cost[
CostKind])
1189 return LT.first * *KindCost;
1195 {
ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } },
1196 {
ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } },
1197 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1198 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1199 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1200 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1202 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1203 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1204 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1205 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1207 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1208 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1209 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1210 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1212 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1213 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1214 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1215 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1217 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1218 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1219 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1220 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1221 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1222 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1223 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1224 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1225 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1226 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1228 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1229 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1230 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1231 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1232 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1233 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1234 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1235 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1237 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1238 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1239 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1240 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1241 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1242 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1243 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1244 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1246 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1247 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1248 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1249 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1250 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1251 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1252 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1253 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1255 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1256 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1258 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1259 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1260 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1261 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1262 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1263 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1265 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1266 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1267 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1268 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1269 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1270 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1272 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1273 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1274 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1275 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1276 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1277 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1279 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1280 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1281 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1282 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1283 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1284 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1288 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1289 if (
auto KindCost = Entry->Cost[
CostKind])
1290 return LT.first * *KindCost;
1293 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1294 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1295 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1296 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1298 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1299 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1300 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1301 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1303 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1304 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1305 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1306 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1308 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1309 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1310 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1311 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1313 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1317 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1318 if (
auto KindCost = Entry->Cost[
CostKind])
1319 return LT.first * *KindCost;
1322 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1323 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1324 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1326 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1327 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1328 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1329 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1331 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1332 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1333 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1334 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1336 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1340 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1341 if (
auto KindCost = Entry->Cost[
CostKind])
1342 return LT.first * *KindCost;
1345 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1349 if (
const auto *Entry =
CostTableLookup(SSSE3CostTable, ISD, LT.second))
1350 if (
auto KindCost = Entry->Cost[
CostKind])
1351 return LT.first * *KindCost;
1356 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1357 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1358 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1359 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1361 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1362 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1363 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1364 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1366 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1367 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1368 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1369 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1371 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1372 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1373 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1374 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1376 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1377 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1378 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1379 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1381 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1382 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1383 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1384 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1386 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1387 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1389 {
ISD::MUL, MVT::v16i8, { 6, 18,12,12 } },
1390 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1391 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1392 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1396 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1397 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1398 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1399 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1401 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1402 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1403 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1404 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1406 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1407 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1408 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1410 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1411 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1412 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1414 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1415 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1419 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1420 if (
auto KindCost = Entry->Cost[
CostKind])
1421 return LT.first * *KindCost;
1424 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1425 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1427 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1428 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1430 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1431 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1433 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1434 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1436 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1437 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1441 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1442 if (
auto KindCost = Entry->Cost[
CostKind])
1443 return LT.first * *KindCost;
1448 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1453 if (
auto KindCost = Entry->Cost[
CostKind])
1454 return LT.first * *KindCost;
1465 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1466 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1467 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1469 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1470 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1471 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1472 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1473 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1477 if (
auto KindCost = Entry->Cost[
CostKind])
1478 return LT.first * *KindCost;
1492 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1536 if (!Args.empty() &&
1537 all_of(Args, [](
const Value *Arg) {
return isa<Constant>(Arg); }))
1546 CostKind, Mask.size() / 2, BaseTp);
1559 using namespace PatternMatch;
1562 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1568 bool IsInLaneShuffle =
false;
1574 unsigned NumEltsPerLane = Mask.size() / NumLanes;
1575 if ((Mask.size() % NumLanes) == 0)
1578 ((
P.value() % Mask.size()) / NumEltsPerLane) ==
1579 (
P.index() / NumEltsPerLane);
1584 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1585 LT.second = LT.second.changeVectorElementType(MVT::f16);
1590 int NumElts = LT.second.getVectorNumElements();
1591 if ((Index % NumElts) == 0)
1594 if (SubLT.second.isVector()) {
1595 int NumSubElts = SubLT.second.getVectorNumElements();
1596 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1604 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1605 if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
1606 (NumSubElts % OrigSubElts) == 0 &&
1607 LT.second.getVectorElementType() ==
1608 SubLT.second.getVectorElementType() &&
1609 LT.second.getVectorElementType().getSizeInBits() ==
1611 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1612 "Unexpected number of elements!");
1614 LT.second.getVectorNumElements());
1616 SubLT.second.getVectorNumElements());
1617 int ExtractIndex =
alignDown((Index % NumElts), NumSubElts);
1624 return ExtractCost + 1;
1627 "Unexpected vector size");
1629 return ExtractCost + 2;
1642 int NumElts = LT.second.getVectorNumElements();
1644 if (SubLT.second.isVector()) {
1645 int NumSubElts = SubLT.second.getVectorNumElements();
1646 bool MatchingTypes =
1647 NumElts == NumSubElts &&
1649 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1662 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1693 if (
const auto *Entry =
1703 MVT LegalVT = LT.second;
1708 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1712 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1719 if (!Mask.empty() && NumOfDests.
isValid()) {
1737 unsigned E = *NumOfDests.
getValue();
1738 unsigned NormalizedVF =
1744 unsigned PrevSrcReg = 0;
1748 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1749 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1754 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1755 PrevRegMask != RegMask)
1763 if (SrcReg != DestReg &&
1768 PrevSrcReg = SrcReg;
1769 PrevRegMask = RegMask;
1801 if (
const auto *Entry =
1803 return LT.first * Entry->Cost;
1836 if (
const auto *Entry =
1838 return LT.first * Entry->Cost;
1915 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1916 if (
auto KindCost = Entry->Cost[
CostKind])
1917 return LT.first * *KindCost;
1933 if (IsInLaneShuffle && ST->
hasAVX2())
1934 if (
const auto *Entry =
1936 return LT.first * Entry->Cost;
1989 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1990 return LT.first * Entry->Cost;
2011 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
2012 return LT.first * Entry->Cost;
2039 if (IsInLaneShuffle && ST->
hasAVX())
2040 if (
const auto *Entry =
2042 return LT.first * Entry->Cost;
2104 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2105 return LT.first * Entry->Cost;
2118 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2119 return LT.first * Entry->Cost;
2150 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2151 return LT.first * Entry->Cost;
2207 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2209 if (
const auto *Entry =
2212 LT.second.getVectorElementCount()) &&
2213 "Table entry missing from isLegalBroadcastLoad()");
2214 return LT.first * Entry->Cost;
2217 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2218 return LT.first * Entry->Cost;
2231 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2232 return LT.first * Entry->Cost;
2243 assert(ISD &&
"Invalid opcode");
2370 {
ISD::FP_ROUND, MVT::v16f16, MVT::v16f32, { 1, 1, 1, 1 } },
2392 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2393 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2740 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2817 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
3041 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
3070 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3071 if (
auto KindCost = Entry->Cost[
CostKind])
3076 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3077 if (
auto KindCost = Entry->Cost[
CostKind])
3082 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3083 if (
auto KindCost = Entry->Cost[
CostKind])
3089 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3090 if (
auto KindCost = Entry->Cost[
CostKind])
3095 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3096 if (
auto KindCost = Entry->Cost[
CostKind])
3101 SimpleDstTy, SimpleSrcTy))
3102 if (
auto KindCost = Entry->Cost[
CostKind])
3107 SimpleDstTy, SimpleSrcTy))
3108 if (
auto KindCost = Entry->Cost[
CostKind])
3114 SimpleDstTy, SimpleSrcTy))
3115 if (
auto KindCost = Entry->Cost[
CostKind])
3119 if (ST->hasF16C()) {
3121 SimpleDstTy, SimpleSrcTy))
3122 if (
auto KindCost = Entry->Cost[
CostKind])
3128 SimpleDstTy, SimpleSrcTy))
3129 if (
auto KindCost = Entry->Cost[
CostKind])
3135 SimpleDstTy, SimpleSrcTy))
3136 if (
auto KindCost = Entry->Cost[
CostKind])
3159 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3160 if (
auto KindCost = Entry->Cost[
CostKind])
3161 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3165 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3166 if (
auto KindCost = Entry->Cost[
CostKind])
3167 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3171 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3172 if (
auto KindCost = Entry->Cost[
CostKind])
3173 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3178 LTDest.second, LTSrc.second))
3179 if (
auto KindCost = Entry->Cost[
CostKind])
3180 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3184 LTDest.second, LTSrc.second))
3185 if (
auto KindCost = Entry->Cost[
CostKind])
3186 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3190 LTDest.second, LTSrc.second))
3191 if (
auto KindCost = Entry->Cost[
CostKind])
3192 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3196 LTDest.second, LTSrc.second))
3197 if (
auto KindCost = Entry->Cost[
CostKind])
3198 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3202 LTDest.second, LTSrc.second))
3203 if (
auto KindCost = Entry->Cost[
CostKind])
3204 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3206 if (ST->hasF16C()) {
3208 LTDest.second, LTSrc.second))
3209 if (
auto KindCost = Entry->Cost[
CostKind])
3210 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3215 LTDest.second, LTSrc.second))
3216 if (
auto KindCost = Entry->Cost[
CostKind])
3217 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3221 LTDest.second, LTSrc.second))
3222 if (
auto KindCost = Entry->Cost[
CostKind])
3223 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3228 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3229 Type *ExtSrc = Src->getWithNewBitWidth(32);
3235 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3245 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3246 Type *TruncDst = Dst->getWithNewBitWidth(32);
3256 return Cost == 0 ? 0 :
N;
3270 Op1Info, Op2Info,
I);
3275 MVT MTy = LT.second;
3278 assert(ISD &&
"Invalid opcode");
3281 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3294 Pred = cast<CmpInst>(
I)->getPredicate();
3296 bool CmpWithConstant =
false;
3297 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3298 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3303 ExtraCost = CmpWithConstant ? 0 : 1;
3308 ExtraCost = CmpWithConstant ? 0 : 1;
3314 ExtraCost = CmpWithConstant ? 1 : 2;
3325 ExtraCost = CmpWithConstant ? 2 : 3;
3332 if (CondTy && !ST->
hasAVX())
3503 if (ST->useSLMArithCosts())
3505 if (
auto KindCost = Entry->Cost[
CostKind])
3506 return LT.first * (ExtraCost + *KindCost);
3510 if (
auto KindCost = Entry->Cost[
CostKind])
3511 return LT.first * (ExtraCost + *KindCost);
3515 if (
auto KindCost = Entry->Cost[
CostKind])
3516 return LT.first * (ExtraCost + *KindCost);
3520 if (
auto KindCost = Entry->Cost[
CostKind])
3521 return LT.first * (ExtraCost + *KindCost);
3525 if (
auto KindCost = Entry->Cost[
CostKind])
3526 return LT.first * (ExtraCost + *KindCost);
3530 if (
auto KindCost = Entry->Cost[
CostKind])
3531 return LT.first * (ExtraCost + *KindCost);
3535 if (
auto KindCost = Entry->Cost[
CostKind])
3536 return LT.first * (ExtraCost + *KindCost);
3540 if (
auto KindCost = Entry->Cost[
CostKind])
3541 return LT.first * (ExtraCost + *KindCost);
3545 if (
auto KindCost = Entry->Cost[
CostKind])
3546 return LT.first * (ExtraCost + *KindCost);
3550 if (
auto KindCost = Entry->Cost[
CostKind])
3551 return LT.first * (ExtraCost + *KindCost);
3559 Op1Info, Op2Info,
I);
3577 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3578 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3579 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3580 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3581 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3582 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3583 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3584 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3585 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3586 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3587 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3588 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3589 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3590 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3591 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3613 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3614 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3615 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3616 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3617 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3618 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3619 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3620 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3621 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3622 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3623 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3624 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3626 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3627 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3628 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3629 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3630 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3631 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3634 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3635 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3657 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3658 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3659 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3660 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3661 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3662 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3663 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3664 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3665 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3666 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3667 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3668 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3669 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3673 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3674 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3675 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3676 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3677 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3678 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3679 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3680 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3681 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3682 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3683 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3684 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3685 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3686 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3687 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3688 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3689 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3690 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3699 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3700 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3701 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3702 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3704 {
ISD::SMULO, MVT::v64i8, { 8, 21, 17, 18 } },
3706 {
ISD::UMULO, MVT::v64i8, { 8, 15, 15, 16 } },
3711 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3712 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3713 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3714 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3719 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3720 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3721 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3722 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3723 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3724 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3725 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3726 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3727 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3735 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3736 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3737 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3738 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3739 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3740 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3741 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3742 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3743 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3744 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3745 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3746 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3747 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3748 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3749 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3750 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3751 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3752 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3753 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3754 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3755 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3756 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3757 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3758 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3773 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3774 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3775 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3776 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3777 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3778 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3779 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3780 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3781 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3782 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3783 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3784 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3785 {
ISD::SMULO, MVT::v8i64, { 44, 44, 81, 93 } },
3786 {
ISD::SMULO, MVT::v16i32, { 5, 12, 9, 11 } },
3787 {
ISD::SMULO, MVT::v32i16, { 6, 12, 17, 17 } },
3788 {
ISD::SMULO, MVT::v64i8, { 22, 28, 42, 42 } },
3797 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3798 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3799 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3800 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3801 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3802 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3803 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3804 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3805 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3806 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3807 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3808 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3809 {
ISD::UMULO, MVT::v8i64, { 52, 52, 95, 104} },
3810 {
ISD::UMULO, MVT::v16i32, { 5, 12, 8, 10 } },
3811 {
ISD::UMULO, MVT::v32i16, { 5, 13, 16, 16 } },
3812 {
ISD::UMULO, MVT::v64i8, { 18, 24, 30, 30 } },
3839 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3842 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3843 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3859 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3860 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3861 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3862 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3863 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3864 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3865 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3866 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3867 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3868 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3869 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3870 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3871 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3872 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3873 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3874 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3885 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3886 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3887 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3888 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3889 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3890 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3891 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3892 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3907 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3908 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3909 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3910 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3911 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3912 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3913 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3914 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3915 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3916 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3917 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3918 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3919 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3920 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3923 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3924 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3925 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3926 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3927 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3928 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3929 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3930 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3937 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3938 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3939 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3940 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3941 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3942 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3943 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3944 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3945 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3946 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3947 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
3948 {
ISD::SMULO, MVT::v2i64, { 8, 8, 13, 15 } },
3949 {
ISD::SMULO, MVT::v8i32, { 8, 20, 13, 24 } },
3950 {
ISD::SMULO, MVT::v4i32, { 5, 15, 11, 12 } },
3951 {
ISD::SMULO, MVT::v16i16, { 4, 14, 8, 14 } },
3953 {
ISD::SMULO, MVT::v32i8, { 9, 15, 18, 35 } },
3954 {
ISD::SMULO, MVT::v16i8, { 6, 22, 14, 21 } },
3966 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3967 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3968 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3969 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3970 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3971 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3972 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3973 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3974 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3975 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3976 {
ISD::UMULO, MVT::v4i64, { 24, 24, 39, 43 } },
3977 {
ISD::UMULO, MVT::v2i64, { 10, 10, 15, 19 } },
3978 {
ISD::UMULO, MVT::v8i32, { 8, 11, 13, 23 } },
3979 {
ISD::UMULO, MVT::v4i32, { 5, 12, 11, 12 } },
3980 {
ISD::UMULO, MVT::v16i16, { 4, 6, 8, 13 } },
3982 {
ISD::UMULO, MVT::v32i8, { 9, 13, 17, 33 } },
3983 {
ISD::UMULO, MVT::v16i8, { 6, 19, 13, 20 } },
3997 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
3999 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
4000 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
4003 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
4004 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
4005 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
4006 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
4019 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
4021 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
4022 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
4023 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
4024 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
4025 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
4026 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
4027 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
4028 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
4029 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
4030 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
4031 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
4032 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
4033 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
4034 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
4035 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
4036 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
4037 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
4038 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
4039 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
4040 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
4041 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
4042 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
4043 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
4044 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
4050 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
4051 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
4052 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4053 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4054 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4055 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
4056 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4057 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4058 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4059 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4060 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
4061 {
ISD::SMULO, MVT::v2i64, { 9, 9, 13, 17 } },
4062 {
ISD::SMULO, MVT::v8i32, { 15, 20, 24, 29 } },
4063 {
ISD::SMULO, MVT::v4i32, { 7, 15, 11, 13 } },
4064 {
ISD::SMULO, MVT::v16i16, { 8, 14, 14, 15 } },
4066 {
ISD::SMULO, MVT::v32i8, { 20, 20, 37, 39 } },
4067 {
ISD::SMULO, MVT::v16i8, { 9, 22, 18, 21 } },
4078 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
4079 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
4080 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4081 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4082 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4083 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
4084 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
4085 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4086 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4087 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4088 {
ISD::UMULO, MVT::v4i64, { 24, 26, 39, 45 } },
4089 {
ISD::UMULO, MVT::v2i64, { 10, 12, 15, 20 } },
4090 {
ISD::UMULO, MVT::v8i32, { 14, 15, 23, 28 } },
4091 {
ISD::UMULO, MVT::v4i32, { 7, 12, 11, 13 } },
4092 {
ISD::UMULO, MVT::v16i16, { 7, 11, 13, 14 } },
4094 {
ISD::UMULO, MVT::v32i8, { 19, 19, 35, 37 } },
4095 {
ISD::UMULO, MVT::v16i8, { 9, 19, 17, 20 } },
4109 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
4110 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
4112 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
4113 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
4138 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
4140 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
4147 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
4149 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
4157 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
4160 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
4165 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
4166 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4167 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4168 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4169 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4170 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4171 {
ISD::SMULO, MVT::v2i64, { 9, 11, 13, 17 } },
4172 {
ISD::SMULO, MVT::v4i32, { 20, 24, 13, 19 } },
4174 {
ISD::SMULO, MVT::v16i8, { 13, 22, 24, 25 } },
4179 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
4180 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4181 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4182 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
4183 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4184 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4185 {
ISD::UMULO, MVT::v2i64, { 14, 20, 15, 20 } },
4186 {
ISD::UMULO, MVT::v4i32, { 19, 22, 12, 18 } },
4188 {
ISD::UMULO, MVT::v16i8, { 13, 19, 18, 20 } },
4191 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
4192 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
4193 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
4201 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
4202 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
4203 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
4204 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
4205 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
4206 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
4207 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
4208 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
4209 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
4210 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
4211 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
4212 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
4215 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
4216 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
4217 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
4218 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
4223 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
4226 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
4227 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
4228 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
4229 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
4230 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
4231 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
4232 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
4233 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
4234 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
4235 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4236 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4237 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4242 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4243 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4244 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4245 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4246 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4247 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4248 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4249 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4250 {
ISD::SMULO, MVT::v2i64, { 30, 33, 13, 23 } },
4251 {
ISD::SMULO, MVT::v4i32, { 20, 24, 23, 23 } },
4253 {
ISD::SMULO, MVT::v16i8, { 13, 23, 24, 25 } },
4262 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4263 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4264 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4265 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4266 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4267 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4268 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4269 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4270 {
ISD::UMULO, MVT::v2i64, { 30, 33, 15, 29 } },
4271 {
ISD::UMULO, MVT::v4i32, { 19, 22, 14, 18 } },
4273 {
ISD::UMULO, MVT::v16i8, { 13, 19, 20, 20 } },
4281 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4287 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4290 {
ISD::CTTZ, MVT::i64, { 1, 1, 1, 1 } },
4293 {
ISD::CTTZ, MVT::i32, { 1, 1, 1, 1 } },
4294 {
ISD::CTTZ, MVT::i16, { 2, 1, 1, 1 } },
4298 {
ISD::CTLZ, MVT::i64, { 1, 1, 1, 1 } },
4301 {
ISD::CTLZ, MVT::i32, { 1, 1, 1, 1 } },
4302 {
ISD::CTLZ, MVT::i16, { 2, 1, 1, 1 } },
4314 {
ISD::ABS, MVT::i64, { 1, 2, 3, 3 } },
4317 {
ISD::CTLZ, MVT::i64, { 2, 2, 4, 5 } },
4319 {
ISD::CTTZ, MVT::i64, { 2, 2, 3, 4 } },
4322 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4323 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4325 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4330 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4331 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4332 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4333 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4340 {
ISD::ABS, MVT::i32, { 1, 2, 3, 3 } },
4341 {
ISD::ABS, MVT::i16, { 2, 2, 3, 3 } },
4342 {
ISD::ABS, MVT::i8, { 2, 4, 4, 3 } },
4348 {
ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } },
4349 {
ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } },
4354 {
ISD::CTTZ, MVT::i32, { 2, 2, 3, 3 } },
4355 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 3 } },
4363 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4364 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4366 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4367 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4372 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4373 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4387 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4388 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4390 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4391 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4393 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4394 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4396 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4397 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4420 case Intrinsic::abs:
4423 case Intrinsic::bitreverse:
4426 case Intrinsic::bswap:
4429 case Intrinsic::ctlz:
4432 case Intrinsic::ctpop:
4435 case Intrinsic::cttz:
4438 case Intrinsic::fshl:
4442 if (Args[0] == Args[1]) {
4453 case Intrinsic::fshr:
4458 if (Args[0] == Args[1]) {
4469 case Intrinsic::lrint:
4470 case Intrinsic::llrint:
4479 case Intrinsic::maxnum:
4480 case Intrinsic::minnum:
4484 case Intrinsic::sadd_sat:
4487 case Intrinsic::smax:
4490 case Intrinsic::smin:
4493 case Intrinsic::ssub_sat:
4496 case Intrinsic::uadd_sat:
4499 case Intrinsic::umax:
4502 case Intrinsic::umin:
4505 case Intrinsic::usub_sat:
4508 case Intrinsic::sqrt:
4511 case Intrinsic::sadd_with_overflow:
4512 case Intrinsic::ssub_with_overflow:
4515 OpTy =
RetTy->getContainedType(0);
4517 case Intrinsic::uadd_with_overflow:
4518 case Intrinsic::usub_with_overflow:
4521 OpTy =
RetTy->getContainedType(0);
4523 case Intrinsic::smul_with_overflow:
4525 OpTy =
RetTy->getContainedType(0);
4527 case Intrinsic::umul_with_overflow:
4529 OpTy =
RetTy->getContainedType(0);
4534 auto adjustTableCost = [&](
int ISD,
unsigned Cost,
4535 std::pair<InstructionCost, MVT> LT,
4538 MVT MTy = LT.second;
4545 return LegalizationCost * 1;
4550 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4552 if (
II->hasOneUse() && isa<StoreInst>(
II->user_back()))
4554 if (
auto *LI = dyn_cast<LoadInst>(
II->getOperand(0))) {
4555 if (LI->hasOneUse())
4562 return LegalizationCost * (int)
Cost;
4567 MVT MTy = LT.second;
4570 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4571 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4574 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4575 if (Cst->isAllOnesValue())
4583 if (ST->useGLMDivSqrtCosts())
4585 if (
auto KindCost = Entry->Cost[
CostKind])
4586 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4588 if (ST->useSLMArithCosts())
4590 if (
auto KindCost = Entry->Cost[
CostKind])
4591 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4594 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4595 if (
auto KindCost = Entry->Cost[
CostKind])
4596 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4598 if (ST->hasBITALG())
4599 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4600 if (
auto KindCost = Entry->Cost[
CostKind])
4601 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4603 if (ST->hasVPOPCNTDQ())
4604 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4605 if (
auto KindCost = Entry->Cost[
CostKind])
4606 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4610 if (
auto KindCost = Entry->Cost[
CostKind])
4611 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4615 if (
auto KindCost = Entry->Cost[
CostKind])
4616 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4620 if (
auto KindCost = Entry->Cost[
CostKind])
4621 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4625 if (
auto KindCost = Entry->Cost[
CostKind])
4626 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4630 if (
auto KindCost = Entry->Cost[
CostKind])
4631 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4635 if (
auto KindCost = Entry->Cost[
CostKind])
4636 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4640 if (
auto KindCost = Entry->Cost[
CostKind])
4641 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4645 if (
auto KindCost = Entry->Cost[
CostKind])
4646 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4650 if (
auto KindCost = Entry->Cost[
CostKind])
4651 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4655 if (
auto KindCost = Entry->Cost[
CostKind])
4656 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4660 if (
auto KindCost = Entry->Cost[
CostKind])
4661 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4665 if (
auto KindCost = Entry->Cost[
CostKind])
4666 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4671 if (
auto KindCost = Entry->Cost[
CostKind])
4672 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4675 if (
auto KindCost = Entry->Cost[
CostKind])
4676 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4679 if (ST->hasLZCNT()) {
4682 if (
auto KindCost = Entry->Cost[
CostKind])
4683 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4686 if (
auto KindCost = Entry->Cost[
CostKind])
4687 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4690 if (ST->hasPOPCNT()) {
4693 if (
auto KindCost = Entry->Cost[
CostKind])
4694 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4697 if (
auto KindCost = Entry->Cost[
CostKind])
4698 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4703 if (
auto KindCost = Entry->Cost[
CostKind])
4704 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4707 if (
auto KindCost = Entry->Cost[
CostKind])
4708 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4716 unsigned Index,
Value *Op0,
4731 if (Index == -1U && (Opcode == Instruction::ExtractElement ||
4732 Opcode == Instruction::InsertElement)) {
4737 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4742 if (Opcode == Instruction::ExtractElement) {
4748 if (Opcode == Instruction::InsertElement) {
4756 if (Index != -1U && (Opcode == Instruction::ExtractElement ||
4757 Opcode == Instruction::InsertElement)) {
4759 if (Opcode == Instruction::ExtractElement &&
4761 cast<FixedVectorType>(Val)->getNumElements() > 1)
4768 if (!LT.second.isVector())
4772 unsigned SizeInBits = LT.second.getSizeInBits();
4773 unsigned NumElts = LT.second.getVectorNumElements();
4774 unsigned SubNumElts = NumElts;
4775 Index = Index % NumElts;
4779 if (SizeInBits > 128) {
4780 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4781 unsigned NumSubVecs = SizeInBits / 128;
4782 SubNumElts = NumElts / NumSubVecs;
4783 if (SubNumElts <= Index) {
4784 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4785 Index %= SubNumElts;
4789 MVT MScalarTy = LT.second.getScalarType();
4790 auto IsCheapPInsrPExtrInsertPS = [&]() {
4793 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4795 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4796 Opcode == Instruction::InsertElement);
4804 (Opcode != Instruction::InsertElement || !Op0 ||
4805 isa<UndefValue>(Op0)))
4806 return RegisterFileMoveCost;
4808 if (Opcode == Instruction::InsertElement &&
4809 isa_and_nonnull<UndefValue>(Op0)) {
4811 if (isa_and_nonnull<LoadInst>(Op1))
4812 return RegisterFileMoveCost;
4813 if (!IsCheapPInsrPExtrInsertPS()) {
4816 return 2 + RegisterFileMoveCost;
4818 return 1 + RegisterFileMoveCost;
4823 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4824 return 1 + RegisterFileMoveCost;
4828 assert(ISD &&
"Unexpected vector opcode");
4829 if (ST->useSLMArithCosts())
4831 return Entry->Cost + RegisterFileMoveCost;
4834 if (IsCheapPInsrPExtrInsertPS())
4835 return 1 + RegisterFileMoveCost;
4844 if (Opcode == Instruction::InsertElement) {
4845 auto *SubTy = cast<VectorType>(Val);
4853 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4857 RegisterFileMoveCost;
4864 cast<FixedVectorType>(Ty)->getNumElements() &&
4865 "Vector size mismatch");
4868 MVT MScalarTy = LT.second.getScalarType();
4869 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4872 constexpr unsigned LaneBitWidth = 128;
4873 assert((LegalVectorBitWidth < LaneBitWidth ||
4874 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4877 const int NumLegalVectors = *LT.first.getValue();
4878 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4883 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4885 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4888 if (LegalVectorBitWidth <= LaneBitWidth) {
4904 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4905 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4906 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4907 unsigned NumLegalElts =
4908 LT.second.getVectorNumElements() * NumLegalVectors;
4910 "Vector has been legalized to smaller element count");
4911 assert((NumLegalElts % NumLanesTotal) == 0 &&
4912 "Unexpected elts per lane");
4913 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4915 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4919 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4921 NumEltsPerLane, NumEltsPerLane *
I);
4922 if (LaneEltMask.
isZero())
4928 I * NumEltsPerLane, LaneTy);
4933 APInt AffectedLanes =
4936 AffectedLanes, NumLegalVectors,
true);
4937 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4938 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4939 unsigned I = NumLegalLanes * LegalVec + Lane;
4942 if (!AffectedLanes[
I] ||
4943 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4946 I * NumEltsPerLane, LaneTy);
4950 }
else if (LT.second.isVector()) {
4961 unsigned NumElts = LT.second.getVectorNumElements();
4964 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4973 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4974 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4975 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4979 if (LT.second.isVector()) {
4980 unsigned NumLegalElts =
4981 LT.second.getVectorNumElements() * NumLegalVectors;
4983 "Vector has been legalized to smaller element count");
4987 if (LegalVectorBitWidth > LaneBitWidth) {
4988 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4989 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4990 assert((NumLegalElts % NumLanesTotal) == 0 &&
4991 "Unexpected elts per lane");
4992 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4996 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
5000 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
5002 NumEltsPerLane,
I * NumEltsPerLane);
5003 if (LaneEltMask.
isZero())
5006 I * NumEltsPerLane, LaneTy);
5008 LaneTy, LaneEltMask,
false, Extract,
CostKind);
5025 int VF,
const APInt &DemandedDstElts,
5031 auto bailout = [&]() {
5041 unsigned PromEltTyBits = EltTyBits;
5042 switch (EltTyBits) {
5073 int NumDstElements = VF * ReplicationFactor;
5087 if (PromEltTyBits != EltTyBits) {
5093 Instruction::SExt, PromSrcVecTy, SrcVecTy,
5100 ReplicationFactor, VF,
5106 "We expect that the legalization doesn't affect the element width, "
5107 "doesn't coalesce/split elements.");
5110 unsigned NumDstVectors =
5111 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
5120 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
5121 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
5126 return NumDstVectorsDemanded * SingleShuffleCost;
5137 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
5140 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
5141 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
5148 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
5158 auto *VTy = dyn_cast<FixedVectorType>(Src);
5163 if (Opcode == Instruction::Store && OpInfo.
isConstant())
5169 if (!VTy || !LT.second.isVector()) {
5171 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
5174 bool IsLoad = Opcode == Instruction::Load;
5176 Type *EltTy = VTy->getElementType();
5181 const unsigned SrcNumElt = VTy->getNumElements();
5184 int NumEltRemaining = SrcNumElt;
5186 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
5188 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
5191 const unsigned XMMBits = 128;
5192 if (XMMBits % EltTyBits != 0)
5196 const int NumEltPerXMM = XMMBits / EltTyBits;
5200 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
5201 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
5203 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
5207 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
5209 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
5210 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
5211 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
5212 "Unless we haven't halved the op size yet, "
5213 "we have less than two op's sized units of work left.");
5215 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
5219 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
5220 "After halving sizes, the vector elt count is no longer a multiple "
5221 "of number of elements per operation?");
5222 auto *CoalescedVecTy =
5223 CurrNumEltPerOp == 1
5227 EltTyBits * CurrNumEltPerOp),
5228 CurrVecTy->getNumElements() / CurrNumEltPerOp);
5231 "coalesciing elements doesn't change vector width.");
5233 while (NumEltRemaining > 0) {
5234 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
5238 if (NumEltRemaining < CurrNumEltPerOp &&
5239 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
5240 CurrOpSizeBytes != 1)
5248 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5250 else if (CurrOpSizeBytes < 4)
5260 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
5263 if (SubVecEltsLeft == 0) {
5264 SubVecEltsLeft += CurrVecTy->getNumElements();
5269 VTy, {},
CostKind, NumEltDone(), CurrVecTy);
5276 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5277 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
5278 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
5279 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5280 APInt DemandedElts =
5282 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
5283 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5288 SubVecEltsLeft -= CurrNumEltPerOp;
5289 NumEltRemaining -= CurrNumEltPerOp;
5294 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5303 bool IsLoad = (Instruction::Load == Opcode);
5304 bool IsStore = (Instruction::Store == Opcode);
5306 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5311 unsigned NumElem = SrcVTy->getNumElements();
5319 MaskTy, DemandedElts,
false,
true,
CostKind);
5324 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5326 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5330 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5338 if (Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64)
5340 return Cost + LT.first;
5342 if (VT.isSimple() && Ty != VT.getSimpleVT() &&
5343 LT.second.getVectorNumElements() == NumElem)
5360 return Cost + LT.first * (IsLoad ? 2 : 8);
5363 return Cost + LT.first;
5371 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5375 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5377 return getGEPCost(BaseGEP->getSourceElementType(),
5378 BaseGEP->getPointerOperand(), Indices,
nullptr,
5393 const unsigned NumVectorInstToHideOverhead = 10;
5406 return NumVectorInstToHideOverhead;
5416 std::optional<FastMathFlags> FMF,
5457 assert(ISD &&
"Invalid opcode");
5465 if (ST->useSLMArithCosts())
5480 MVT MTy = LT.second;
5482 auto *ValVTy = cast<FixedVectorType>(ValTy);
5495 if (LT.first != 1 && MTy.
isVector() &&
5501 ArithmeticCost *= LT.first - 1;
5504 if (ST->useSLMArithCosts())
5506 return ArithmeticCost + Entry->Cost;
5510 return ArithmeticCost + Entry->Cost;
5514 return ArithmeticCost + Entry->Cost;
5563 if (ValVTy->getElementType()->isIntegerTy(1)) {
5565 if (LT.first != 1 && MTy.
isVector() &&
5571 ArithmeticCost *= LT.first - 1;
5575 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5576 return ArithmeticCost + Entry->Cost;
5579 return ArithmeticCost + Entry->Cost;
5582 return ArithmeticCost + Entry->Cost;
5585 return ArithmeticCost + Entry->Cost;
5590 unsigned NumVecElts = ValVTy->getNumElements();
5591 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5601 if (LT.first != 1 && MTy.
isVector() &&
5607 ReductionCost *= LT.first - 1;
5613 while (NumVecElts > 1) {
5615 unsigned Size = NumVecElts * ScalarSize;
5623 }
else if (
Size == 128) {
5626 if (ValVTy->isFloatingPointTy())
5634 }
else if (
Size == 64) {
5637 if (ValVTy->isFloatingPointTy())
5650 Instruction::LShr, ShiftTy,
CostKind,
5677 MVT MTy = LT.second;
5681 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5685 "Expected float point or integer vector type.");
5686 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5754 auto *ValVTy = cast<FixedVectorType>(ValTy);
5755 unsigned NumVecElts = ValVTy->getNumElements();
5759 if (LT.first != 1 && MTy.
isVector() &&
5765 MinMaxCost *= LT.first - 1;
5771 return MinMaxCost + Entry->Cost;
5775 return MinMaxCost + Entry->Cost;
5779 return MinMaxCost + Entry->Cost;
5783 return MinMaxCost + Entry->Cost;
5795 while (NumVecElts > 1) {
5797 unsigned Size = NumVecElts * ScalarSize;
5805 }
else if (
Size == 128) {
5815 }
else if (
Size == 64) {
5876 if (BitSize % 64 != 0)
5877 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5882 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5888 return std::max<InstructionCost>(1,
Cost);
5898 unsigned ImmBitWidth = Imm.getBitWidth();
5905 unsigned ImmIdx = ~0U;
5909 case Instruction::GetElementPtr:
5916 case Instruction::Store:
5919 case Instruction::ICmp:
5925 if (
Idx == 1 && ImmBitWidth == 64) {
5926 uint64_t ImmVal = Imm.getZExtValue();
5927 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5932 case Instruction::And:
5936 if (
Idx == 1 && ImmBitWidth == 64 && Imm.isIntN(32))
5939 if (
Idx == 1 && ImmBitWidth == 64 && ST->is64Bit() && ST->hasBMI() &&
5944 case Instruction::Add:
5945 case Instruction::Sub:
5947 if (
Idx == 1 && ImmBitWidth == 64 && Imm.getZExtValue() == 0x80000000)
5951 case Instruction::UDiv:
5952 case Instruction::SDiv:
5953 case Instruction::URem:
5954 case Instruction::SRem:
5959 case Instruction::Mul:
5960 case Instruction::Or:
5961 case Instruction::Xor:
5965 case Instruction::Shl:
5966 case Instruction::LShr:
5967 case Instruction::AShr:
5971 case Instruction::Trunc:
5972 case Instruction::ZExt:
5973 case Instruction::SExt:
5974 case Instruction::IntToPtr:
5975 case Instruction::PtrToInt:
5976 case Instruction::BitCast:
5977 case Instruction::PHI:
5978 case Instruction::Call:
5979 case Instruction::Select:
5980 case Instruction::Ret:
5981 case Instruction::Load:
5985 if (
Idx == ImmIdx) {
6010 case Intrinsic::sadd_with_overflow:
6011 case Intrinsic::uadd_with_overflow:
6012 case Intrinsic::ssub_with_overflow:
6013 case Intrinsic::usub_with_overflow:
6014 case Intrinsic::smul_with_overflow:
6015 case Intrinsic::umul_with_overflow:
6016 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
6019 case Intrinsic::experimental_stackmap:
6020 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6023 case Intrinsic::experimental_patchpoint_void:
6024 case Intrinsic::experimental_patchpoint:
6025 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6041int X86TTIImpl::getGatherOverhead()
const {
6054int X86TTIImpl::getScatterOverhead()
const {
6068 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
6069 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
6079 if (IndexSize < 64 || !
GEP)
6082 unsigned NumOfVarIndices = 0;
6083 const Value *Ptrs =
GEP->getPointerOperand();
6086 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
6087 if (isa<Constant>(
GEP->getOperand(
I)))
6089 Type *IndxTy =
GEP->getOperand(
I)->getType();
6090 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
6091 IndxTy = IndexVTy->getElementType();
6093 !isa<SExtInst>(
GEP->getOperand(
I))) ||
6094 ++NumOfVarIndices > 1)
6097 return (
unsigned)32;
6102 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
6103 ? getIndexSizeInBits(
Ptr,
DL)
6111 *std::max(IdxsLT.first, SrcLT.first).getValue();
6112 if (SplitFactor > 1) {
6116 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
6126 const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
6127 : getScatterOverhead();
6135 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
6138 if ((Opcode == Instruction::Load &&
6141 Align(Alignment)))) ||
6142 (Opcode == Instruction::Store &&
6145 Align(Alignment)))))
6151 if (!PtrTy &&
Ptr->getType()->isVectorTy())
6152 PtrTy = dyn_cast<PointerType>(
6153 cast<VectorType>(
Ptr->getType())->getElementType());
6154 assert(PtrTy &&
"Unexpected type for Ptr argument");
6156 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
6172 return ST->hasMacroFusion() || ST->hasBranchFusion();
6179 if (isa<VectorType>(DataTy) && cast<FixedVectorType>(DataTy)->
getNumElements() == 1)
6191 if (ScalarTy->
isHalfTy() && ST->hasBWI())
6201 return IntWidth == 32 || IntWidth == 64 ||
6202 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
6214 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
6231 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6253 if (!isa<VectorType>(DataTy))
6263 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6272 return IntWidth == 32 || IntWidth == 64 ||
6273 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6280bool X86TTIImpl::supportsGather()
const {
6294 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6295 return NumElts == 1 ||
6296 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6311 return IntWidth == 32 || IntWidth == 64;
6315 if (!supportsGather() || !ST->preferGather())
6330 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6331 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6336 for (
int Lane : seq<int>(0, NumElements)) {
6337 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6339 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6341 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6345 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6347 return ST->
hasSSE3() && NumElements % 4 == 0;
6349 return ST->
hasSSE3() && NumElements % 2 == 0;
6355 if (!ST->
hasAVX512() || !ST->preferScatter())
6368 if (
I->getOpcode() == Instruction::FDiv)
6384 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6386 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6389 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6390 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6391 if (RealCallerBits == RealCalleeBits)
6396 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6400 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6402 if (CB->isInlineAsm())
6406 for (
Value *Arg : CB->args())
6407 Types.push_back(Arg->getType());
6408 if (!CB->getType()->isVoidTy())
6409 Types.push_back(CB->getType());
6412 auto IsSimpleTy = [](
Type *Ty) {
6413 return !Ty->isVectorTy() && !Ty->isAggregateType();
6415 if (
all_of(Types, IsSimpleTy))
6418 if (
Function *NestedCallee = CB->getCalledFunction()) {
6420 if (NestedCallee->isIntrinsic())
6455 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6464 Options.AllowOverlappingLoads =
true;
6469 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6470 Options.LoadSizes.push_back(64);
6471 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6472 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6474 if (ST->is64Bit()) {
6475 Options.LoadSizes.push_back(8);
6477 Options.LoadSizes.push_back(4);
6478 Options.LoadSizes.push_back(2);
6479 Options.LoadSizes.push_back(1);
6484 return supportsGather();
6495 return !(ST->isAtom());
6515 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6521 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6534 if (UseMaskedMemOp) {
6536 for (
unsigned Index : Indices) {
6537 assert(Index < Factor &&
"Invalid index for interleaved memory op");
6538 for (
unsigned Elm = 0; Elm < VF; Elm++)
6539 DemandedLoadStoreElts.
setBit(Index + Elm * Factor);
6546 UseMaskForGaps ? DemandedLoadStoreElts
6555 if (UseMaskForGaps) {
6561 if (Opcode == Instruction::Load) {
6568 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6569 {3, MVT::v16i8, 12},
6570 {3, MVT::v32i8, 14},
6571 {3, MVT::v64i8, 22},
6574 if (
const auto *Entry =
6576 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6588 unsigned NumOfLoadsInInterleaveGrp =
6589 Indices.
size() ? Indices.
size() : Factor;
6598 unsigned NumOfUnfoldedLoads =
6599 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6602 unsigned NumOfShufflesPerResult =
6603 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6610 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6613 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6620 assert(Opcode == Instruction::Store &&
6621 "Expected Store Instruction at this point");
6623 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6624 {3, MVT::v16i8, 12},
6625 {3, MVT::v32i8, 14},
6626 {3, MVT::v64i8, 26},
6629 {4, MVT::v16i8, 11},
6630 {4, MVT::v32i8, 14},
6634 if (
const auto *Entry =
6636 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6641 unsigned NumOfSources = Factor;
6644 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6648 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6651 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6659 bool UseMaskForCond,
bool UseMaskForGaps) {
6660 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6662 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6663 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6668 return ST->hasBWI();
6670 return ST->hasBF16();
6673 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6675 Opcode, VecTy, Factor, Indices, Alignment,
6678 if (UseMaskForCond || UseMaskForGaps)
6681 UseMaskForCond, UseMaskForGaps);
6701 unsigned VF = VecTy->getNumElements() / Factor;
6702 Type *ScalarTy = VecTy->getElementType();
6734 {2, MVT::v16i16, 9},
6735 {2, MVT::v32i16, 18},
6738 {2, MVT::v16i32, 8},
6739 {2, MVT::v32i32, 16},
6743 {2, MVT::v16i64, 16},
6744 {2, MVT::v32i64, 32},
6749 {3, MVT::v16i8, 11},
6750 {3, MVT::v32i8, 14},
6755 {3, MVT::v16i16, 28},
6756 {3, MVT::v32i16, 56},
6761 {3, MVT::v16i32, 14},
6762 {3, MVT::v32i32, 32},
6766 {3, MVT::v8i64, 10},
6767 {3, MVT::v16i64, 20},
6772 {4, MVT::v16i8, 24},
6773 {4, MVT::v32i8, 56},
6776 {4, MVT::v4i16, 17},
6777 {4, MVT::v8i16, 33},
6778 {4, MVT::v16i16, 75},
6779 {4, MVT::v32i16, 150},
6783 {4, MVT::v8i32, 16},
6784 {4, MVT::v16i32, 32},
6785 {4, MVT::v32i32, 68},
6789 {4, MVT::v8i64, 20},
6790 {4, MVT::v16i64, 40},
6795 {6, MVT::v16i8, 43},
6796 {6, MVT::v32i8, 82},
6798 {6, MVT::v2i16, 13},
6800 {6, MVT::v8i16, 39},
6801 {6, MVT::v16i16, 106},
6802 {6, MVT::v32i16, 212},
6805 {6, MVT::v4i32, 15},
6806 {6, MVT::v8i32, 31},
6807 {6, MVT::v16i32, 64},
6810 {6, MVT::v4i64, 18},
6811 {6, MVT::v8i64, 36},
6816 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6830 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6835 {2, MVT::v16i16, 4},
6836 {2, MVT::v32i16, 8},
6840 {2, MVT::v16i32, 8},
6841 {2, MVT::v32i32, 16},
6846 {2, MVT::v16i64, 16},
6847 {2, MVT::v32i64, 32},
6852 {3, MVT::v16i8, 11},
6853 {3, MVT::v32i8, 13},
6857 {3, MVT::v8i16, 12},
6858 {3, MVT::v16i16, 27},
6859 {3, MVT::v32i16, 54},
6863 {3, MVT::v8i32, 11},
6864 {3, MVT::v16i32, 22},
6865 {3, MVT::v32i32, 48},
6869 {3, MVT::v8i64, 12},
6870 {3, MVT::v16i64, 24},
6876 {4, MVT::v32i8, 12},
6880 {4, MVT::v8i16, 10},
6881 {4, MVT::v16i16, 32},
6882 {4, MVT::v32i16, 64},
6886 {4, MVT::v8i32, 16},
6887 {4, MVT::v16i32, 32},
6888 {4, MVT::v32i32, 64},
6892 {4, MVT::v8i64, 20},
6893 {4, MVT::v16i64, 40},
6898 {6, MVT::v16i8, 27},
6899 {6, MVT::v32i8, 90},
6901 {6, MVT::v2i16, 10},
6902 {6, MVT::v4i16, 15},
6903 {6, MVT::v8i16, 21},
6904 {6, MVT::v16i16, 58},
6905 {6, MVT::v32i16, 90},
6908 {6, MVT::v4i32, 12},
6909 {6, MVT::v8i32, 33},
6910 {6, MVT::v16i32, 66},
6913 {6, MVT::v4i64, 15},
6914 {6, MVT::v8i64, 30},
6917 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6928 if (Opcode == Instruction::Load) {
6929 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6933 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6937 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6939 return GetDiscountedCost(Entry);
6942 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6944 return GetDiscountedCost(Entry);
6947 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6949 return GetDiscountedCost(Entry);
6951 assert(Opcode == Instruction::Store &&
6952 "Expected Store Instruction at this point");
6954 "Interleaved store only supports fully-interleaved groups.");
6956 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6958 return MemOpCosts + Entry->Cost;
6961 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6963 return MemOpCosts + Entry->Cost;
6968 UseMaskForCond, UseMaskForGaps);
6973 bool HasBaseReg, int64_t Scale,
6974 unsigned AddrSpace)
const {
7002 return AM.
Scale != 0;
7016 if (ST->hasXOP() && (Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64))
7021 if (ST->
hasAVX2() && (Bits == 32 || Bits == 64))
7025 if (ST->hasBWI() && Bits == 16)
7034 Type *ScalarValTy)
const {
7035 if (ST->hasF16C() && ScalarMemTy->
isHalfTy()) {
7049 if (
I->getOpcode() == Instruction::Mul &&
7051 for (
auto &
Op :
I->operands()) {
7053 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
7061 Ops.
push_back(&cast<Instruction>(
Op)->getOperandUse(0));
7070 return !Ops.
empty();
7076 int ShiftAmountOpNum = -1;
7078 ShiftAmountOpNum = 1;
7079 else if (
auto *
II = dyn_cast<IntrinsicInst>(
I)) {
7080 if (
II->getIntrinsicID() == Intrinsic::fshl ||
7081 II->getIntrinsicID() == Intrinsic::fshr)
7082 ShiftAmountOpNum = 2;
7085 if (ShiftAmountOpNum == -1)
7088 auto *Shuf = dyn_cast<ShuffleVectorInst>(
I->getOperand(ShiftAmountOpNum));
7091 Ops.
push_back(&
I->getOperandUse(ShiftAmountOpNum));
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients are updated.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full range of operator support required for arithmetic and comparisons.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is small.
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements in the input type.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same element type.
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target, for a load/store of the specified type.
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isVectorShiftByScalarCheap(Type *Ty) const
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getBranchMispredictPenalty() const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width (W).
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMULO
Same for multiplication.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than +0.0.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W).
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ... are the values from the original input ranges.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
Type Conversion Cost Table.