62#define DEBUG_TYPE "x86tti"
78 std::optional<unsigned>
164 bool Vector = (ClassID == 1);
171 if (!
Vector && ST->hasEGPR())
187 auto *VTy = dyn_cast<FixedVectorType>(Ty);
188 if (!Ty->
isIntegerTy() && (!VTy || VTy->getNumElements() != 1))
191 switch (cast<IntegerType>(ScalarTy)->
getBitWidth()) {
208 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
210 if (ST->
hasAVX() && PreferVectorWidth >= 256)
212 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
253 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
270 assert(ISD &&
"Invalid opcode");
272 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
273 (LT.second.getScalarType() == MVT::i32 ||
274 LT.second.getScalarType() == MVT::i64)) {
276 bool Op1Signed =
false, Op2Signed =
false;
279 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
280 bool SignedMode = Op1Signed || Op2Signed;
285 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
286 LT.second.getScalarType() == MVT::i32) {
288 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
290 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
291 bool Op1Sext = isa<SExtInst>(Args[0]) &&
292 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
293 bool Op2Sext = isa<SExtInst>(Args[1]) &&
294 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
296 bool IsZeroExtended = !Op1Signed || !Op2Signed;
297 bool IsConstant = Op1Constant || Op2Constant;
298 bool IsSext = Op1Sext || Op2Sext;
299 if (IsConstant || IsZeroExtended || IsSext)
307 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
310 if (!SignedMode && OpMinSize <= 8)
314 if (!SignedMode && OpMinSize <= 16)
321 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
374 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
375 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
376 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
377 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
378 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
379 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
380 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
381 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
382 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
386 if (
const auto *Entry =
388 if (
auto KindCost = Entry->Cost[
CostKind])
389 return LT.first * *KindCost;
392 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
393 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
394 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
395 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
396 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
397 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
398 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
399 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
400 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
402 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
403 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
404 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
405 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
406 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
407 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
411 if (
const auto *Entry =
413 if (
auto KindCost = Entry->Cost[
CostKind])
414 return LT.first * *KindCost;
417 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
418 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
419 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
421 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
422 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
423 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
425 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
426 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
427 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
428 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
429 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
430 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
432 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
433 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
434 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
435 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
436 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
437 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
438 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
447 if (
const auto *Entry =
449 if (
auto KindCost = Entry->Cost[
CostKind])
450 return LT.first * *KindCost;
453 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
454 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
455 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
456 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
457 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
458 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
460 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
461 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
462 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
463 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
464 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
465 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
467 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
468 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
469 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
470 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
471 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
472 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
474 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
475 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
476 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
477 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
478 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
479 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
488 if (
const auto *Entry =
490 if (
auto KindCost = Entry->Cost[
CostKind])
491 return LT.first * *KindCost;
494 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
495 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
496 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
497 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
498 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
499 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
501 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
502 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
503 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
504 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
505 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
506 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
508 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
509 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
510 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
511 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
512 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
513 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
515 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
516 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
517 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
518 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
519 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
520 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
530 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
531 if (
const auto *Entry =
533 if (
auto KindCost = Entry->Cost[
CostKind])
534 return LT.first * *KindCost;
537 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
538 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
539 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
541 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
542 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
543 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
545 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
546 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
547 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
549 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
550 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
551 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
561 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
562 if (
const auto *Entry =
564 if (
auto KindCost = Entry->Cost[
CostKind])
565 return LT.first * *KindCost;
580 if (
const auto *Entry =
582 if (
auto KindCost = Entry->Cost[
CostKind])
583 return LT.first * *KindCost;
603 if (
const auto *Entry =
605 if (
auto KindCost = Entry->Cost[
CostKind])
606 return LT.first * *KindCost;
626 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
627 if (
auto KindCost = Entry->Cost[
CostKind])
628 return LT.first * *KindCost;
648 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
649 if (
auto KindCost = Entry->Cost[
CostKind])
650 return LT.first * *KindCost;
658 if (
const auto *Entry =
660 if (
auto KindCost = Entry->Cost[
CostKind])
661 return LT.first * *KindCost;
681 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
682 if (
auto KindCost = Entry->Cost[
CostKind])
683 return LT.first * *KindCost;
686 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
687 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
688 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
689 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
690 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
691 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
692 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
693 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
694 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
696 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
697 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
698 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
702 if (
const auto *Entry =
704 if (
auto KindCost = Entry->Cost[
CostKind])
705 return LT.first * *KindCost;
708 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
709 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
710 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
712 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
713 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
714 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
716 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
717 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
718 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
719 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
720 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
721 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
722 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
726 if (
const auto *Entry =
728 if (
auto KindCost = Entry->Cost[
CostKind])
729 return LT.first * *KindCost;
733 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
734 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
735 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
736 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
737 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
738 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
740 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
741 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
742 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
743 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
744 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
745 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
747 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
748 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
749 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
750 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
751 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
752 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
754 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
755 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
756 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
757 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
758 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
759 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
763 if (
const auto *Entry =
765 if (
auto KindCost = Entry->Cost[
CostKind])
766 return LT.first * *KindCost;
769 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
770 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
771 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
772 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
773 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
774 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
776 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
777 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
778 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
779 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
780 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
781 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
783 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
784 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
785 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
786 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
787 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
788 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
790 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
791 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
792 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
793 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
794 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
795 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
800 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
801 if (
const auto *Entry =
803 if (
auto KindCost = Entry->Cost[
CostKind])
804 return LT.first * *KindCost;
808 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
809 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
810 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
812 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
813 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
814 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
816 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
817 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
818 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
820 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
821 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
822 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
826 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
827 if (
const auto *Entry =
829 if (
auto KindCost = Entry->Cost[
CostKind])
830 return LT.first * *KindCost;
833 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
834 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
835 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
840 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
841 if (
auto KindCost = Entry->Cost[
CostKind])
842 return LT.first * *KindCost;
845 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
846 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
847 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
848 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
849 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
850 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
851 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
852 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
853 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
855 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
856 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
857 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
858 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
859 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
860 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
861 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
862 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
863 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
865 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
866 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
868 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
869 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
870 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
871 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
873 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
874 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
876 {
ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } },
877 {
ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } },
878 {
ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } },
879 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
881 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
882 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
883 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
884 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
889 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
890 if (
auto KindCost = Entry->Cost[
CostKind])
891 return LT.first * *KindCost;
894 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
895 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
896 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
898 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
899 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
900 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
902 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
903 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
904 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
905 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
906 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
907 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
908 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
909 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
910 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
912 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
913 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
914 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
915 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
916 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
917 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
918 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
919 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
920 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
922 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
923 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
925 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
926 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
928 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
929 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
930 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
931 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
933 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
934 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
935 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
936 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
938 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
939 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
940 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
941 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
943 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
944 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
945 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
946 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
951 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
952 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
953 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
954 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
955 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
956 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
957 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
958 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
961 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
962 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
963 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
964 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
966 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
967 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
968 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
969 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
970 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
971 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
972 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
973 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
976 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
977 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
978 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
979 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
983 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
984 if (
auto KindCost = Entry->Cost[
CostKind])
985 return LT.first * *KindCost;
990 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
991 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
992 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
993 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
994 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
995 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
996 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
997 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
998 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
999 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
1011 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
1012 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
1019 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
1020 if (
auto KindCost = Entry->Cost[
CostKind])
1021 return LT.first * *KindCost;
1026 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1027 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1028 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1029 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1030 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1031 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1032 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1033 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1034 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1035 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1036 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1037 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1039 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1040 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1041 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1042 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1043 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1044 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1045 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1046 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1047 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1048 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1049 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1050 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1060 if (
const auto *Entry =
1062 if (
auto KindCost = Entry->Cost[
CostKind])
1063 return LT.first * *KindCost;
1070 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1071 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1076 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1077 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1078 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1079 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1082 if (ST->useGLMDivSqrtCosts())
1083 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1084 if (
auto KindCost = Entry->Cost[
CostKind])
1085 return LT.first * *KindCost;
1088 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1089 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1090 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1091 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1092 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1093 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1094 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1095 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1096 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1097 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1098 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1099 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1105 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1107 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1108 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1111 if (ST->useSLMArithCosts())
1112 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1113 if (
auto KindCost = Entry->Cost[
CostKind])
1114 return LT.first * *KindCost;
1117 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1118 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1119 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1120 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1122 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1123 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1124 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1125 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1127 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1128 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1129 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1130 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1131 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1132 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1134 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1135 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1136 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1137 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1138 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1139 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1140 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1141 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1143 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1144 {
ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } },
1145 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1146 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1147 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1148 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1149 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1153 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1154 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1156 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1157 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1158 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1159 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1160 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1161 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1163 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1164 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1165 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1166 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1167 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1168 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1170 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1171 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1172 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1173 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1174 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1175 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1177 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1178 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1179 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1180 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1181 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1182 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1187 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1188 if (
auto KindCost = Entry->Cost[
CostKind])
1189 return LT.first * *KindCost;
1195 {
ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } },
1196 {
ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } },
1197 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1198 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1199 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1200 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1202 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1203 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1204 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1205 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1207 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1208 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1209 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1210 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1212 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1213 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1214 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1215 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1217 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1218 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1219 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1220 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1221 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1222 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1223 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1224 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1225 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1226 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1228 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1229 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1230 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1231 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1232 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1233 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1234 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1235 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1237 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1238 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1239 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1240 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1241 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1242 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1243 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1244 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1246 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1247 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1248 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1249 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1250 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1251 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1252 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1253 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1255 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1256 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1258 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1259 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1260 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1261 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1262 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1263 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1265 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1266 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1267 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1268 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1269 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1270 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1272 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1273 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1274 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1275 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1276 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1277 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1279 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1280 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1281 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1282 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1283 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1284 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1288 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1289 if (
auto KindCost = Entry->Cost[
CostKind])
1290 return LT.first * *KindCost;
1293 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1294 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1295 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1296 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1298 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1299 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1300 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1301 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1303 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1304 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1305 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1306 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1308 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1309 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1310 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1311 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1313 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1317 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1318 if (
auto KindCost = Entry->Cost[
CostKind])
1319 return LT.first * *KindCost;
1322 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1323 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1324 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1326 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1327 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1328 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1329 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1331 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1332 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1333 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1334 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1336 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1340 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1341 if (
auto KindCost = Entry->Cost[
CostKind])
1342 return LT.first * *KindCost;
1345 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1349 if (
const auto *Entry =
CostTableLookup(SSSE3CostTable, ISD, LT.second))
1350 if (
auto KindCost = Entry->Cost[
CostKind])
1351 return LT.first * *KindCost;
1356 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1357 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1358 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1359 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1361 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1362 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1363 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1364 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1366 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1367 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1368 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1369 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1371 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1372 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1373 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1374 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1376 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1377 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1378 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1379 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1381 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1382 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1383 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1384 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1386 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1387 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1389 {
ISD::MUL, MVT::v16i8, { 6, 18,12,12 } },
1390 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1391 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1392 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1396 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1397 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1398 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1399 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1401 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1402 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1403 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1404 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1406 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1407 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1408 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1410 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1411 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1412 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1414 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1415 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1419 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1420 if (
auto KindCost = Entry->Cost[
CostKind])
1421 return LT.first * *KindCost;
1424 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1425 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1427 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1428 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1430 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1431 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1433 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1434 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1436 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1437 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1441 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1442 if (
auto KindCost = Entry->Cost[
CostKind])
1443 return LT.first * *KindCost;
1448 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1453 if (
auto KindCost = Entry->Cost[
CostKind])
1454 return LT.first * *KindCost;
1465 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1466 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1467 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1469 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1470 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1471 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1472 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1473 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1477 if (
auto KindCost = Entry->Cost[
CostKind])
1478 return LT.first * *KindCost;
1492 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1536 if (!Args.empty() &&
1537 all_of(Args, [](
const Value *Arg) {
return isa<Constant>(Arg); }))
1546 CostKind, Mask.size() / 2, BaseTp);
1559 using namespace PatternMatch;
1562 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1568 bool IsInLaneShuffle =
false;
1574 unsigned NumEltsPerLane = Mask.size() / NumLanes;
1575 if ((Mask.size() % NumLanes) == 0)
1578 ((
P.value() % Mask.size()) / NumEltsPerLane) ==
1579 (
P.index() / NumEltsPerLane);
1584 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1585 LT.second = LT.second.changeVectorElementType(MVT::f16);
1590 int NumElts = LT.second.getVectorNumElements();
1591 if ((Index % NumElts) == 0)
1594 if (SubLT.second.isVector()) {
1595 int NumSubElts = SubLT.second.getVectorNumElements();
1596 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1604 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1605 if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
1606 (NumSubElts % OrigSubElts) == 0 &&
1607 LT.second.getVectorElementType() ==
1608 SubLT.second.getVectorElementType() &&
1609 LT.second.getVectorElementType().getSizeInBits() ==
1611 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1612 "Unexpected number of elements!");
1614 LT.second.getVectorNumElements());
1616 SubLT.second.getVectorNumElements());
1617 int ExtractIndex =
alignDown((Index % NumElts), NumSubElts);
1624 return ExtractCost + 1;
1627 "Unexpected vector size");
1629 return ExtractCost + 2;
1642 int NumElts = LT.second.getVectorNumElements();
1644 if (SubLT.second.isVector()) {
1645 int NumSubElts = SubLT.second.getVectorNumElements();
1646 bool MatchingTypes =
1647 NumElts == NumSubElts &&
1649 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1662 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1693 if (
const auto *Entry =
1701 if (LT.first != 1) {
1702 MVT LegalVT = LT.second;
1707 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1711 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1718 if (!Mask.empty() && NumOfDests.
isValid()) {
1736 unsigned E = *NumOfDests.
getValue();
1737 unsigned NormalizedVF =
1743 unsigned PrevSrcReg = 0;
1747 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1748 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1753 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1754 PrevRegMask != RegMask)
1762 if (SrcReg != DestReg &&
1767 PrevSrcReg = SrcReg;
1768 PrevRegMask = RegMask;
1800 if (
const auto *Entry =
1802 return LT.first * Entry->Cost;
1835 if (
const auto *Entry =
1837 return LT.first * Entry->Cost;
1914 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1915 if (
auto KindCost = Entry->Cost[
CostKind])
1916 return LT.first * *KindCost;
1932 if (IsInLaneShuffle && ST->
hasAVX2())
1933 if (
const auto *Entry =
1935 return LT.first * Entry->Cost;
1988 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1989 return LT.first * Entry->Cost;
2010 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
2011 return LT.first * Entry->Cost;
2038 if (IsInLaneShuffle && ST->
hasAVX())
2039 if (
const auto *Entry =
2041 return LT.first * Entry->Cost;
2103 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2104 return LT.first * Entry->Cost;
2117 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2118 return LT.first * Entry->Cost;
2149 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2150 return LT.first * Entry->Cost;
2206 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2208 if (
const auto *Entry =
2211 LT.second.getVectorElementCount()) &&
2212 "Table entry missing from isLegalBroadcastLoad()");
2213 return LT.first * Entry->Cost;
2216 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2217 return LT.first * Entry->Cost;
2230 if (LT.first == 1 && LT.second == MVT::v4f32 && Mask.size() == 4) {
2232 auto MatchSHUFPS = [](
int X,
int Y) {
2233 return X < 0 ||
Y < 0 || ((
X & 4) == (
Y & 4));
2235 if (MatchSHUFPS(Mask[0], Mask[1]) && MatchSHUFPS(Mask[2], Mask[3]))
2238 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2239 return LT.first * Entry->Cost;
2251 assert(ISD &&
"Invalid opcode");
2378 {
ISD::FP_ROUND, MVT::v16f16, MVT::v16f32, { 1, 1, 1, 1 } },
2400 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2401 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2748 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2825 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
3049 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
3078 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3079 if (
auto KindCost = Entry->Cost[
CostKind])
3084 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3085 if (
auto KindCost = Entry->Cost[
CostKind])
3090 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3091 if (
auto KindCost = Entry->Cost[
CostKind])
3097 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3098 if (
auto KindCost = Entry->Cost[
CostKind])
3103 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3104 if (
auto KindCost = Entry->Cost[
CostKind])
3109 SimpleDstTy, SimpleSrcTy))
3110 if (
auto KindCost = Entry->Cost[
CostKind])
3115 SimpleDstTy, SimpleSrcTy))
3116 if (
auto KindCost = Entry->Cost[
CostKind])
3122 SimpleDstTy, SimpleSrcTy))
3123 if (
auto KindCost = Entry->Cost[
CostKind])
3127 if (ST->hasF16C()) {
3129 SimpleDstTy, SimpleSrcTy))
3130 if (
auto KindCost = Entry->Cost[
CostKind])
3136 SimpleDstTy, SimpleSrcTy))
3137 if (
auto KindCost = Entry->Cost[
CostKind])
3143 SimpleDstTy, SimpleSrcTy))
3144 if (
auto KindCost = Entry->Cost[
CostKind])
3167 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3168 if (
auto KindCost = Entry->Cost[
CostKind])
3169 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3173 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3174 if (
auto KindCost = Entry->Cost[
CostKind])
3175 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3179 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3180 if (
auto KindCost = Entry->Cost[
CostKind])
3181 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3186 LTDest.second, LTSrc.second))
3187 if (
auto KindCost = Entry->Cost[
CostKind])
3188 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3192 LTDest.second, LTSrc.second))
3193 if (
auto KindCost = Entry->Cost[
CostKind])
3194 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3198 LTDest.second, LTSrc.second))
3199 if (
auto KindCost = Entry->Cost[
CostKind])
3200 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3204 LTDest.second, LTSrc.second))
3205 if (
auto KindCost = Entry->Cost[
CostKind])
3206 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3210 LTDest.second, LTSrc.second))
3211 if (
auto KindCost = Entry->Cost[
CostKind])
3212 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3214 if (ST->hasF16C()) {
3216 LTDest.second, LTSrc.second))
3217 if (
auto KindCost = Entry->Cost[
CostKind])
3218 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3223 LTDest.second, LTSrc.second))
3224 if (
auto KindCost = Entry->Cost[
CostKind])
3225 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3229 LTDest.second, LTSrc.second))
3230 if (
auto KindCost = Entry->Cost[
CostKind])
3231 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3236 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3237 Type *ExtSrc = Src->getWithNewBitWidth(32);
3243 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3253 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3254 Type *TruncDst = Dst->getWithNewBitWidth(32);
3264 return Cost == 0 ? 0 :
N;
3278 Op1Info, Op2Info,
I);
3283 MVT MTy = LT.second;
3286 assert(ISD &&
"Invalid opcode");
3289 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3302 Pred = cast<CmpInst>(
I)->getPredicate();
3304 bool CmpWithConstant =
false;
3305 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3306 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3311 ExtraCost = CmpWithConstant ? 0 : 1;
3316 ExtraCost = CmpWithConstant ? 0 : 1;
3322 ExtraCost = CmpWithConstant ? 1 : 2;
3333 ExtraCost = CmpWithConstant ? 2 : 3;
3340 if (CondTy && !ST->
hasAVX())
3511 if (ST->useSLMArithCosts())
3513 if (
auto KindCost = Entry->Cost[
CostKind])
3514 return LT.first * (ExtraCost + *KindCost);
3518 if (
auto KindCost = Entry->Cost[
CostKind])
3519 return LT.first * (ExtraCost + *KindCost);
3523 if (
auto KindCost = Entry->Cost[
CostKind])
3524 return LT.first * (ExtraCost + *KindCost);
3528 if (
auto KindCost = Entry->Cost[
CostKind])
3529 return LT.first * (ExtraCost + *KindCost);
3533 if (
auto KindCost = Entry->Cost[
CostKind])
3534 return LT.first * (ExtraCost + *KindCost);
3538 if (
auto KindCost = Entry->Cost[
CostKind])
3539 return LT.first * (ExtraCost + *KindCost);
3543 if (
auto KindCost = Entry->Cost[
CostKind])
3544 return LT.first * (ExtraCost + *KindCost);
3548 if (
auto KindCost = Entry->Cost[
CostKind])
3549 return LT.first * (ExtraCost + *KindCost);
3553 if (
auto KindCost = Entry->Cost[
CostKind])
3554 return LT.first * (ExtraCost + *KindCost);
3558 if (
auto KindCost = Entry->Cost[
CostKind])
3559 return LT.first * (ExtraCost + *KindCost);
3567 Op1Info, Op2Info,
I);
3585 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3586 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3587 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3588 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3589 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3590 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3591 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3592 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3593 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3594 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3595 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3596 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3597 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3598 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3599 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3621 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3622 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3623 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3624 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3625 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3626 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3627 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3628 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3629 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3630 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3631 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3632 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3634 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3635 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3636 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3637 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3638 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3639 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3642 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3643 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3665 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3666 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3667 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3668 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3669 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3670 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3671 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3672 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3673 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3674 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3675 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3676 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3677 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3681 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3682 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3683 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3684 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3685 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3686 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3687 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3688 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3689 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3690 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3691 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3692 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3693 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3694 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3695 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3696 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3697 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3698 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3707 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3708 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3709 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3710 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3712 {
ISD::SMULO, MVT::v64i8, { 8, 21, 17, 18 } },
3714 {
ISD::UMULO, MVT::v64i8, { 8, 15, 15, 16 } },
3719 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3720 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3721 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3722 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3727 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3728 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3729 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3730 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3731 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3732 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3733 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3734 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3735 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3743 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3744 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3745 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3746 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3747 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3748 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3749 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3750 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3751 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3752 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3753 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3754 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3755 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3756 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3757 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3758 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3759 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3760 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3761 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3762 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3763 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3764 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3765 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3766 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3781 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3782 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3783 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3784 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3785 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3786 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3787 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3788 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3789 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3790 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3791 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3792 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3793 {
ISD::SMULO, MVT::v8i64, { 44, 44, 81, 93 } },
3794 {
ISD::SMULO, MVT::v16i32, { 5, 12, 9, 11 } },
3795 {
ISD::SMULO, MVT::v32i16, { 6, 12, 17, 17 } },
3796 {
ISD::SMULO, MVT::v64i8, { 22, 28, 42, 42 } },
3805 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3806 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3807 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3808 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3809 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3810 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3811 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3812 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3813 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3814 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3815 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3816 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3817 {
ISD::UMULO, MVT::v8i64, { 52, 52, 95, 104} },
3818 {
ISD::UMULO, MVT::v16i32, { 5, 12, 8, 10 } },
3819 {
ISD::UMULO, MVT::v32i16, { 5, 13, 16, 16 } },
3820 {
ISD::UMULO, MVT::v64i8, { 18, 24, 30, 30 } },
3847 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3850 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3851 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3867 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3868 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3869 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3870 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3871 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3872 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3873 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3874 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3875 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3876 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3877 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3878 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3879 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3880 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3881 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3882 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3893 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3894 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3895 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3896 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3897 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3898 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3899 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3900 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3915 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3916 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3917 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3918 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3919 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3920 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3921 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3922 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3923 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3924 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3925 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3926 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3927 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3928 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3931 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3932 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3933 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3934 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3935 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3936 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3937 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3938 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3945 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3946 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3947 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3948 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3949 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3950 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3951 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3952 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3953 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3954 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3955 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
3956 {
ISD::SMULO, MVT::v2i64, { 8, 8, 13, 15 } },
3957 {
ISD::SMULO, MVT::v8i32, { 8, 20, 13, 24 } },
3958 {
ISD::SMULO, MVT::v4i32, { 5, 15, 11, 12 } },
3959 {
ISD::SMULO, MVT::v16i16, { 4, 14, 8, 14 } },
3961 {
ISD::SMULO, MVT::v32i8, { 9, 15, 18, 35 } },
3962 {
ISD::SMULO, MVT::v16i8, { 6, 22, 14, 21 } },
3974 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3975 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3976 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3977 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3978 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3979 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3980 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3981 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3982 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3983 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3984 {
ISD::UMULO, MVT::v4i64, { 24, 24, 39, 43 } },
3985 {
ISD::UMULO, MVT::v2i64, { 10, 10, 15, 19 } },
3986 {
ISD::UMULO, MVT::v8i32, { 8, 11, 13, 23 } },
3987 {
ISD::UMULO, MVT::v4i32, { 5, 12, 11, 12 } },
3988 {
ISD::UMULO, MVT::v16i16, { 4, 6, 8, 13 } },
3990 {
ISD::UMULO, MVT::v32i8, { 9, 13, 17, 33 } },
3991 {
ISD::UMULO, MVT::v16i8, { 6, 19, 13, 20 } },
4005 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
4007 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
4008 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
4011 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
4012 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
4013 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
4014 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
4027 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
4029 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
4030 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
4031 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
4032 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
4033 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
4034 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
4035 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
4036 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
4037 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
4038 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
4039 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
4040 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
4041 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
4042 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
4043 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
4044 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
4045 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
4046 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
4047 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
4048 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
4049 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
4050 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
4051 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
4052 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
4058 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
4059 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
4060 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4061 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4062 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4063 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
4064 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4065 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4066 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4067 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4068 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
4069 {
ISD::SMULO, MVT::v2i64, { 9, 9, 13, 17 } },
4070 {
ISD::SMULO, MVT::v8i32, { 15, 20, 24, 29 } },
4071 {
ISD::SMULO, MVT::v4i32, { 7, 15, 11, 13 } },
4072 {
ISD::SMULO, MVT::v16i16, { 8, 14, 14, 15 } },
4074 {
ISD::SMULO, MVT::v32i8, { 20, 20, 37, 39 } },
4075 {
ISD::SMULO, MVT::v16i8, { 9, 22, 18, 21 } },
4086 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
4087 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
4088 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4089 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4090 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4091 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
4092 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
4093 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4094 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4095 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4096 {
ISD::UMULO, MVT::v4i64, { 24, 26, 39, 45 } },
4097 {
ISD::UMULO, MVT::v2i64, { 10, 12, 15, 20 } },
4098 {
ISD::UMULO, MVT::v8i32, { 14, 15, 23, 28 } },
4099 {
ISD::UMULO, MVT::v4i32, { 7, 12, 11, 13 } },
4100 {
ISD::UMULO, MVT::v16i16, { 7, 11, 13, 14 } },
4102 {
ISD::UMULO, MVT::v32i8, { 19, 19, 35, 37 } },
4103 {
ISD::UMULO, MVT::v16i8, { 9, 19, 17, 20 } },
4117 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
4118 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
4120 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
4121 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
4146 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
4148 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
4155 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
4157 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
4165 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
4168 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
4173 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
4174 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4175 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4176 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4177 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4178 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4179 {
ISD::SMULO, MVT::v2i64, { 9, 11, 13, 17 } },
4180 {
ISD::SMULO, MVT::v4i32, { 20, 24, 13, 19 } },
4182 {
ISD::SMULO, MVT::v16i8, { 13, 22, 24, 25 } },
4187 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
4188 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4189 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4190 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
4191 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4192 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4193 {
ISD::UMULO, MVT::v2i64, { 14, 20, 15, 20 } },
4194 {
ISD::UMULO, MVT::v4i32, { 19, 22, 12, 18 } },
4196 {
ISD::UMULO, MVT::v16i8, { 13, 19, 18, 20 } },
4199 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
4200 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
4201 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
4209 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
4210 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
4211 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
4212 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
4213 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
4214 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
4215 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
4216 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
4217 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
4218 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
4219 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
4220 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
4223 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
4224 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
4225 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
4226 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
4231 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
4234 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
4235 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
4236 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
4237 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
4238 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
4239 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
4240 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
4241 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
4242 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
4243 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4244 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4245 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4250 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4251 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4252 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4253 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4254 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4255 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4256 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4257 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4258 {
ISD::SMULO, MVT::v2i64, { 30, 33, 13, 23 } },
4259 {
ISD::SMULO, MVT::v4i32, { 20, 24, 23, 23 } },
4261 {
ISD::SMULO, MVT::v16i8, { 13, 23, 24, 25 } },
4270 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4271 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4272 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4273 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4274 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4275 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4276 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4277 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4278 {
ISD::UMULO, MVT::v2i64, { 30, 33, 15, 29 } },
4279 {
ISD::UMULO, MVT::v4i32, { 19, 22, 14, 18 } },
4281 {
ISD::UMULO, MVT::v16i8, { 13, 19, 20, 20 } },
4289 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4295 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4298 {
ISD::CTTZ, MVT::i64, { 1, 1, 1, 1 } },
4301 {
ISD::CTTZ, MVT::i32, { 1, 1, 1, 1 } },
4302 {
ISD::CTTZ, MVT::i16, { 2, 1, 1, 1 } },
4306 {
ISD::CTLZ, MVT::i64, { 1, 1, 1, 1 } },
4309 {
ISD::CTLZ, MVT::i32, { 1, 1, 1, 1 } },
4310 {
ISD::CTLZ, MVT::i16, { 2, 1, 1, 1 } },
4322 {
ISD::ABS, MVT::i64, { 1, 2, 3, 3 } },
4325 {
ISD::CTLZ, MVT::i64, { 2, 2, 4, 5 } },
4327 {
ISD::CTTZ, MVT::i64, { 2, 2, 3, 4 } },
4330 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4331 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4333 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4338 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4339 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4340 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4341 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4348 {
ISD::ABS, MVT::i32, { 1, 2, 3, 3 } },
4349 {
ISD::ABS, MVT::i16, { 2, 2, 3, 3 } },
4350 {
ISD::ABS, MVT::i8, { 2, 4, 4, 3 } },
4356 {
ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } },
4357 {
ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } },
4362 {
ISD::CTTZ, MVT::i32, { 2, 2, 3, 3 } },
4363 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 3 } },
4371 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4372 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4374 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4375 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4380 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4381 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4395 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4396 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4398 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4399 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4401 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4402 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4404 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4405 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4428 case Intrinsic::abs:
4431 case Intrinsic::bitreverse:
4434 case Intrinsic::bswap:
4437 case Intrinsic::ctlz:
4440 case Intrinsic::ctpop:
4443 case Intrinsic::cttz:
4446 case Intrinsic::fshl:
4450 if (Args[0] == Args[1]) {
4461 case Intrinsic::fshr:
4466 if (Args[0] == Args[1]) {
4477 case Intrinsic::lrint:
4478 case Intrinsic::llrint:
4487 case Intrinsic::maxnum:
4488 case Intrinsic::minnum:
4492 case Intrinsic::sadd_sat:
4495 case Intrinsic::smax:
4498 case Intrinsic::smin:
4501 case Intrinsic::ssub_sat:
4504 case Intrinsic::uadd_sat:
4507 case Intrinsic::umax:
4510 case Intrinsic::umin:
4513 case Intrinsic::usub_sat:
4516 case Intrinsic::sqrt:
4519 case Intrinsic::sadd_with_overflow:
4520 case Intrinsic::ssub_with_overflow:
4523 OpTy =
RetTy->getContainedType(0);
4525 case Intrinsic::uadd_with_overflow:
4526 case Intrinsic::usub_with_overflow:
4529 OpTy =
RetTy->getContainedType(0);
4531 case Intrinsic::smul_with_overflow:
4533 OpTy =
RetTy->getContainedType(0);
4535 case Intrinsic::umul_with_overflow:
4537 OpTy =
RetTy->getContainedType(0);
4542 auto adjustTableCost = [&](
int ISD,
unsigned Cost,
4543 std::pair<InstructionCost, MVT> LT,
4546 MVT MTy = LT.second;
4553 return LegalizationCost * 1;
4558 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4560 if (
II->hasOneUse() && isa<StoreInst>(
II->user_back()))
4562 if (
auto *LI = dyn_cast<LoadInst>(
II->getOperand(0))) {
4563 if (LI->hasOneUse())
4570 return LegalizationCost * (int)
Cost;
4575 MVT MTy = LT.second;
4578 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4579 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4582 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4583 if (Cst->isAllOnesValue())
4591 if (ST->useGLMDivSqrtCosts())
4593 if (
auto KindCost = Entry->Cost[
CostKind])
4594 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4596 if (ST->useSLMArithCosts())
4598 if (
auto KindCost = Entry->Cost[
CostKind])
4599 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4602 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4603 if (
auto KindCost = Entry->Cost[
CostKind])
4604 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4606 if (ST->hasBITALG())
4607 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4608 if (
auto KindCost = Entry->Cost[
CostKind])
4609 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4611 if (ST->hasVPOPCNTDQ())
4612 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4613 if (
auto KindCost = Entry->Cost[
CostKind])
4614 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4618 if (
auto KindCost = Entry->Cost[
CostKind])
4619 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4623 if (
auto KindCost = Entry->Cost[
CostKind])
4624 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4628 if (
auto KindCost = Entry->Cost[
CostKind])
4629 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4633 if (
auto KindCost = Entry->Cost[
CostKind])
4634 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4638 if (
auto KindCost = Entry->Cost[
CostKind])
4639 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4643 if (
auto KindCost = Entry->Cost[
CostKind])
4644 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4648 if (
auto KindCost = Entry->Cost[
CostKind])
4649 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4653 if (
auto KindCost = Entry->Cost[
CostKind])
4654 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4658 if (
auto KindCost = Entry->Cost[
CostKind])
4659 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4663 if (
auto KindCost = Entry->Cost[
CostKind])
4664 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4668 if (
auto KindCost = Entry->Cost[
CostKind])
4669 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4673 if (
auto KindCost = Entry->Cost[
CostKind])
4674 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4679 if (
auto KindCost = Entry->Cost[
CostKind])
4680 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4683 if (
auto KindCost = Entry->Cost[
CostKind])
4684 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4687 if (ST->hasLZCNT()) {
4690 if (
auto KindCost = Entry->Cost[
CostKind])
4691 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4694 if (
auto KindCost = Entry->Cost[
CostKind])
4695 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4698 if (ST->hasPOPCNT()) {
4701 if (
auto KindCost = Entry->Cost[
CostKind])
4702 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4705 if (
auto KindCost = Entry->Cost[
CostKind])
4706 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4711 if (
auto KindCost = Entry->Cost[
CostKind])
4712 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4715 if (
auto KindCost = Entry->Cost[
CostKind])
4716 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4724 unsigned Index,
Value *Op0,
4739 if (Index == -1U && (Opcode == Instruction::ExtractElement ||
4740 Opcode == Instruction::InsertElement)) {
4745 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4750 if (Opcode == Instruction::ExtractElement) {
4756 if (Opcode == Instruction::InsertElement) {
4764 if (Index != -1U && (Opcode == Instruction::ExtractElement ||
4765 Opcode == Instruction::InsertElement)) {
4767 if (Opcode == Instruction::ExtractElement &&
4769 cast<FixedVectorType>(Val)->getNumElements() > 1)
4776 if (!LT.second.isVector())
4780 unsigned SizeInBits = LT.second.getSizeInBits();
4781 unsigned NumElts = LT.second.getVectorNumElements();
4782 unsigned SubNumElts = NumElts;
4783 Index = Index % NumElts;
4787 if (SizeInBits > 128) {
4788 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4789 unsigned NumSubVecs = SizeInBits / 128;
4790 SubNumElts = NumElts / NumSubVecs;
4791 if (SubNumElts <= Index) {
4792 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4793 Index %= SubNumElts;
4797 MVT MScalarTy = LT.second.getScalarType();
4798 auto IsCheapPInsrPExtrInsertPS = [&]() {
4801 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4803 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4804 Opcode == Instruction::InsertElement);
4812 (Opcode != Instruction::InsertElement || !Op0 ||
4813 isa<UndefValue>(Op0)))
4814 return RegisterFileMoveCost;
4816 if (Opcode == Instruction::InsertElement &&
4817 isa_and_nonnull<UndefValue>(Op0)) {
4819 if (isa_and_nonnull<LoadInst>(Op1))
4820 return RegisterFileMoveCost;
4821 if (!IsCheapPInsrPExtrInsertPS()) {
4824 return 2 + RegisterFileMoveCost;
4826 return 1 + RegisterFileMoveCost;
4831 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4832 return 1 + RegisterFileMoveCost;
4836 assert(ISD &&
"Unexpected vector opcode");
4837 if (ST->useSLMArithCosts())
4839 return Entry->Cost + RegisterFileMoveCost;
4842 if (IsCheapPInsrPExtrInsertPS())
4843 return 1 + RegisterFileMoveCost;
4852 if (Opcode == Instruction::InsertElement) {
4853 auto *SubTy = cast<VectorType>(Val);
4861 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4865 RegisterFileMoveCost;
4872 cast<FixedVectorType>(Ty)->getNumElements() &&
4873 "Vector size mismatch");
4876 MVT MScalarTy = LT.second.getScalarType();
4877 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4880 constexpr unsigned LaneBitWidth = 128;
4881 assert((LegalVectorBitWidth < LaneBitWidth ||
4882 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4885 const int NumLegalVectors = *LT.first.getValue();
4886 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4891 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4893 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4896 if (LegalVectorBitWidth <= LaneBitWidth) {
4912 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4913 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4914 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4915 unsigned NumLegalElts =
4916 LT.second.getVectorNumElements() * NumLegalVectors;
4918 "Vector has been legalized to smaller element count");
4919 assert((NumLegalElts % NumLanesTotal) == 0 &&
4920 "Unexpected elts per lane");
4921 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4923 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4927 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4929 NumEltsPerLane, NumEltsPerLane *
I);
4930 if (LaneEltMask.
isZero())
4936 I * NumEltsPerLane, LaneTy);
4941 APInt AffectedLanes =
4944 AffectedLanes, NumLegalVectors,
true);
4945 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4946 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4947 unsigned I = NumLegalLanes * LegalVec + Lane;
4950 if (!AffectedLanes[
I] ||
4951 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4954 I * NumEltsPerLane, LaneTy);
4958 }
else if (LT.second.isVector()) {
4969 unsigned NumElts = LT.second.getVectorNumElements();
4972 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4981 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4982 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4983 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4987 if (LT.second.isVector()) {
4988 unsigned NumLegalElts =
4989 LT.second.getVectorNumElements() * NumLegalVectors;
4991 "Vector has been legalized to smaller element count");
4995 if (LegalVectorBitWidth > LaneBitWidth) {
4996 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4997 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4998 assert((NumLegalElts % NumLanesTotal) == 0 &&
4999 "Unexpected elts per lane");
5000 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
5004 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
5008 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
5010 NumEltsPerLane,
I * NumEltsPerLane);
5011 if (LaneEltMask.
isZero())
5014 I * NumEltsPerLane, LaneTy);
5016 LaneTy, LaneEltMask,
false, Extract,
CostKind);
5033 int VF,
const APInt &DemandedDstElts,
5039 auto bailout = [&]() {
5049 unsigned PromEltTyBits = EltTyBits;
5050 switch (EltTyBits) {
5081 int NumDstElements = VF * ReplicationFactor;
5095 if (PromEltTyBits != EltTyBits) {
5101 Instruction::SExt, PromSrcVecTy, SrcVecTy,
5108 ReplicationFactor, VF,
5114 "We expect that the legalization doesn't affect the element width, "
5115 "doesn't coalesce/split elements.");
5118 unsigned NumDstVectors =
5119 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
5128 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
5129 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
5134 return NumDstVectorsDemanded * SingleShuffleCost;
5145 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
5148 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
5149 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
5156 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
5166 auto *VTy = dyn_cast<FixedVectorType>(Src);
5171 if (Opcode == Instruction::Store && OpInfo.
isConstant())
5177 if (!VTy || !LT.second.isVector()) {
5179 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
5182 bool IsLoad = Opcode == Instruction::Load;
5184 Type *EltTy = VTy->getElementType();
5189 const unsigned SrcNumElt = VTy->getNumElements();
5192 int NumEltRemaining = SrcNumElt;
5194 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
5196 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
5199 const unsigned XMMBits = 128;
5200 if (XMMBits % EltTyBits != 0)
5204 const int NumEltPerXMM = XMMBits / EltTyBits;
5208 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
5209 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
5211 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
5215 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
5217 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
5218 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
5219 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
5220 "Unless we haven't halved the op size yet, "
5221 "we have less than two op's sized units of work left.");
5223 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
5227 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
5228 "After halving sizes, the vector elt count is no longer a multiple "
5229 "of number of elements per operation?");
5230 auto *CoalescedVecTy =
5231 CurrNumEltPerOp == 1
5235 EltTyBits * CurrNumEltPerOp),
5236 CurrVecTy->getNumElements() / CurrNumEltPerOp);
5239 "coalesciing elements doesn't change vector width.");
5241 while (NumEltRemaining > 0) {
5242 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
5246 if (NumEltRemaining < CurrNumEltPerOp &&
5247 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
5248 CurrOpSizeBytes != 1)
5256 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5258 else if (CurrOpSizeBytes < 4)
5268 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
5271 if (SubVecEltsLeft == 0) {
5272 SubVecEltsLeft += CurrVecTy->getNumElements();
5277 VTy, {},
CostKind, NumEltDone(), CurrVecTy);
5284 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5285 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
5286 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
5287 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5288 APInt DemandedElts =
5290 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
5291 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5296 SubVecEltsLeft -= CurrNumEltPerOp;
5297 NumEltRemaining -= CurrNumEltPerOp;
5302 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5311 bool IsLoad = (Instruction::Load == Opcode);
5312 bool IsStore = (Instruction::Store == Opcode);
5314 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5319 unsigned NumElem = SrcVTy->getNumElements();
5327 MaskTy, DemandedElts,
false,
true,
CostKind);
5332 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5334 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5338 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5346 if (Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64)
5348 return Cost + LT.first;
5350 if (VT.isSimple() && Ty != VT.getSimpleVT() &&
5351 LT.second.getVectorNumElements() == NumElem)
5368 return Cost + LT.first * (IsLoad ? 2 : 8);
5371 return Cost + LT.first;
5379 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5383 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5385 return getGEPCost(BaseGEP->getSourceElementType(),
5386 BaseGEP->getPointerOperand(), Indices,
nullptr,
5401 const unsigned NumVectorInstToHideOverhead = 10;
5414 return NumVectorInstToHideOverhead;
5424 std::optional<FastMathFlags> FMF,
5465 assert(ISD &&
"Invalid opcode");
5473 if (ST->useSLMArithCosts())
5488 MVT MTy = LT.second;
5490 auto *ValVTy = cast<FixedVectorType>(ValTy);
5503 if (LT.first != 1 && MTy.
isVector() &&
5509 ArithmeticCost *= LT.first - 1;
5512 if (ST->useSLMArithCosts())
5514 return ArithmeticCost + Entry->Cost;
5518 return ArithmeticCost + Entry->Cost;
5522 return ArithmeticCost + Entry->Cost;
5571 if (ValVTy->getElementType()->isIntegerTy(1)) {
5573 if (LT.first != 1 && MTy.
isVector() &&
5579 ArithmeticCost *= LT.first - 1;
5583 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5584 return ArithmeticCost + Entry->Cost;
5587 return ArithmeticCost + Entry->Cost;
5590 return ArithmeticCost + Entry->Cost;
5593 return ArithmeticCost + Entry->Cost;
5598 unsigned NumVecElts = ValVTy->getNumElements();
5599 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5609 if (LT.first != 1 && MTy.
isVector() &&
5615 ReductionCost *= LT.first - 1;
5621 while (NumVecElts > 1) {
5623 unsigned Size = NumVecElts * ScalarSize;
5631 }
else if (
Size == 128) {
5634 if (ValVTy->isFloatingPointTy())
5642 }
else if (
Size == 64) {
5645 if (ValVTy->isFloatingPointTy())
5658 Instruction::LShr, ShiftTy,
CostKind,
5685 MVT MTy = LT.second;
5689 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5693 "Expected float point or integer vector type.");
5694 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5762 auto *ValVTy = cast<FixedVectorType>(ValTy);
5763 unsigned NumVecElts = ValVTy->getNumElements();
5767 if (LT.first != 1 && MTy.
isVector() &&
5773 MinMaxCost *= LT.first - 1;
5779 return MinMaxCost + Entry->Cost;
5783 return MinMaxCost + Entry->Cost;
5787 return MinMaxCost + Entry->Cost;
5791 return MinMaxCost + Entry->Cost;
5803 while (NumVecElts > 1) {
5805 unsigned Size = NumVecElts * ScalarSize;
5813 }
else if (
Size == 128) {
5823 }
else if (
Size == 64) {
5884 if (BitSize % 64 != 0)
5885 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5890 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5896 return std::max<InstructionCost>(1,
Cost);
5906 unsigned ImmBitWidth = Imm.getBitWidth();
5913 unsigned ImmIdx = ~0U;
5917 case Instruction::GetElementPtr:
5924 case Instruction::Store:
5927 case Instruction::ICmp:
5933 if (
Idx == 1 && ImmBitWidth == 64) {
5934 uint64_t ImmVal = Imm.getZExtValue();
5935 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5940 case Instruction::And:
5944 if (
Idx == 1 && ImmBitWidth == 64 && Imm.isIntN(32))
5947 if (
Idx == 1 && ImmBitWidth == 64 && ST->is64Bit() && ST->hasBMI() &&
5952 case Instruction::Add:
5953 case Instruction::Sub:
5955 if (
Idx == 1 && ImmBitWidth == 64 && Imm.getZExtValue() == 0x80000000)
5959 case Instruction::UDiv:
5960 case Instruction::SDiv:
5961 case Instruction::URem:
5962 case Instruction::SRem:
5967 case Instruction::Mul:
5968 case Instruction::Or:
5969 case Instruction::Xor:
5973 case Instruction::Shl:
5974 case Instruction::LShr:
5975 case Instruction::AShr:
5979 case Instruction::Trunc:
5980 case Instruction::ZExt:
5981 case Instruction::SExt:
5982 case Instruction::IntToPtr:
5983 case Instruction::PtrToInt:
5984 case Instruction::BitCast:
5985 case Instruction::PHI:
5986 case Instruction::Call:
5987 case Instruction::Select:
5988 case Instruction::Ret:
5989 case Instruction::Load:
5993 if (
Idx == ImmIdx) {
6018 case Intrinsic::sadd_with_overflow:
6019 case Intrinsic::uadd_with_overflow:
6020 case Intrinsic::ssub_with_overflow:
6021 case Intrinsic::usub_with_overflow:
6022 case Intrinsic::smul_with_overflow:
6023 case Intrinsic::umul_with_overflow:
6024 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
6027 case Intrinsic::experimental_stackmap:
6028 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6031 case Intrinsic::experimental_patchpoint_void:
6032 case Intrinsic::experimental_patchpoint:
6033 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6049int X86TTIImpl::getGatherOverhead()
const {
6062int X86TTIImpl::getScatterOverhead()
const {
6076 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
6077 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
6087 if (IndexSize < 64 || !
GEP)
6090 unsigned NumOfVarIndices = 0;
6091 const Value *Ptrs =
GEP->getPointerOperand();
6094 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
6095 if (isa<Constant>(
GEP->getOperand(
I)))
6097 Type *IndxTy =
GEP->getOperand(
I)->getType();
6098 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
6099 IndxTy = IndexVTy->getElementType();
6101 !isa<SExtInst>(
GEP->getOperand(
I))) ||
6102 ++NumOfVarIndices > 1)
6105 return (
unsigned)32;
6110 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
6111 ? getIndexSizeInBits(
Ptr,
DL)
6119 *std::max(IdxsLT.first, SrcLT.first).getValue();
6120 if (SplitFactor > 1) {
6124 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
6134 const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
6135 : getScatterOverhead();
6143 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
6146 if ((Opcode == Instruction::Load &&
6149 Align(Alignment)))) ||
6150 (Opcode == Instruction::Store &&
6153 Align(Alignment)))))
6159 if (!PtrTy &&
Ptr->getType()->isVectorTy())
6160 PtrTy = dyn_cast<PointerType>(
6161 cast<VectorType>(
Ptr->getType())->getElementType());
6162 assert(PtrTy &&
"Unexpected type for Ptr argument");
6164 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
6180 return ST->hasMacroFusion() || ST->hasBranchFusion();
6187 if (isa<VectorType>(DataTy) && cast<FixedVectorType>(DataTy)->
getNumElements() == 1)
6199 if (ScalarTy->
isHalfTy() && ST->hasBWI())
6209 return IntWidth == 32 || IntWidth == 64 ||
6210 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
6222 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
6239 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6261 if (!isa<VectorType>(DataTy))
6271 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6280 return IntWidth == 32 || IntWidth == 64 ||
6281 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6288bool X86TTIImpl::supportsGather()
const {
6302 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6303 return NumElts == 1 ||
6304 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6319 return IntWidth == 32 || IntWidth == 64;
6323 if (!supportsGather() || !ST->preferGather())
6338 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6339 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6344 for (
int Lane : seq<int>(0, NumElements)) {
6345 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6347 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6349 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6353 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6355 return ST->
hasSSE3() && NumElements % 4 == 0;
6357 return ST->
hasSSE3() && NumElements % 2 == 0;
6363 if (!ST->
hasAVX512() || !ST->preferScatter())
6376 if (
I->getOpcode() == Instruction::FDiv)
6392 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6394 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6397 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6398 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6399 if (RealCallerBits == RealCalleeBits)
6404 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6408 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6410 if (CB->isInlineAsm())
6414 for (
Value *Arg : CB->args())
6415 Types.push_back(Arg->getType());
6416 if (!CB->getType()->isVoidTy())
6417 Types.push_back(CB->getType());
6420 auto IsSimpleTy = [](
Type *Ty) {
6421 return !Ty->isVectorTy() && !Ty->isAggregateType();
6423 if (
all_of(Types, IsSimpleTy))
6426 if (
Function *NestedCallee = CB->getCalledFunction()) {
6428 if (NestedCallee->isIntrinsic())
6463 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6472 Options.AllowOverlappingLoads =
true;
6477 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6478 Options.LoadSizes.push_back(64);
6479 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6480 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6482 if (ST->is64Bit()) {
6483 Options.LoadSizes.push_back(8);
6485 Options.LoadSizes.push_back(4);
6486 Options.LoadSizes.push_back(2);
6487 Options.LoadSizes.push_back(1);
6492 return supportsGather();
6503 return !(ST->isAtom());
6523 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6529 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6542 if (UseMaskedMemOp) {
6544 for (
unsigned Index : Indices) {
6545 assert(Index < Factor &&
"Invalid index for interleaved memory op");
6546 for (
unsigned Elm = 0; Elm < VF; Elm++)
6547 DemandedLoadStoreElts.
setBit(Index + Elm * Factor);
6554 UseMaskForGaps ? DemandedLoadStoreElts
6563 if (UseMaskForGaps) {
6569 if (Opcode == Instruction::Load) {
6576 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6577 {3, MVT::v16i8, 12},
6578 {3, MVT::v32i8, 14},
6579 {3, MVT::v64i8, 22},
6582 if (
const auto *Entry =
6584 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6596 unsigned NumOfLoadsInInterleaveGrp =
6597 Indices.
size() ? Indices.
size() : Factor;
6606 unsigned NumOfUnfoldedLoads =
6607 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6610 unsigned NumOfShufflesPerResult =
6611 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6618 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6621 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6628 assert(Opcode == Instruction::Store &&
6629 "Expected Store Instruction at this point");
6631 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6632 {3, MVT::v16i8, 12},
6633 {3, MVT::v32i8, 14},
6634 {3, MVT::v64i8, 26},
6637 {4, MVT::v16i8, 11},
6638 {4, MVT::v32i8, 14},
6642 if (
const auto *Entry =
6644 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6649 unsigned NumOfSources = Factor;
6652 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6656 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6659 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6667 bool UseMaskForCond,
bool UseMaskForGaps) {
6668 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6670 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6671 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6676 return ST->hasBWI();
6678 return ST->hasBF16();
6681 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6683 Opcode, VecTy, Factor, Indices, Alignment,
6686 if (UseMaskForCond || UseMaskForGaps)
6689 UseMaskForCond, UseMaskForGaps);
6709 unsigned VF = VecTy->getNumElements() / Factor;
6710 Type *ScalarTy = VecTy->getElementType();
6742 {2, MVT::v16i16, 9},
6743 {2, MVT::v32i16, 18},
6746 {2, MVT::v16i32, 8},
6747 {2, MVT::v32i32, 16},
6751 {2, MVT::v16i64, 16},
6752 {2, MVT::v32i64, 32},
6757 {3, MVT::v16i8, 11},
6758 {3, MVT::v32i8, 14},
6763 {3, MVT::v16i16, 28},
6764 {3, MVT::v32i16, 56},
6769 {3, MVT::v16i32, 14},
6770 {3, MVT::v32i32, 32},
6774 {3, MVT::v8i64, 10},
6775 {3, MVT::v16i64, 20},
6780 {4, MVT::v16i8, 24},
6781 {4, MVT::v32i8, 56},
6784 {4, MVT::v4i16, 17},
6785 {4, MVT::v8i16, 33},
6786 {4, MVT::v16i16, 75},
6787 {4, MVT::v32i16, 150},
6791 {4, MVT::v8i32, 16},
6792 {4, MVT::v16i32, 32},
6793 {4, MVT::v32i32, 68},
6797 {4, MVT::v8i64, 20},
6798 {4, MVT::v16i64, 40},
6803 {6, MVT::v16i8, 43},
6804 {6, MVT::v32i8, 82},
6806 {6, MVT::v2i16, 13},
6808 {6, MVT::v8i16, 39},
6809 {6, MVT::v16i16, 106},
6810 {6, MVT::v32i16, 212},
6813 {6, MVT::v4i32, 15},
6814 {6, MVT::v8i32, 31},
6815 {6, MVT::v16i32, 64},
6818 {6, MVT::v4i64, 18},
6819 {6, MVT::v8i64, 36},
6824 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6838 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6843 {2, MVT::v16i16, 4},
6844 {2, MVT::v32i16, 8},
6848 {2, MVT::v16i32, 8},
6849 {2, MVT::v32i32, 16},
6854 {2, MVT::v16i64, 16},
6855 {2, MVT::v32i64, 32},
6860 {3, MVT::v16i8, 11},
6861 {3, MVT::v32i8, 13},
6865 {3, MVT::v8i16, 12},
6866 {3, MVT::v16i16, 27},
6867 {3, MVT::v32i16, 54},
6871 {3, MVT::v8i32, 11},
6872 {3, MVT::v16i32, 22},
6873 {3, MVT::v32i32, 48},
6877 {3, MVT::v8i64, 12},
6878 {3, MVT::v16i64, 24},
6884 {4, MVT::v32i8, 12},
6888 {4, MVT::v8i16, 10},
6889 {4, MVT::v16i16, 32},
6890 {4, MVT::v32i16, 64},
6894 {4, MVT::v8i32, 16},
6895 {4, MVT::v16i32, 32},
6896 {4, MVT::v32i32, 64},
6900 {4, MVT::v8i64, 20},
6901 {4, MVT::v16i64, 40},
6906 {6, MVT::v16i8, 27},
6907 {6, MVT::v32i8, 90},
6909 {6, MVT::v2i16, 10},
6910 {6, MVT::v4i16, 15},
6911 {6, MVT::v8i16, 21},
6912 {6, MVT::v16i16, 58},
6913 {6, MVT::v32i16, 90},
6916 {6, MVT::v4i32, 12},
6917 {6, MVT::v8i32, 33},
6918 {6, MVT::v16i32, 66},
6921 {6, MVT::v4i64, 15},
6922 {6, MVT::v8i64, 30},
6925 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6936 if (Opcode == Instruction::Load) {
6937 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6941 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6945 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6947 return GetDiscountedCost(Entry);
6950 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6952 return GetDiscountedCost(Entry);
6955 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6957 return GetDiscountedCost(Entry);
6959 assert(Opcode == Instruction::Store &&
6960 "Expected Store Instruction at this point");
6962 "Interleaved store only supports fully-interleaved groups.");
6964 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6966 return MemOpCosts + Entry->Cost;
6969 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6971 return MemOpCosts + Entry->Cost;
6976 UseMaskForCond, UseMaskForGaps);
6981 bool HasBaseReg, int64_t Scale,
6982 unsigned AddrSpace)
const {
7010 return AM.
Scale != 0;
7024 if (ST->hasXOP() && (Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64))
7029 if (ST->
hasAVX2() && (Bits == 32 || Bits == 64))
7033 if (ST->hasBWI() && Bits == 16)
7042 Type *ScalarValTy)
const {
7043 if (ST->hasF16C() && ScalarMemTy->
isHalfTy()) {
7057 if (
I->getOpcode() == Instruction::Mul &&
7059 for (
auto &
Op :
I->operands()) {
7061 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
7069 Ops.
push_back(&cast<Instruction>(
Op)->getOperandUse(0));
7078 return !Ops.
empty();
7084 int ShiftAmountOpNum = -1;
7086 ShiftAmountOpNum = 1;
7087 else if (
auto *
II = dyn_cast<IntrinsicInst>(
I)) {
7088 if (
II->getIntrinsicID() == Intrinsic::fshl ||
7089 II->getIntrinsicID() == Intrinsic::fshr)
7090 ShiftAmountOpNum = 2;
7093 if (ShiftAmountOpNum == -1)
7096 auto *Shuf = dyn_cast<ShuffleVectorInst>(
I->getOperand(ShiftAmountOpNum));
7099 Ops.
push_back(&
I->getOperandUse(ShiftAmountOpNum));
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isVectorShiftByScalarCheap(Type *Ty) const
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getBranchMispredictPenalty() const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMULO
Same for multiplication.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.