62#define DEBUG_TYPE "x86tti"
78 std::optional<unsigned>
164 bool Vector = (ClassID == 1);
171 if (!
Vector && ST->hasEGPR())
187 auto *VTy = dyn_cast<FixedVectorType>(Ty);
188 if (!Ty->
isIntegerTy() && (!VTy || VTy->getNumElements() != 1))
191 switch (cast<IntegerType>(ScalarTy)->
getBitWidth()) {
208 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
210 if (ST->
hasAVX() && PreferVectorWidth >= 256)
212 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
253 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
270 assert(ISD &&
"Invalid opcode");
272 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
273 (LT.second.getScalarType() == MVT::i32 ||
274 LT.second.getScalarType() == MVT::i64)) {
276 bool Op1Signed =
false, Op2Signed =
false;
279 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
280 bool SignedMode = Op1Signed || Op2Signed;
285 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
286 LT.second.getScalarType() == MVT::i32) {
288 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
290 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
291 bool Op1Sext = isa<SExtInst>(Args[0]) &&
292 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
293 bool Op2Sext = isa<SExtInst>(Args[1]) &&
294 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
296 bool IsZeroExtended = !Op1Signed || !Op2Signed;
297 bool IsConstant = Op1Constant || Op2Constant;
298 bool IsSext = Op1Sext || Op2Sext;
299 if (IsConstant || IsZeroExtended || IsSext)
307 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
310 if (!SignedMode && OpMinSize <= 8)
314 if (!SignedMode && OpMinSize <= 16)
321 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
374 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
375 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
376 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
377 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
378 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
379 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
380 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
381 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
382 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
386 if (
const auto *Entry =
388 if (
auto KindCost = Entry->Cost[
CostKind])
389 return LT.first * *KindCost;
392 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
393 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
394 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
395 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
396 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
397 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
398 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
399 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
400 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
402 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
403 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
404 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
405 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
406 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
407 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
411 if (
const auto *Entry =
413 if (
auto KindCost = Entry->Cost[
CostKind])
414 return LT.first * *KindCost;
417 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
418 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
419 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
421 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
422 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
423 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
425 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
426 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
427 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
428 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
429 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
430 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
432 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
433 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
434 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
435 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
436 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
437 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
438 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
447 if (
const auto *Entry =
449 if (
auto KindCost = Entry->Cost[
CostKind])
450 return LT.first * *KindCost;
453 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
454 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
455 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
456 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
457 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
458 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
460 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
461 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
462 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
463 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
464 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
465 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
467 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
468 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
469 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
470 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
471 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
472 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
474 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
475 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
476 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
477 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
478 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
479 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
488 if (
const auto *Entry =
490 if (
auto KindCost = Entry->Cost[
CostKind])
491 return LT.first * *KindCost;
494 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
495 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
496 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
497 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
498 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
499 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
501 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
502 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
503 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
504 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
505 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
506 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
508 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
509 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
510 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
511 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
512 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
513 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
515 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
516 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
517 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
518 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
519 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
520 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
530 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
531 if (
const auto *Entry =
533 if (
auto KindCost = Entry->Cost[
CostKind])
534 return LT.first * *KindCost;
537 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
538 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
539 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
541 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
542 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
543 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
545 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
546 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
547 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
549 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
550 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
551 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
561 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
562 if (
const auto *Entry =
564 if (
auto KindCost = Entry->Cost[
CostKind])
565 return LT.first * *KindCost;
580 if (
const auto *Entry =
582 if (
auto KindCost = Entry->Cost[
CostKind])
583 return LT.first * *KindCost;
603 if (
const auto *Entry =
605 if (
auto KindCost = Entry->Cost[
CostKind])
606 return LT.first * *KindCost;
626 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
627 if (
auto KindCost = Entry->Cost[
CostKind])
628 return LT.first * *KindCost;
648 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
649 if (
auto KindCost = Entry->Cost[
CostKind])
650 return LT.first * *KindCost;
658 if (
const auto *Entry =
660 if (
auto KindCost = Entry->Cost[
CostKind])
661 return LT.first * *KindCost;
681 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
682 if (
auto KindCost = Entry->Cost[
CostKind])
683 return LT.first * *KindCost;
686 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
687 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
688 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
689 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
690 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
691 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
692 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
693 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
694 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
696 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
697 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
698 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
702 if (
const auto *Entry =
704 if (
auto KindCost = Entry->Cost[
CostKind])
705 return LT.first * *KindCost;
708 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
709 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
710 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
712 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
713 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
714 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
716 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
717 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
718 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
719 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
720 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
721 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
722 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
726 if (
const auto *Entry =
728 if (
auto KindCost = Entry->Cost[
CostKind])
729 return LT.first * *KindCost;
733 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
734 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
735 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
736 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
737 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
738 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
740 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
741 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
742 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
743 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
744 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
745 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
747 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
748 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
749 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
750 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
751 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
752 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
754 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
755 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
756 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
757 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
758 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
759 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
763 if (
const auto *Entry =
765 if (
auto KindCost = Entry->Cost[
CostKind])
766 return LT.first * *KindCost;
769 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
770 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
771 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
772 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
773 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
774 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
776 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
777 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
778 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
779 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
780 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
781 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
783 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
784 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
785 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
786 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
787 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
788 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
790 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
791 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
792 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
793 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
794 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
795 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
800 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
801 if (
const auto *Entry =
803 if (
auto KindCost = Entry->Cost[
CostKind])
804 return LT.first * *KindCost;
808 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
809 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
810 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
812 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
813 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
814 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
816 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
817 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
818 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
820 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
821 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
822 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
826 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
827 if (
const auto *Entry =
829 if (
auto KindCost = Entry->Cost[
CostKind])
830 return LT.first * *KindCost;
833 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
834 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
835 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
840 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
841 if (
auto KindCost = Entry->Cost[
CostKind])
842 return LT.first * *KindCost;
845 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
846 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
847 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
848 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
849 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
850 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
851 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
852 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
853 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
855 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
856 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
857 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
858 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
859 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
860 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
861 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
862 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
863 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
865 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
866 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
868 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
869 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
870 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
871 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
873 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
874 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
876 {
ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } },
877 {
ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } },
878 {
ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } },
879 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
881 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
882 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
883 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
884 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
889 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
890 if (
auto KindCost = Entry->Cost[
CostKind])
891 return LT.first * *KindCost;
894 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
895 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
896 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
898 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
899 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
900 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
902 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
903 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
904 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
905 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
906 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
907 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
908 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
909 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
910 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
912 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
913 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
914 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
915 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
916 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
917 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
918 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
919 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
920 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
922 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
923 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
925 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
926 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
928 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
929 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
930 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
931 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
933 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
934 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
935 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
936 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
938 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
939 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
940 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
941 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
943 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
944 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
945 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
946 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
951 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
952 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
953 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
954 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
955 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
956 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
957 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
958 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
961 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
962 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
963 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
964 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
966 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
967 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
968 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
969 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
970 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
971 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
972 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
973 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
976 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
977 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
978 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
979 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
983 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
984 if (
auto KindCost = Entry->Cost[
CostKind])
985 return LT.first * *KindCost;
990 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
991 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
992 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
993 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
994 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
995 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
996 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
997 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
998 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
999 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
1011 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
1012 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
1019 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
1020 if (
auto KindCost = Entry->Cost[
CostKind])
1021 return LT.first * *KindCost;
1026 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1027 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1028 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1029 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1030 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1031 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1032 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1033 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1034 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1035 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1036 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1037 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1039 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1040 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1041 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1042 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1043 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1044 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1045 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1046 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1047 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1048 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1049 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1050 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1060 if (
const auto *Entry =
1062 if (
auto KindCost = Entry->Cost[
CostKind])
1063 return LT.first * *KindCost;
1070 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1071 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1076 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1077 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1078 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1079 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1082 if (ST->useGLMDivSqrtCosts())
1083 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1084 if (
auto KindCost = Entry->Cost[
CostKind])
1085 return LT.first * *KindCost;
1088 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1089 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1090 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1091 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1092 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1093 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1094 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1095 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1096 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1097 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1098 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1099 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1105 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1107 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1108 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1111 if (ST->useSLMArithCosts())
1112 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1113 if (
auto KindCost = Entry->Cost[
CostKind])
1114 return LT.first * *KindCost;
1117 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1118 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1119 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1120 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1122 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1123 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1124 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1125 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1127 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1128 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1129 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1130 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1131 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1132 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1134 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1135 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1136 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1137 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1138 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1139 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1140 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1141 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1143 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1144 {
ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } },
1145 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1146 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1147 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1148 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1149 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1153 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1154 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1156 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1157 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1158 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1159 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1160 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1161 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1163 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1164 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1165 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1166 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1167 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1168 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1170 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1171 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1172 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1173 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1174 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1175 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1177 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1178 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1179 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1180 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1181 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1182 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1187 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1188 if (
auto KindCost = Entry->Cost[
CostKind])
1189 return LT.first * *KindCost;
1195 {
ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } },
1196 {
ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } },
1197 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1198 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1199 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1200 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1202 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1203 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1204 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1205 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1207 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1208 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1209 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1210 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1212 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1213 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1214 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1215 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1217 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1218 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1219 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1220 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1221 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1222 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1223 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1224 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1225 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1226 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1228 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1229 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1230 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1231 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1232 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1233 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1234 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1235 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1237 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1238 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1239 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1240 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1241 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1242 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1243 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1244 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1246 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1247 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1248 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1249 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1250 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1251 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1252 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1253 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1255 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1256 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1258 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1259 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1260 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1261 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1262 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1263 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1265 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1266 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1267 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1268 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1269 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1270 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1272 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1273 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1274 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1275 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1276 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1277 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1279 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1280 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1281 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1282 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1283 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1284 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1288 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1289 if (
auto KindCost = Entry->Cost[
CostKind])
1290 return LT.first * *KindCost;
1293 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1294 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1295 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1296 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1298 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1299 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1300 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1301 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1303 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1304 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1305 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1306 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1308 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1309 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1310 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1311 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1313 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1317 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1318 if (
auto KindCost = Entry->Cost[
CostKind])
1319 return LT.first * *KindCost;
1322 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1323 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1324 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1326 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1327 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1328 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1329 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1331 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1332 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1333 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1334 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1336 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1340 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1341 if (
auto KindCost = Entry->Cost[
CostKind])
1342 return LT.first * *KindCost;
1345 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1349 if (
const auto *Entry =
CostTableLookup(SSSE3CostTable, ISD, LT.second))
1350 if (
auto KindCost = Entry->Cost[
CostKind])
1351 return LT.first * *KindCost;
1356 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1357 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1358 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1359 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1361 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1362 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1363 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1364 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1366 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1367 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1368 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1369 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1371 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1372 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1373 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1374 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1376 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1377 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1378 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1379 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1381 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1382 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1383 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1384 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1386 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1387 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1389 {
ISD::MUL, MVT::v16i8, { 6, 18,12,12 } },
1390 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1391 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1392 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1396 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1397 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1398 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1399 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1401 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1402 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1403 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1404 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1406 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1407 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1408 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1410 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1411 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1412 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1414 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1415 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1419 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1420 if (
auto KindCost = Entry->Cost[
CostKind])
1421 return LT.first * *KindCost;
1424 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1425 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1427 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1428 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1430 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1431 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1433 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1434 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1436 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1437 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1441 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1442 if (
auto KindCost = Entry->Cost[
CostKind])
1443 return LT.first * *KindCost;
1448 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1453 if (
auto KindCost = Entry->Cost[
CostKind])
1454 return LT.first * *KindCost;
1465 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1466 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1467 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1469 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1470 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1471 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1472 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1473 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1477 if (
auto KindCost = Entry->Cost[
CostKind])
1478 return LT.first * *KindCost;
1492 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1536 if (!Args.empty() &&
1537 all_of(Args, [](
const Value *Arg) {
return isa<Constant>(Arg); }))
1546 CostKind, Mask.size() / 2, BaseTp);
1559 using namespace PatternMatch;
1562 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1569 bool IsInLaneShuffle =
false;
1570 bool IsSingleElementMask =
false;
1576 unsigned NumEltsPerLane = Mask.size() / NumLanes;
1577 if ((Mask.size() % NumLanes) == 0) {
1580 ((
P.value() % Mask.size()) / NumEltsPerLane) ==
1581 (
P.index() / NumEltsPerLane);
1583 IsSingleElementMask =
1584 (Mask.size() - 1) ==
static_cast<unsigned>(
count_if(Mask, [](
int M) {
1591 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1592 LT.second = LT.second.changeVectorElementType(MVT::f16);
1597 int NumElts = LT.second.getVectorNumElements();
1598 if ((Index % NumElts) == 0)
1601 if (SubLT.second.isVector()) {
1602 int NumSubElts = SubLT.second.getVectorNumElements();
1603 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1611 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1612 if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
1613 (NumSubElts % OrigSubElts) == 0 &&
1614 LT.second.getVectorElementType() ==
1615 SubLT.second.getVectorElementType() &&
1616 LT.second.getVectorElementType().getSizeInBits() ==
1618 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1619 "Unexpected number of elements!");
1621 LT.second.getVectorNumElements());
1623 SubLT.second.getVectorNumElements());
1624 int ExtractIndex =
alignDown((Index % NumElts), NumSubElts);
1631 return ExtractCost + 1;
1634 "Unexpected vector size");
1636 return ExtractCost + 2;
1649 int NumElts = LT.second.getVectorNumElements();
1651 if (SubLT.second.isVector()) {
1652 int NumSubElts = SubLT.second.getVectorNumElements();
1653 bool MatchingTypes =
1654 NumElts == NumSubElts &&
1656 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1663 if (LT.first == 1 && LT.second == MVT::v4f32 && SubLT.first == 1 &&
1664 SubLT.second == MVT::f32 && (Index == 0 || ST->
hasSSE41()))
1707 if (
const auto *Entry =
1709 if (
auto KindCost = Entry->Cost[
CostKind])
1710 return LT.first * *KindCost;
1716 if (LT.first != 1) {
1717 MVT LegalVT = LT.second;
1722 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1726 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1733 if (!Mask.empty() && NumOfDests.
isValid()) {
1751 unsigned E = *NumOfDests.
getValue();
1752 unsigned NormalizedVF =
1758 unsigned PrevSrcReg = 0;
1762 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1763 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1768 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1769 PrevRegMask != RegMask)
1777 if (SrcReg != DestReg &&
1782 PrevSrcReg = SrcReg;
1783 PrevRegMask = RegMask;
1804 if (LT.first == 1 && IsInLaneShuffle && IsSingleElementMask)
1818 if (
const auto *Entry =
1820 if (
auto KindCost = Entry->Cost[
CostKind])
1821 return LT.first * *KindCost;
1854 if (
const auto *Entry =
1856 if (
auto KindCost = Entry->Cost[
CostKind])
1857 return LT.first * *KindCost;
1934 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1935 if (
auto KindCost = Entry->Cost[
CostKind])
1936 return LT.first * *KindCost;
1952 if (IsInLaneShuffle && ST->
hasAVX2())
1953 if (
const auto *Entry =
1955 if (
auto KindCost = Entry->Cost[
CostKind])
1956 return LT.first * *KindCost;
2003 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
2004 if (
auto KindCost = Entry->Cost[
CostKind])
2005 return LT.first * *KindCost;
2027 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
2028 if (
auto KindCost = Entry->Cost[
CostKind])
2029 return LT.first * *KindCost;
2056 if (IsInLaneShuffle && ST->
hasAVX())
2057 if (
const auto *Entry =
2059 if (
auto KindCost = Entry->Cost[
CostKind])
2060 return LT.first * *KindCost;
2122 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2123 if (
auto KindCost = Entry->Cost[
CostKind])
2124 return LT.first * *KindCost;
2137 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2138 if (
auto KindCost = Entry->Cost[
CostKind])
2139 return LT.first * *KindCost;
2170 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2171 if (
auto KindCost = Entry->Cost[
CostKind])
2172 return LT.first * *KindCost;
2228 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2230 if (
const auto *Entry =
2233 LT.second.getVectorElementCount()) &&
2234 "Table entry missing from isLegalBroadcastLoad()");
2235 return LT.first * Entry->Cost;
2238 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2239 if (
auto KindCost = Entry->Cost[
CostKind])
2240 return LT.first * *KindCost;
2253 if (LT.first == 1 && LT.second == MVT::v4f32 && Mask.size() == 4) {
2255 auto MatchSHUFPS = [](
int X,
int Y) {
2256 return X < 0 ||
Y < 0 || ((
X & 4) == (
Y & 4));
2258 if (MatchSHUFPS(Mask[0], Mask[1]) && MatchSHUFPS(Mask[2], Mask[3]))
2261 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2262 if (
auto KindCost = Entry->Cost[
CostKind])
2263 return LT.first * *KindCost;
2275 assert(ISD &&
"Invalid opcode");
2402 {
ISD::FP_ROUND, MVT::v16f16, MVT::v16f32, { 1, 1, 1, 1 } },
2424 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2425 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2772 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2849 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
3073 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
3102 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3103 if (
auto KindCost = Entry->Cost[
CostKind])
3108 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3109 if (
auto KindCost = Entry->Cost[
CostKind])
3114 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3115 if (
auto KindCost = Entry->Cost[
CostKind])
3121 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3122 if (
auto KindCost = Entry->Cost[
CostKind])
3127 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3128 if (
auto KindCost = Entry->Cost[
CostKind])
3133 SimpleDstTy, SimpleSrcTy))
3134 if (
auto KindCost = Entry->Cost[
CostKind])
3139 SimpleDstTy, SimpleSrcTy))
3140 if (
auto KindCost = Entry->Cost[
CostKind])
3146 SimpleDstTy, SimpleSrcTy))
3147 if (
auto KindCost = Entry->Cost[
CostKind])
3151 if (ST->hasF16C()) {
3153 SimpleDstTy, SimpleSrcTy))
3154 if (
auto KindCost = Entry->Cost[
CostKind])
3160 SimpleDstTy, SimpleSrcTy))
3161 if (
auto KindCost = Entry->Cost[
CostKind])
3167 SimpleDstTy, SimpleSrcTy))
3168 if (
auto KindCost = Entry->Cost[
CostKind])
3191 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3192 if (
auto KindCost = Entry->Cost[
CostKind])
3193 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3197 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3198 if (
auto KindCost = Entry->Cost[
CostKind])
3199 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3203 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3204 if (
auto KindCost = Entry->Cost[
CostKind])
3205 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3210 LTDest.second, LTSrc.second))
3211 if (
auto KindCost = Entry->Cost[
CostKind])
3212 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3216 LTDest.second, LTSrc.second))
3217 if (
auto KindCost = Entry->Cost[
CostKind])
3218 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3222 LTDest.second, LTSrc.second))
3223 if (
auto KindCost = Entry->Cost[
CostKind])
3224 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3228 LTDest.second, LTSrc.second))
3229 if (
auto KindCost = Entry->Cost[
CostKind])
3230 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3234 LTDest.second, LTSrc.second))
3235 if (
auto KindCost = Entry->Cost[
CostKind])
3236 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3238 if (ST->hasF16C()) {
3240 LTDest.second, LTSrc.second))
3241 if (
auto KindCost = Entry->Cost[
CostKind])
3242 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3247 LTDest.second, LTSrc.second))
3248 if (
auto KindCost = Entry->Cost[
CostKind])
3249 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3253 LTDest.second, LTSrc.second))
3254 if (
auto KindCost = Entry->Cost[
CostKind])
3255 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3260 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3261 Type *ExtSrc = Src->getWithNewBitWidth(32);
3267 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3277 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3278 Type *TruncDst = Dst->getWithNewBitWidth(32);
3288 return Cost == 0 ? 0 :
N;
3302 Op1Info, Op2Info,
I);
3307 MVT MTy = LT.second;
3310 assert(ISD &&
"Invalid opcode");
3313 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3326 Pred = cast<CmpInst>(
I)->getPredicate();
3328 bool CmpWithConstant =
false;
3329 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3330 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3335 ExtraCost = CmpWithConstant ? 0 : 1;
3340 ExtraCost = CmpWithConstant ? 0 : 1;
3346 ExtraCost = CmpWithConstant ? 1 : 2;
3357 ExtraCost = CmpWithConstant ? 2 : 3;
3364 if (CondTy && !ST->
hasAVX())
3535 if (ST->useSLMArithCosts())
3537 if (
auto KindCost = Entry->Cost[
CostKind])
3538 return LT.first * (ExtraCost + *KindCost);
3542 if (
auto KindCost = Entry->Cost[
CostKind])
3543 return LT.first * (ExtraCost + *KindCost);
3547 if (
auto KindCost = Entry->Cost[
CostKind])
3548 return LT.first * (ExtraCost + *KindCost);
3552 if (
auto KindCost = Entry->Cost[
CostKind])
3553 return LT.first * (ExtraCost + *KindCost);
3557 if (
auto KindCost = Entry->Cost[
CostKind])
3558 return LT.first * (ExtraCost + *KindCost);
3562 if (
auto KindCost = Entry->Cost[
CostKind])
3563 return LT.first * (ExtraCost + *KindCost);
3567 if (
auto KindCost = Entry->Cost[
CostKind])
3568 return LT.first * (ExtraCost + *KindCost);
3572 if (
auto KindCost = Entry->Cost[
CostKind])
3573 return LT.first * (ExtraCost + *KindCost);
3577 if (
auto KindCost = Entry->Cost[
CostKind])
3578 return LT.first * (ExtraCost + *KindCost);
3582 if (
auto KindCost = Entry->Cost[
CostKind])
3583 return LT.first * (ExtraCost + *KindCost);
3591 Op1Info, Op2Info,
I);
3609 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3610 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3611 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3612 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3613 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3614 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3615 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3616 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3617 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3618 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3619 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3620 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3621 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3622 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3623 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3645 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3646 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3647 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3648 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3649 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3650 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3651 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3652 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3653 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3654 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3655 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3656 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3658 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3659 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3660 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3661 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3662 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3663 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3666 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3667 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3689 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3690 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3691 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3692 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3693 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3694 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3695 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3696 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3697 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3698 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3699 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3700 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3701 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3705 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3706 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3707 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3708 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3709 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3710 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3711 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3712 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3713 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3714 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3715 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3716 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3717 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3718 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3719 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3720 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3721 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3722 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3731 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3732 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3733 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3734 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3736 {
ISD::SMULO, MVT::v64i8, { 8, 21, 17, 18 } },
3738 {
ISD::UMULO, MVT::v64i8, { 8, 15, 15, 16 } },
3743 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3744 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3745 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3746 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3751 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3752 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3753 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3754 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3755 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3756 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3757 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3758 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3759 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3767 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3768 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3769 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3770 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3771 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3772 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3773 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3774 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3775 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3776 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3777 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3778 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3779 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3780 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3781 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3782 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3783 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3784 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3785 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3786 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3787 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3788 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3789 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3790 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3805 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3806 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3807 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3808 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3809 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3810 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3811 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3812 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3813 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3814 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3815 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3816 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3817 {
ISD::SMULO, MVT::v8i64, { 44, 44, 81, 93 } },
3818 {
ISD::SMULO, MVT::v16i32, { 5, 12, 9, 11 } },
3819 {
ISD::SMULO, MVT::v32i16, { 6, 12, 17, 17 } },
3820 {
ISD::SMULO, MVT::v64i8, { 22, 28, 42, 42 } },
3829 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3830 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3831 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3832 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3833 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3834 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3835 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3836 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3837 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3838 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3839 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3840 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3841 {
ISD::UMULO, MVT::v8i64, { 52, 52, 95, 104} },
3842 {
ISD::UMULO, MVT::v16i32, { 5, 12, 8, 10 } },
3843 {
ISD::UMULO, MVT::v32i16, { 5, 13, 16, 16 } },
3844 {
ISD::UMULO, MVT::v64i8, { 18, 24, 30, 30 } },
3871 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3874 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3875 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3891 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3892 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3893 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3894 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3895 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3896 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3897 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3898 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3899 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3900 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3901 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3902 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3903 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3904 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3905 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3906 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3917 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3918 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3919 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3920 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3921 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3922 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3923 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3924 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3939 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3940 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3941 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3942 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3943 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3944 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3945 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3946 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3947 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3948 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3949 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3950 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3951 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3952 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3955 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3956 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3957 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3958 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3959 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3960 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3961 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3962 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3969 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3970 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3971 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3972 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3973 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3974 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3975 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3976 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3977 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3978 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3979 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
3980 {
ISD::SMULO, MVT::v2i64, { 8, 8, 13, 15 } },
3981 {
ISD::SMULO, MVT::v8i32, { 8, 20, 13, 24 } },
3982 {
ISD::SMULO, MVT::v4i32, { 5, 15, 11, 12 } },
3983 {
ISD::SMULO, MVT::v16i16, { 4, 14, 8, 14 } },
3985 {
ISD::SMULO, MVT::v32i8, { 9, 15, 18, 35 } },
3986 {
ISD::SMULO, MVT::v16i8, { 6, 22, 14, 21 } },
3998 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3999 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
4000 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
4001 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
4002 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
4003 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
4004 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
4005 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
4006 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
4007 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
4008 {
ISD::UMULO, MVT::v4i64, { 24, 24, 39, 43 } },
4009 {
ISD::UMULO, MVT::v2i64, { 10, 10, 15, 19 } },
4010 {
ISD::UMULO, MVT::v8i32, { 8, 11, 13, 23 } },
4011 {
ISD::UMULO, MVT::v4i32, { 5, 12, 11, 12 } },
4012 {
ISD::UMULO, MVT::v16i16, { 4, 6, 8, 13 } },
4014 {
ISD::UMULO, MVT::v32i8, { 9, 13, 17, 33 } },
4015 {
ISD::UMULO, MVT::v16i8, { 6, 19, 13, 20 } },
4029 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
4031 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
4032 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
4035 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
4036 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
4037 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
4038 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
4051 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
4053 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
4054 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
4055 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
4056 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
4057 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
4058 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
4059 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
4060 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
4061 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
4062 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
4063 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
4064 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
4065 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
4066 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
4067 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
4068 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
4069 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
4070 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
4071 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
4072 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
4073 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
4074 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
4075 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
4076 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
4082 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
4083 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
4084 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4085 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4086 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4087 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
4088 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4089 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4090 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4091 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4092 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
4093 {
ISD::SMULO, MVT::v2i64, { 9, 9, 13, 17 } },
4094 {
ISD::SMULO, MVT::v8i32, { 15, 20, 24, 29 } },
4095 {
ISD::SMULO, MVT::v4i32, { 7, 15, 11, 13 } },
4096 {
ISD::SMULO, MVT::v16i16, { 8, 14, 14, 15 } },
4098 {
ISD::SMULO, MVT::v32i8, { 20, 20, 37, 39 } },
4099 {
ISD::SMULO, MVT::v16i8, { 9, 22, 18, 21 } },
4110 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
4111 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
4112 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4113 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4114 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4115 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
4116 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
4117 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4118 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4119 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4120 {
ISD::UMULO, MVT::v4i64, { 24, 26, 39, 45 } },
4121 {
ISD::UMULO, MVT::v2i64, { 10, 12, 15, 20 } },
4122 {
ISD::UMULO, MVT::v8i32, { 14, 15, 23, 28 } },
4123 {
ISD::UMULO, MVT::v4i32, { 7, 12, 11, 13 } },
4124 {
ISD::UMULO, MVT::v16i16, { 7, 11, 13, 14 } },
4126 {
ISD::UMULO, MVT::v32i8, { 19, 19, 35, 37 } },
4127 {
ISD::UMULO, MVT::v16i8, { 9, 19, 17, 20 } },
4141 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
4142 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
4144 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
4145 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
4170 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
4172 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
4179 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
4181 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
4189 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
4192 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
4197 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
4198 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4199 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4200 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4201 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4202 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4203 {
ISD::SMULO, MVT::v2i64, { 9, 11, 13, 17 } },
4204 {
ISD::SMULO, MVT::v4i32, { 20, 24, 13, 19 } },
4206 {
ISD::SMULO, MVT::v16i8, { 13, 22, 24, 25 } },
4211 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
4212 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4213 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4214 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
4215 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4216 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4217 {
ISD::UMULO, MVT::v2i64, { 14, 20, 15, 20 } },
4218 {
ISD::UMULO, MVT::v4i32, { 19, 22, 12, 18 } },
4220 {
ISD::UMULO, MVT::v16i8, { 13, 19, 18, 20 } },
4223 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
4224 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
4225 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
4233 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
4234 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
4235 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
4236 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
4237 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
4238 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
4239 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
4240 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
4241 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
4242 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
4243 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
4244 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
4247 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
4248 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
4249 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
4250 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
4255 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
4258 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
4259 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
4260 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
4261 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
4262 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
4263 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
4264 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
4265 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
4266 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
4267 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4268 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4269 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4274 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4275 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4276 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4277 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4278 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4279 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4280 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4281 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4282 {
ISD::SMULO, MVT::v2i64, { 30, 33, 13, 23 } },
4283 {
ISD::SMULO, MVT::v4i32, { 20, 24, 23, 23 } },
4285 {
ISD::SMULO, MVT::v16i8, { 13, 23, 24, 25 } },
4294 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4295 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4296 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4297 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4298 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4299 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4300 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4301 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4302 {
ISD::UMULO, MVT::v2i64, { 30, 33, 15, 29 } },
4303 {
ISD::UMULO, MVT::v4i32, { 19, 22, 14, 18 } },
4305 {
ISD::UMULO, MVT::v16i8, { 13, 19, 20, 20 } },
4313 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4319 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4322 {
ISD::CTTZ, MVT::i64, { 1, 1, 1, 1 } },
4325 {
ISD::CTTZ, MVT::i32, { 1, 1, 1, 1 } },
4326 {
ISD::CTTZ, MVT::i16, { 2, 1, 1, 1 } },
4330 {
ISD::CTLZ, MVT::i64, { 1, 1, 1, 1 } },
4333 {
ISD::CTLZ, MVT::i32, { 1, 1, 1, 1 } },
4334 {
ISD::CTLZ, MVT::i16, { 2, 1, 1, 1 } },
4346 {
ISD::ABS, MVT::i64, { 1, 2, 3, 3 } },
4349 {
ISD::CTLZ, MVT::i64, { 1, 2, 3, 3 } },
4350 {
ISD::CTLZ, MVT::i32, { 1, 2, 3, 3 } },
4351 {
ISD::CTLZ, MVT::i16, { 2, 2, 3, 3 } },
4354 {
ISD::CTTZ, MVT::i64, { 1, 2, 2, 2 } },
4355 {
ISD::CTTZ, MVT::i32, { 1, 2, 2, 2 } },
4356 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 2 } },
4360 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4361 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4363 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4368 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4369 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4370 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4371 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4378 {
ISD::ABS, MVT::i32, { 1, 2, 3, 3 } },
4379 {
ISD::ABS, MVT::i16, { 2, 2, 3, 3 } },
4380 {
ISD::ABS, MVT::i8, { 2, 4, 4, 3 } },
4386 {
ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } },
4387 {
ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } },
4392 {
ISD::CTTZ, MVT::i32, { 2, 2, 3, 3 } },
4393 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 3 } },
4401 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4402 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4404 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4405 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4410 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4411 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4425 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4426 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4428 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4429 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4431 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4432 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4434 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4435 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4458 case Intrinsic::abs:
4461 case Intrinsic::bitreverse:
4464 case Intrinsic::bswap:
4467 case Intrinsic::ctlz:
4470 case Intrinsic::ctpop:
4473 case Intrinsic::cttz:
4476 case Intrinsic::fshl:
4480 if (Args[0] == Args[1]) {
4491 case Intrinsic::fshr:
4496 if (Args[0] == Args[1]) {
4507 case Intrinsic::lrint:
4508 case Intrinsic::llrint: {
4515 case Intrinsic::maxnum:
4516 case Intrinsic::minnum:
4520 case Intrinsic::sadd_sat:
4523 case Intrinsic::smax:
4526 case Intrinsic::smin:
4529 case Intrinsic::ssub_sat:
4532 case Intrinsic::uadd_sat:
4535 case Intrinsic::umax:
4538 case Intrinsic::umin:
4541 case Intrinsic::usub_sat:
4544 case Intrinsic::sqrt:
4547 case Intrinsic::sadd_with_overflow:
4548 case Intrinsic::ssub_with_overflow:
4551 OpTy =
RetTy->getContainedType(0);
4553 case Intrinsic::uadd_with_overflow:
4554 case Intrinsic::usub_with_overflow:
4557 OpTy =
RetTy->getContainedType(0);
4559 case Intrinsic::smul_with_overflow:
4561 OpTy =
RetTy->getContainedType(0);
4563 case Intrinsic::umul_with_overflow:
4565 OpTy =
RetTy->getContainedType(0);
4570 auto adjustTableCost = [&](
int ISD,
unsigned Cost,
4571 std::pair<InstructionCost, MVT> LT,
4574 MVT MTy = LT.second;
4581 return LegalizationCost * 1;
4586 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4588 if (
II->hasOneUse() && isa<StoreInst>(
II->user_back()))
4590 if (
auto *LI = dyn_cast<LoadInst>(
II->getOperand(0))) {
4591 if (LI->hasOneUse())
4598 return LegalizationCost * (int)
Cost;
4603 MVT MTy = LT.second;
4606 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4607 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4610 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4611 if (Cst->isAllOnesValue())
4619 if (ST->useGLMDivSqrtCosts())
4621 if (
auto KindCost = Entry->Cost[
CostKind])
4622 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4624 if (ST->useSLMArithCosts())
4626 if (
auto KindCost = Entry->Cost[
CostKind])
4627 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4630 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4631 if (
auto KindCost = Entry->Cost[
CostKind])
4632 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4634 if (ST->hasBITALG())
4635 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4636 if (
auto KindCost = Entry->Cost[
CostKind])
4637 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4639 if (ST->hasVPOPCNTDQ())
4640 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4641 if (
auto KindCost = Entry->Cost[
CostKind])
4642 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4646 if (
auto KindCost = Entry->Cost[
CostKind])
4647 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4651 if (
auto KindCost = Entry->Cost[
CostKind])
4652 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4656 if (
auto KindCost = Entry->Cost[
CostKind])
4657 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4661 if (
auto KindCost = Entry->Cost[
CostKind])
4662 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4666 if (
auto KindCost = Entry->Cost[
CostKind])
4667 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4671 if (
auto KindCost = Entry->Cost[
CostKind])
4672 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4676 if (
auto KindCost = Entry->Cost[
CostKind])
4677 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4681 if (
auto KindCost = Entry->Cost[
CostKind])
4682 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4686 if (
auto KindCost = Entry->Cost[
CostKind])
4687 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4691 if (
auto KindCost = Entry->Cost[
CostKind])
4692 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4696 if (
auto KindCost = Entry->Cost[
CostKind])
4697 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4701 if (
auto KindCost = Entry->Cost[
CostKind])
4702 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4707 if (
auto KindCost = Entry->Cost[
CostKind])
4708 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4711 if (
auto KindCost = Entry->Cost[
CostKind])
4712 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4715 if (ST->hasLZCNT()) {
4718 if (
auto KindCost = Entry->Cost[
CostKind])
4719 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4722 if (
auto KindCost = Entry->Cost[
CostKind])
4723 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4726 if (ST->hasPOPCNT()) {
4729 if (
auto KindCost = Entry->Cost[
CostKind])
4730 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4733 if (
auto KindCost = Entry->Cost[
CostKind])
4734 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4739 if (
auto KindCost = Entry->Cost[
CostKind])
4740 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4743 if (
auto KindCost = Entry->Cost[
CostKind])
4744 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4749 (IID == Intrinsic::fshl || IID == Intrinsic::fshr)) {
4750 Type *CondTy =
RetTy->getWithNewBitWidth(1);
4770 unsigned Index,
Value *Op0,
4785 if (Index == -1U && (Opcode == Instruction::ExtractElement ||
4786 Opcode == Instruction::InsertElement)) {
4791 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4796 if (Opcode == Instruction::ExtractElement) {
4802 if (Opcode == Instruction::InsertElement) {
4810 if (Index != -1U && (Opcode == Instruction::ExtractElement ||
4811 Opcode == Instruction::InsertElement)) {
4813 if (Opcode == Instruction::ExtractElement &&
4815 cast<FixedVectorType>(Val)->getNumElements() > 1)
4822 if (!LT.second.isVector())
4826 unsigned SizeInBits = LT.second.getSizeInBits();
4827 unsigned NumElts = LT.second.getVectorNumElements();
4828 unsigned SubNumElts = NumElts;
4829 Index = Index % NumElts;
4833 if (SizeInBits > 128) {
4834 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4835 unsigned NumSubVecs = SizeInBits / 128;
4836 SubNumElts = NumElts / NumSubVecs;
4837 if (SubNumElts <= Index) {
4838 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4839 Index %= SubNumElts;
4843 MVT MScalarTy = LT.second.getScalarType();
4844 auto IsCheapPInsrPExtrInsertPS = [&]() {
4848 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4850 (MScalarTy == MVT::f32 && ST->
hasSSE1() && Index == 0 &&
4851 Opcode == Instruction::InsertElement) ||
4852 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4853 Opcode == Instruction::InsertElement);
4861 (Opcode != Instruction::InsertElement || !Op0 ||
4862 isa<UndefValue>(Op0)))
4863 return RegisterFileMoveCost;
4865 if (Opcode == Instruction::InsertElement &&
4866 isa_and_nonnull<UndefValue>(Op0)) {
4868 if (isa_and_nonnull<LoadInst>(Op1))
4869 return RegisterFileMoveCost;
4870 if (!IsCheapPInsrPExtrInsertPS()) {
4873 return 2 + RegisterFileMoveCost;
4875 return 1 + RegisterFileMoveCost;
4880 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4881 return 1 + RegisterFileMoveCost;
4885 assert(ISD &&
"Unexpected vector opcode");
4886 if (ST->useSLMArithCosts())
4888 return Entry->Cost + RegisterFileMoveCost;
4891 if (IsCheapPInsrPExtrInsertPS())
4892 return 1 + RegisterFileMoveCost;
4901 if (Opcode == Instruction::InsertElement) {
4902 auto *SubTy = cast<VectorType>(Val);
4910 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4914 RegisterFileMoveCost;
4921 cast<FixedVectorType>(Ty)->getNumElements() &&
4922 "Vector size mismatch");
4925 MVT MScalarTy = LT.second.getScalarType();
4926 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4929 constexpr unsigned LaneBitWidth = 128;
4930 assert((LegalVectorBitWidth < LaneBitWidth ||
4931 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4934 const int NumLegalVectors = *LT.first.getValue();
4935 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4940 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4942 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4945 if (LegalVectorBitWidth <= LaneBitWidth) {
4961 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4962 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4963 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4964 unsigned NumLegalElts =
4965 LT.second.getVectorNumElements() * NumLegalVectors;
4967 "Vector has been legalized to smaller element count");
4968 assert((NumLegalElts % NumLanesTotal) == 0 &&
4969 "Unexpected elts per lane");
4970 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4972 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4976 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4978 NumEltsPerLane, NumEltsPerLane *
I);
4979 if (LaneEltMask.
isZero())
4985 I * NumEltsPerLane, LaneTy);
4990 APInt AffectedLanes =
4993 AffectedLanes, NumLegalVectors,
true);
4994 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4995 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4996 unsigned I = NumLegalLanes * LegalVec + Lane;
4999 if (!AffectedLanes[
I] ||
5000 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
5003 I * NumEltsPerLane, LaneTy);
5007 }
else if (LT.second.isVector()) {
5018 unsigned NumElts = LT.second.getVectorNumElements();
5021 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
5030 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
5031 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
5032 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
5036 if (LT.second.isVector()) {
5037 unsigned NumLegalElts =
5038 LT.second.getVectorNumElements() * NumLegalVectors;
5040 "Vector has been legalized to smaller element count");
5044 if (LegalVectorBitWidth > LaneBitWidth) {
5045 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
5046 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
5047 assert((NumLegalElts % NumLanesTotal) == 0 &&
5048 "Unexpected elts per lane");
5049 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
5053 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
5057 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
5059 NumEltsPerLane,
I * NumEltsPerLane);
5060 if (LaneEltMask.
isZero())
5063 I * NumEltsPerLane, LaneTy);
5065 LaneTy, LaneEltMask,
false, Extract,
CostKind);
5082 int VF,
const APInt &DemandedDstElts,
5088 auto bailout = [&]() {
5098 unsigned PromEltTyBits = EltTyBits;
5099 switch (EltTyBits) {
5130 int NumDstElements = VF * ReplicationFactor;
5144 if (PromEltTyBits != EltTyBits) {
5150 Instruction::SExt, PromSrcVecTy, SrcVecTy,
5157 ReplicationFactor, VF,
5163 "We expect that the legalization doesn't affect the element width, "
5164 "doesn't coalesce/split elements.");
5167 unsigned NumDstVectors =
5168 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
5177 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
5178 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
5183 return NumDstVectorsDemanded * SingleShuffleCost;
5194 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
5197 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
5198 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
5205 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
5215 auto *VTy = dyn_cast<FixedVectorType>(Src);
5220 if (Opcode == Instruction::Store && OpInfo.
isConstant())
5226 if (!VTy || !LT.second.isVector()) {
5228 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
5231 bool IsLoad = Opcode == Instruction::Load;
5233 Type *EltTy = VTy->getElementType();
5238 const unsigned SrcNumElt = VTy->getNumElements();
5241 int NumEltRemaining = SrcNumElt;
5243 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
5245 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
5248 const unsigned XMMBits = 128;
5249 if (XMMBits % EltTyBits != 0)
5253 const int NumEltPerXMM = XMMBits / EltTyBits;
5257 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
5258 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
5260 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
5264 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
5266 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
5267 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
5268 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
5269 "Unless we haven't halved the op size yet, "
5270 "we have less than two op's sized units of work left.");
5272 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
5276 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
5277 "After halving sizes, the vector elt count is no longer a multiple "
5278 "of number of elements per operation?");
5279 auto *CoalescedVecTy =
5280 CurrNumEltPerOp == 1
5284 EltTyBits * CurrNumEltPerOp),
5285 CurrVecTy->getNumElements() / CurrNumEltPerOp);
5288 "coalesciing elements doesn't change vector width.");
5290 while (NumEltRemaining > 0) {
5291 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
5295 if (NumEltRemaining < CurrNumEltPerOp &&
5296 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
5297 CurrOpSizeBytes != 1)
5305 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5307 else if (CurrOpSizeBytes < 4)
5317 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
5320 if (SubVecEltsLeft == 0) {
5321 SubVecEltsLeft += CurrVecTy->getNumElements();
5326 VTy, {},
CostKind, NumEltDone(), CurrVecTy);
5333 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5334 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
5335 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
5336 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5337 APInt DemandedElts =
5339 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
5340 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5345 SubVecEltsLeft -= CurrNumEltPerOp;
5346 NumEltRemaining -= CurrNumEltPerOp;
5351 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5360 bool IsLoad = (Instruction::Load == Opcode);
5361 bool IsStore = (Instruction::Store == Opcode);
5363 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5368 unsigned NumElem = SrcVTy->getNumElements();
5376 MaskTy, DemandedElts,
false,
true,
CostKind);
5381 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5383 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5387 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5395 if (Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64)
5397 return Cost + LT.first;
5399 if (VT.isSimple() && Ty != VT.getSimpleVT() &&
5400 LT.second.getVectorNumElements() == NumElem)
5417 return Cost + LT.first * (IsLoad ? 2 : 8);
5420 return Cost + LT.first;
5428 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5432 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5434 return getGEPCost(BaseGEP->getSourceElementType(),
5435 BaseGEP->getPointerOperand(), Indices,
nullptr,
5450 const unsigned NumVectorInstToHideOverhead = 10;
5463 return NumVectorInstToHideOverhead;
5473 std::optional<FastMathFlags> FMF,
5514 assert(ISD &&
"Invalid opcode");
5522 if (ST->useSLMArithCosts())
5537 MVT MTy = LT.second;
5539 auto *ValVTy = cast<FixedVectorType>(ValTy);
5552 if (LT.first != 1 && MTy.
isVector() &&
5558 ArithmeticCost *= LT.first - 1;
5561 if (ST->useSLMArithCosts())
5563 return ArithmeticCost + Entry->Cost;
5567 return ArithmeticCost + Entry->Cost;
5571 return ArithmeticCost + Entry->Cost;
5620 if (ValVTy->getElementType()->isIntegerTy(1)) {
5622 if (LT.first != 1 && MTy.
isVector() &&
5628 ArithmeticCost *= LT.first - 1;
5632 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5633 return ArithmeticCost + Entry->Cost;
5636 return ArithmeticCost + Entry->Cost;
5639 return ArithmeticCost + Entry->Cost;
5642 return ArithmeticCost + Entry->Cost;
5647 unsigned NumVecElts = ValVTy->getNumElements();
5648 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5658 if (LT.first != 1 && MTy.
isVector() &&
5664 ReductionCost *= LT.first - 1;
5670 while (NumVecElts > 1) {
5672 unsigned Size = NumVecElts * ScalarSize;
5680 }
else if (
Size == 128) {
5683 if (ValVTy->isFloatingPointTy())
5691 }
else if (
Size == 64) {
5694 if (ValVTy->isFloatingPointTy())
5707 Instruction::LShr, ShiftTy,
CostKind,
5734 MVT MTy = LT.second;
5738 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5742 "Expected float point or integer vector type.");
5743 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5811 auto *ValVTy = cast<FixedVectorType>(ValTy);
5812 unsigned NumVecElts = ValVTy->getNumElements();
5816 if (LT.first != 1 && MTy.
isVector() &&
5822 MinMaxCost *= LT.first - 1;
5828 return MinMaxCost + Entry->Cost;
5832 return MinMaxCost + Entry->Cost;
5836 return MinMaxCost + Entry->Cost;
5840 return MinMaxCost + Entry->Cost;
5852 while (NumVecElts > 1) {
5854 unsigned Size = NumVecElts * ScalarSize;
5862 }
else if (
Size == 128) {
5872 }
else if (
Size == 64) {
5933 if (BitSize % 64 != 0)
5934 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5939 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5945 return std::max<InstructionCost>(1,
Cost);
5955 unsigned ImmBitWidth = Imm.getBitWidth();
5962 unsigned ImmIdx = ~0U;
5966 case Instruction::GetElementPtr:
5973 case Instruction::Store:
5976 case Instruction::ICmp:
5982 if (
Idx == 1 && ImmBitWidth == 64) {
5983 uint64_t ImmVal = Imm.getZExtValue();
5984 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5989 case Instruction::And:
5993 if (
Idx == 1 && ImmBitWidth == 64 && Imm.isIntN(32))
5996 if (
Idx == 1 && ImmBitWidth == 64 && ST->is64Bit() && ST->hasBMI() &&
6001 case Instruction::Add:
6002 case Instruction::Sub:
6004 if (
Idx == 1 && ImmBitWidth == 64 && Imm.getZExtValue() == 0x80000000)
6008 case Instruction::UDiv:
6009 case Instruction::SDiv:
6010 case Instruction::URem:
6011 case Instruction::SRem:
6016 case Instruction::Mul:
6017 case Instruction::Or:
6018 case Instruction::Xor:
6022 case Instruction::Shl:
6023 case Instruction::LShr:
6024 case Instruction::AShr:
6028 case Instruction::Trunc:
6029 case Instruction::ZExt:
6030 case Instruction::SExt:
6031 case Instruction::IntToPtr:
6032 case Instruction::PtrToInt:
6033 case Instruction::BitCast:
6034 case Instruction::PHI:
6035 case Instruction::Call:
6036 case Instruction::Select:
6037 case Instruction::Ret:
6038 case Instruction::Load:
6042 if (
Idx == ImmIdx) {
6067 case Intrinsic::sadd_with_overflow:
6068 case Intrinsic::uadd_with_overflow:
6069 case Intrinsic::ssub_with_overflow:
6070 case Intrinsic::usub_with_overflow:
6071 case Intrinsic::smul_with_overflow:
6072 case Intrinsic::umul_with_overflow:
6073 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
6076 case Intrinsic::experimental_stackmap:
6077 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6080 case Intrinsic::experimental_patchpoint_void:
6081 case Intrinsic::experimental_patchpoint:
6082 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6098int X86TTIImpl::getGatherOverhead()
const {
6111int X86TTIImpl::getScatterOverhead()
const {
6125 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
6126 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
6136 if (IndexSize < 64 || !
GEP)
6139 unsigned NumOfVarIndices = 0;
6140 const Value *Ptrs =
GEP->getPointerOperand();
6143 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
6144 if (isa<Constant>(
GEP->getOperand(
I)))
6146 Type *IndxTy =
GEP->getOperand(
I)->getType();
6147 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
6148 IndxTy = IndexVTy->getElementType();
6150 !isa<SExtInst>(
GEP->getOperand(
I))) ||
6151 ++NumOfVarIndices > 1)
6154 return (
unsigned)32;
6159 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
6160 ? getIndexSizeInBits(
Ptr,
DL)
6168 *std::max(IdxsLT.first, SrcLT.first).getValue();
6169 if (SplitFactor > 1) {
6173 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
6183 const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
6184 : getScatterOverhead();
6192 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
6195 if ((Opcode == Instruction::Load &&
6198 Align(Alignment)))) ||
6199 (Opcode == Instruction::Store &&
6202 Align(Alignment)))))
6208 if (!PtrTy &&
Ptr->getType()->isVectorTy())
6209 PtrTy = dyn_cast<PointerType>(
6210 cast<VectorType>(
Ptr->getType())->getElementType());
6211 assert(PtrTy &&
"Unexpected type for Ptr argument");
6213 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
6229 return ST->hasMacroFusion() || ST->hasBranchFusion();
6236 if (isa<VectorType>(DataTy) && cast<FixedVectorType>(DataTy)->
getNumElements() == 1)
6248 if (ScalarTy->
isHalfTy() && ST->hasBWI())
6258 return IntWidth == 32 || IntWidth == 64 ||
6259 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
6271 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
6288 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6310 if (!isa<VectorType>(DataTy))
6320 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6329 return IntWidth == 32 || IntWidth == 64 ||
6330 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6337bool X86TTIImpl::supportsGather()
const {
6351 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6352 return NumElts == 1 ||
6353 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6368 return IntWidth == 32 || IntWidth == 64;
6372 if (!supportsGather() || !ST->preferGather())
6387 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6388 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6393 for (
int Lane : seq<int>(0, NumElements)) {
6394 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6396 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6398 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6402 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6404 return ST->
hasSSE3() && NumElements % 4 == 0;
6406 return ST->
hasSSE3() && NumElements % 2 == 0;
6412 if (!ST->
hasAVX512() || !ST->preferScatter())
6425 if (
I->getOpcode() == Instruction::FDiv)
6441 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6443 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6446 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6447 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6448 if (RealCallerBits == RealCalleeBits)
6453 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6457 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6459 if (CB->isInlineAsm())
6463 for (
Value *Arg : CB->args())
6464 Types.push_back(Arg->getType());
6465 if (!CB->getType()->isVoidTy())
6466 Types.push_back(CB->getType());
6469 auto IsSimpleTy = [](
Type *Ty) {
6470 return !Ty->isVectorTy() && !Ty->isAggregateType();
6472 if (
all_of(Types, IsSimpleTy))
6475 if (
Function *NestedCallee = CB->getCalledFunction()) {
6477 if (NestedCallee->isIntrinsic())
6512 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6521 Options.AllowOverlappingLoads =
true;
6526 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6527 Options.LoadSizes.push_back(64);
6528 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6529 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6531 if (ST->is64Bit()) {
6532 Options.LoadSizes.push_back(8);
6534 Options.LoadSizes.push_back(4);
6535 Options.LoadSizes.push_back(2);
6536 Options.LoadSizes.push_back(1);
6541 return supportsGather();
6552 return !(ST->isAtom());
6572 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6578 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6591 if (UseMaskedMemOp) {
6593 for (
unsigned Index : Indices) {
6594 assert(Index < Factor &&
"Invalid index for interleaved memory op");
6595 for (
unsigned Elm = 0; Elm < VF; Elm++)
6596 DemandedLoadStoreElts.
setBit(Index + Elm * Factor);
6603 UseMaskForGaps ? DemandedLoadStoreElts
6612 if (UseMaskForGaps) {
6618 if (Opcode == Instruction::Load) {
6625 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6626 {3, MVT::v16i8, 12},
6627 {3, MVT::v32i8, 14},
6628 {3, MVT::v64i8, 22},
6631 if (
const auto *Entry =
6633 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6645 unsigned NumOfLoadsInInterleaveGrp =
6646 Indices.
size() ? Indices.
size() : Factor;
6655 unsigned NumOfUnfoldedLoads =
6656 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6659 unsigned NumOfShufflesPerResult =
6660 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6667 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6670 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6677 assert(Opcode == Instruction::Store &&
6678 "Expected Store Instruction at this point");
6680 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6681 {3, MVT::v16i8, 12},
6682 {3, MVT::v32i8, 14},
6683 {3, MVT::v64i8, 26},
6686 {4, MVT::v16i8, 11},
6687 {4, MVT::v32i8, 14},
6691 if (
const auto *Entry =
6693 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6698 unsigned NumOfSources = Factor;
6701 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6705 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6708 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6716 bool UseMaskForCond,
bool UseMaskForGaps) {
6717 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6719 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6720 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6725 return ST->hasBWI();
6727 return ST->hasBF16();
6730 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6732 Opcode, VecTy, Factor, Indices, Alignment,
6735 if (UseMaskForCond || UseMaskForGaps)
6738 UseMaskForCond, UseMaskForGaps);
6758 unsigned VF = VecTy->getNumElements() / Factor;
6759 Type *ScalarTy = VecTy->getElementType();
6791 {2, MVT::v16i16, 9},
6792 {2, MVT::v32i16, 18},
6795 {2, MVT::v16i32, 8},
6796 {2, MVT::v32i32, 16},
6800 {2, MVT::v16i64, 16},
6801 {2, MVT::v32i64, 32},
6806 {3, MVT::v16i8, 11},
6807 {3, MVT::v32i8, 14},
6812 {3, MVT::v16i16, 28},
6813 {3, MVT::v32i16, 56},
6818 {3, MVT::v16i32, 14},
6819 {3, MVT::v32i32, 32},
6823 {3, MVT::v8i64, 10},
6824 {3, MVT::v16i64, 20},
6829 {4, MVT::v16i8, 24},
6830 {4, MVT::v32i8, 56},
6833 {4, MVT::v4i16, 17},
6834 {4, MVT::v8i16, 33},
6835 {4, MVT::v16i16, 75},
6836 {4, MVT::v32i16, 150},
6840 {4, MVT::v8i32, 16},
6841 {4, MVT::v16i32, 32},
6842 {4, MVT::v32i32, 68},
6846 {4, MVT::v8i64, 20},
6847 {4, MVT::v16i64, 40},
6852 {6, MVT::v16i8, 43},
6853 {6, MVT::v32i8, 82},
6855 {6, MVT::v2i16, 13},
6857 {6, MVT::v8i16, 39},
6858 {6, MVT::v16i16, 106},
6859 {6, MVT::v32i16, 212},
6862 {6, MVT::v4i32, 15},
6863 {6, MVT::v8i32, 31},
6864 {6, MVT::v16i32, 64},
6867 {6, MVT::v4i64, 18},
6868 {6, MVT::v8i64, 36},
6873 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6887 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6892 {2, MVT::v16i16, 4},
6893 {2, MVT::v32i16, 8},
6897 {2, MVT::v16i32, 8},
6898 {2, MVT::v32i32, 16},
6903 {2, MVT::v16i64, 16},
6904 {2, MVT::v32i64, 32},
6909 {3, MVT::v16i8, 11},
6910 {3, MVT::v32i8, 13},
6914 {3, MVT::v8i16, 12},
6915 {3, MVT::v16i16, 27},
6916 {3, MVT::v32i16, 54},
6920 {3, MVT::v8i32, 11},
6921 {3, MVT::v16i32, 22},
6922 {3, MVT::v32i32, 48},
6926 {3, MVT::v8i64, 12},
6927 {3, MVT::v16i64, 24},
6933 {4, MVT::v32i8, 12},
6937 {4, MVT::v8i16, 10},
6938 {4, MVT::v16i16, 32},
6939 {4, MVT::v32i16, 64},
6943 {4, MVT::v8i32, 16},
6944 {4, MVT::v16i32, 32},
6945 {4, MVT::v32i32, 64},
6949 {4, MVT::v8i64, 20},
6950 {4, MVT::v16i64, 40},
6955 {6, MVT::v16i8, 27},
6956 {6, MVT::v32i8, 90},
6958 {6, MVT::v2i16, 10},
6959 {6, MVT::v4i16, 15},
6960 {6, MVT::v8i16, 21},
6961 {6, MVT::v16i16, 58},
6962 {6, MVT::v32i16, 90},
6965 {6, MVT::v4i32, 12},
6966 {6, MVT::v8i32, 33},
6967 {6, MVT::v16i32, 66},
6970 {6, MVT::v4i64, 15},
6971 {6, MVT::v8i64, 30},
6974 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6985 if (Opcode == Instruction::Load) {
6986 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6990 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6994 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6996 return GetDiscountedCost(Entry);
6999 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
7001 return GetDiscountedCost(Entry);
7004 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
7006 return GetDiscountedCost(Entry);
7008 assert(Opcode == Instruction::Store &&
7009 "Expected Store Instruction at this point");
7011 "Interleaved store only supports fully-interleaved groups.");
7013 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
7015 return MemOpCosts + Entry->Cost;
7018 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
7020 return MemOpCosts + Entry->Cost;
7025 UseMaskForCond, UseMaskForGaps);
7030 bool HasBaseReg, int64_t Scale,
7031 unsigned AddrSpace)
const {
7059 return AM.
Scale != 0;
7073 if (ST->hasXOP() && (Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64))
7078 if (ST->
hasAVX2() && (Bits == 32 || Bits == 64))
7082 if (ST->hasBWI() && Bits == 16)
7091 Type *ScalarValTy)
const {
7092 if (ST->hasF16C() && ScalarMemTy->
isHalfTy()) {
7106 if (
I->getOpcode() == Instruction::Mul &&
7108 for (
auto &
Op :
I->operands()) {
7110 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
7118 Ops.
push_back(&cast<Instruction>(
Op)->getOperandUse(0));
7127 return !Ops.
empty();
7133 int ShiftAmountOpNum = -1;
7135 ShiftAmountOpNum = 1;
7136 else if (
auto *
II = dyn_cast<IntrinsicInst>(
I)) {
7137 if (
II->getIntrinsicID() == Intrinsic::fshl ||
7138 II->getIntrinsicID() == Intrinsic::fshr)
7139 ShiftAmountOpNum = 2;
7142 if (ShiftAmountOpNum == -1)
7145 auto *Shuf = dyn_cast<ShuffleVectorInst>(
I->getOperand(ShiftAmountOpNum));
7148 Ops.
push_back(&
I->getOperandUse(ShiftAmountOpNum));
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isVectorShiftByScalarCheap(Type *Ty) const
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getBranchMispredictPenalty() const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMULO
Same for multiplication.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.