63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
165 bool Vector = (ClassID == 1);
172 if (!
Vector && ST->hasEGPR())
188 auto *VTy = dyn_cast<FixedVectorType>(Ty);
189 if (!Ty->
isIntegerTy() && (!VTy || VTy->getNumElements() != 1))
192 switch (cast<IntegerType>(ScalarTy)->
getBitWidth()) {
209 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
211 if (ST->
hasAVX() && PreferVectorWidth >= 256)
213 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
254 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
271 assert(ISD &&
"Invalid opcode");
273 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
274 (LT.second.getScalarType() == MVT::i32 ||
275 LT.second.getScalarType() == MVT::i64)) {
277 bool Op1Signed =
false, Op2Signed =
false;
280 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
281 bool SignedMode = Op1Signed || Op2Signed;
286 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
287 LT.second.getScalarType() == MVT::i32) {
289 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
291 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
292 bool Op1Sext = isa<SExtInst>(Args[0]) &&
293 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
294 bool Op2Sext = isa<SExtInst>(Args[1]) &&
295 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
297 bool IsZeroExtended = !Op1Signed || !Op2Signed;
298 bool IsConstant = Op1Constant || Op2Constant;
299 bool IsSext = Op1Sext || Op2Sext;
300 if (IsConstant || IsZeroExtended || IsSext)
308 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
311 if (!SignedMode && OpMinSize <= 8)
315 if (!SignedMode && OpMinSize <= 16)
322 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
375 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
376 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
377 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
378 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
379 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
380 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
381 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
382 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
383 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
387 if (
const auto *Entry =
389 if (
auto KindCost = Entry->Cost[
CostKind])
390 return LT.first * *KindCost;
393 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
394 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
395 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
396 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
397 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
398 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
399 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
400 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
401 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
403 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
404 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
405 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
406 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
407 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
408 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
412 if (
const auto *Entry =
414 if (
auto KindCost = Entry->Cost[
CostKind])
415 return LT.first * *KindCost;
418 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
419 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
420 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
422 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
423 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
424 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
426 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
427 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
428 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
429 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
430 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
431 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
433 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
434 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
435 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
436 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
437 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
438 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
439 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
448 if (
const auto *Entry =
450 if (
auto KindCost = Entry->Cost[
CostKind])
451 return LT.first * *KindCost;
454 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
455 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
456 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
457 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
458 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
459 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
461 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
462 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
463 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
464 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
465 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
466 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
468 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
469 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
470 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
471 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
472 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
473 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
475 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
476 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
477 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
478 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
479 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
480 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
489 if (
const auto *Entry =
491 if (
auto KindCost = Entry->Cost[
CostKind])
492 return LT.first * *KindCost;
495 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
496 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
497 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
498 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
499 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
500 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
502 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
503 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
504 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
505 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
506 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
507 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
509 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
510 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
511 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
512 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
513 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
514 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
516 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
517 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
518 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
519 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
520 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
521 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
531 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
532 if (
const auto *Entry =
534 if (
auto KindCost = Entry->Cost[
CostKind])
535 return LT.first * *KindCost;
538 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
539 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
540 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
542 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
543 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
544 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
546 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
547 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
548 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
550 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
551 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
552 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
562 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
563 if (
const auto *Entry =
565 if (
auto KindCost = Entry->Cost[
CostKind])
566 return LT.first * *KindCost;
581 if (
const auto *Entry =
583 if (
auto KindCost = Entry->Cost[
CostKind])
584 return LT.first * *KindCost;
604 if (
const auto *Entry =
606 if (
auto KindCost = Entry->Cost[
CostKind])
607 return LT.first * *KindCost;
627 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
628 if (
auto KindCost = Entry->Cost[
CostKind])
629 return LT.first * *KindCost;
649 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
650 if (
auto KindCost = Entry->Cost[
CostKind])
651 return LT.first * *KindCost;
659 if (
const auto *Entry =
661 if (
auto KindCost = Entry->Cost[
CostKind])
662 return LT.first * *KindCost;
682 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
683 if (
auto KindCost = Entry->Cost[
CostKind])
684 return LT.first * *KindCost;
687 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
688 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
689 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
690 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
691 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
692 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
693 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
694 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
695 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
697 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
698 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
699 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
703 if (
const auto *Entry =
705 if (
auto KindCost = Entry->Cost[
CostKind])
706 return LT.first * *KindCost;
709 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
710 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
711 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
713 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
714 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
715 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
717 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
718 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
719 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
720 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
721 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
722 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
723 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
727 if (
const auto *Entry =
729 if (
auto KindCost = Entry->Cost[
CostKind])
730 return LT.first * *KindCost;
734 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
735 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
736 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
737 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
738 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
739 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
741 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
742 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
743 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
744 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
745 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
746 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
748 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
749 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
750 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
751 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
752 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
753 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
755 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
756 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
757 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
758 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
759 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
760 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
764 if (
const auto *Entry =
766 if (
auto KindCost = Entry->Cost[
CostKind])
767 return LT.first * *KindCost;
770 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
771 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
772 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
773 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
774 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
775 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
777 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
778 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
779 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
780 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
781 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
782 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
784 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
785 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
786 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
787 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
788 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
789 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
791 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
792 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
793 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
794 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
795 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
796 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
801 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
802 if (
const auto *Entry =
804 if (
auto KindCost = Entry->Cost[
CostKind])
805 return LT.first * *KindCost;
809 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
810 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
811 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
813 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
814 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
815 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
817 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
818 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
819 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
821 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
822 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
823 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
827 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
828 if (
const auto *Entry =
830 if (
auto KindCost = Entry->Cost[
CostKind])
831 return LT.first * *KindCost;
834 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
835 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
836 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
841 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
842 if (
auto KindCost = Entry->Cost[
CostKind])
843 return LT.first * *KindCost;
846 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
847 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
848 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
849 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
850 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
851 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
852 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
853 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
854 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
856 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
857 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
858 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
859 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
860 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
861 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
862 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
863 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
864 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
866 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
867 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
869 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
870 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
871 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
872 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
874 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
875 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
877 {
ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } },
878 {
ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } },
879 {
ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } },
880 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
882 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
883 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
884 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
885 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
890 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
891 if (
auto KindCost = Entry->Cost[
CostKind])
892 return LT.first * *KindCost;
895 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
896 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
897 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
899 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
900 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
901 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
903 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
904 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
905 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
906 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
907 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
908 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
909 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
910 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
911 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
913 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
914 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
915 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
916 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
917 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
918 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
919 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
920 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
921 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
923 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
924 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
926 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
927 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
929 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
930 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
931 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
932 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
934 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
935 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
936 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
937 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
939 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
940 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
941 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
942 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
944 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
945 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
946 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
947 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
952 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
953 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
954 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
955 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
956 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
957 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
958 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
959 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
962 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
963 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
964 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
965 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
967 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
968 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
969 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
970 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
971 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
972 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
973 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
974 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
977 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
978 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
979 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
980 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
984 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
985 if (
auto KindCost = Entry->Cost[
CostKind])
986 return LT.first * *KindCost;
991 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
992 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
993 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
994 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
995 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
996 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
997 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
998 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
999 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
1000 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
1012 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
1013 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
1020 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
1021 if (
auto KindCost = Entry->Cost[
CostKind])
1022 return LT.first * *KindCost;
1027 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1028 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1029 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1030 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1031 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1032 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1033 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1034 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1035 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1036 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1037 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1038 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1040 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1041 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1042 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1043 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1044 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1045 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1046 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1047 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1048 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1049 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1050 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1051 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1061 if (
const auto *Entry =
1063 if (
auto KindCost = Entry->Cost[
CostKind])
1064 return LT.first * *KindCost;
1071 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1072 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1077 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1078 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1079 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1080 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1083 if (ST->useGLMDivSqrtCosts())
1084 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1085 if (
auto KindCost = Entry->Cost[
CostKind])
1086 return LT.first * *KindCost;
1089 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1090 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1091 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1092 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1093 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1094 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1095 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1096 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1097 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1098 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1099 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1100 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1106 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1108 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1109 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1112 if (ST->useSLMArithCosts())
1113 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1114 if (
auto KindCost = Entry->Cost[
CostKind])
1115 return LT.first * *KindCost;
1118 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1119 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1120 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1121 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1123 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1124 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1125 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1126 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1128 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1129 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1130 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1131 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1132 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1133 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1135 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1136 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1137 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1138 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1139 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1140 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1141 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1142 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1144 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1145 {
ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } },
1146 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1147 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1148 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1149 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1150 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1154 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1155 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1157 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1158 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1159 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1160 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1161 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1162 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1164 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1165 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1166 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1167 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1168 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1169 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1171 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1172 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1173 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1174 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1175 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1176 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1178 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1179 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1180 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1181 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1182 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1183 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1188 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1189 if (
auto KindCost = Entry->Cost[
CostKind])
1190 return LT.first * *KindCost;
1196 {
ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } },
1197 {
ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } },
1198 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1199 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1200 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1201 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1203 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1204 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1205 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1206 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1208 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1209 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1210 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1211 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1213 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1214 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1215 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1216 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1218 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1219 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1220 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1221 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1222 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1223 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1224 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1225 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1226 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1227 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1229 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1230 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1231 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1232 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1233 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1234 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1235 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1236 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1238 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1239 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1240 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1241 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1242 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1243 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1244 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1245 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1247 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1248 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1249 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1250 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1251 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1252 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1253 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1254 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1256 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1257 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1259 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1260 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1261 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1262 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1263 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1264 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1266 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1267 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1268 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1269 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1270 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1271 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1273 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1274 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1275 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1276 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1277 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1278 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1280 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1281 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1282 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1283 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1284 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1285 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1289 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1290 if (
auto KindCost = Entry->Cost[
CostKind])
1291 return LT.first * *KindCost;
1294 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1295 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1296 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1297 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1299 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1300 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1301 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1302 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1304 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1305 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1306 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1307 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1309 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1310 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1311 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1312 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1314 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1318 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1319 if (
auto KindCost = Entry->Cost[
CostKind])
1320 return LT.first * *KindCost;
1323 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1324 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1325 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1327 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1328 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1329 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1330 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1332 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1333 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1334 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1335 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1337 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1341 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1342 if (
auto KindCost = Entry->Cost[
CostKind])
1343 return LT.first * *KindCost;
1346 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1350 if (
const auto *Entry =
CostTableLookup(SSSE3CostTable, ISD, LT.second))
1351 if (
auto KindCost = Entry->Cost[
CostKind])
1352 return LT.first * *KindCost;
1357 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1358 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1359 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1360 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1362 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1363 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1364 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1365 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1367 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1368 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1369 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1370 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1372 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1373 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1374 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1375 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1377 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1378 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1379 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1380 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1382 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1383 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1384 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1385 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1387 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1388 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1390 {
ISD::MUL, MVT::v16i8, { 6, 18,12,12 } },
1391 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1392 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1393 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1397 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1398 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1399 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1400 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1402 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1403 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1404 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1405 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1407 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1408 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1409 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1411 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1412 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1413 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1415 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1416 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1420 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1421 if (
auto KindCost = Entry->Cost[
CostKind])
1422 return LT.first * *KindCost;
1425 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1426 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1428 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1429 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1431 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1432 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1434 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1435 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1437 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1438 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1442 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1443 if (
auto KindCost = Entry->Cost[
CostKind])
1444 return LT.first * *KindCost;
1449 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1454 if (
auto KindCost = Entry->Cost[
CostKind])
1455 return LT.first * *KindCost;
1466 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1467 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1468 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1470 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1471 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1472 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1473 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1474 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1478 if (
auto KindCost = Entry->Cost[
CostKind])
1479 return LT.first * *KindCost;
1493 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1542 CostKind, Mask.size() / 2, BaseTp);
1555 using namespace PatternMatch;
1558 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1563 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1564 LT.second = LT.second.changeVectorElementType(MVT::f16);
1569 int NumElts = LT.second.getVectorNumElements();
1570 if ((
Index % NumElts) == 0)
1573 if (SubLT.second.isVector()) {
1574 int NumSubElts = SubLT.second.getVectorNumElements();
1575 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1583 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1584 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
1585 (NumSubElts % OrigSubElts) == 0 &&
1586 LT.second.getVectorElementType() ==
1587 SubLT.second.getVectorElementType() &&
1588 LT.second.getVectorElementType().getSizeInBits() ==
1590 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1591 "Unexpected number of elements!");
1593 LT.second.getVectorNumElements());
1595 SubLT.second.getVectorNumElements());
1604 return ExtractCost + 1;
1607 "Unexpected vector size");
1609 return ExtractCost + 2;
1620 int NumElts = LT.second.getVectorNumElements();
1622 if (SubLT.second.isVector()) {
1623 int NumSubElts = SubLT.second.getVectorNumElements();
1624 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1637 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1668 if (
const auto *Entry =
1677 MVT LegalVT = LT.second;
1682 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1686 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1693 if (!Mask.empty() && NumOfDests.
isValid()) {
1711 unsigned E = *NumOfDests.
getValue();
1712 unsigned NormalizedVF =
1718 unsigned PrevSrcReg = 0;
1722 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1723 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1728 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1729 PrevRegMask != RegMask)
1737 if (SrcReg != DestReg &&
1742 PrevSrcReg = SrcReg;
1743 PrevRegMask = RegMask;
1756 std::nullopt,
CostKind, 0,
nullptr);
1767 LT.first = NumOfDests * NumOfShufflesPerDest;
1783 if (
const auto *Entry =
1785 return LT.first * Entry->Cost;
1818 if (
const auto *Entry =
1820 return LT.first * Entry->Cost;
1897 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1898 if (
auto KindCost = Entry->Cost[
CostKind])
1899 return LT.first * *KindCost;
1952 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1953 return LT.first * Entry->Cost;
1974 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
1975 return LT.first * Entry->Cost;
2037 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2038 return LT.first * Entry->Cost;
2051 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2052 return LT.first * Entry->Cost;
2083 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2084 return LT.first * Entry->Cost;
2140 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2142 if (
const auto *Entry =
2145 LT.second.getVectorElementCount()) &&
2146 "Table entry missing from isLegalBroadcastLoad()");
2147 return LT.first * Entry->Cost;
2150 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2151 return LT.first * Entry->Cost;
2164 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2165 return LT.first * Entry->Cost;
2176 assert(ISD &&
"Invalid opcode");
2322 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2323 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2670 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2747 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
2971 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
2989 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2990 if (
auto KindCost = Entry->Cost[
CostKind])
2995 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2996 if (
auto KindCost = Entry->Cost[
CostKind])
3001 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3002 if (
auto KindCost = Entry->Cost[
CostKind])
3008 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3009 if (
auto KindCost = Entry->Cost[
CostKind])
3014 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3015 if (
auto KindCost = Entry->Cost[
CostKind])
3020 SimpleDstTy, SimpleSrcTy))
3021 if (
auto KindCost = Entry->Cost[
CostKind])
3026 SimpleDstTy, SimpleSrcTy))
3027 if (
auto KindCost = Entry->Cost[
CostKind])
3033 SimpleDstTy, SimpleSrcTy))
3034 if (
auto KindCost = Entry->Cost[
CostKind])
3040 SimpleDstTy, SimpleSrcTy))
3041 if (
auto KindCost = Entry->Cost[
CostKind])
3047 SimpleDstTy, SimpleSrcTy))
3048 if (
auto KindCost = Entry->Cost[
CostKind])
3064 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3065 if (
auto KindCost = Entry->Cost[
CostKind])
3066 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3070 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3071 if (
auto KindCost = Entry->Cost[
CostKind])
3072 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3076 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3077 if (
auto KindCost = Entry->Cost[
CostKind])
3078 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3083 LTDest.second, LTSrc.second))
3084 if (
auto KindCost = Entry->Cost[
CostKind])
3085 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3089 LTDest.second, LTSrc.second))
3090 if (
auto KindCost = Entry->Cost[
CostKind])
3091 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3095 LTDest.second, LTSrc.second))
3096 if (
auto KindCost = Entry->Cost[
CostKind])
3097 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3101 LTDest.second, LTSrc.second))
3102 if (
auto KindCost = Entry->Cost[
CostKind])
3103 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3107 LTDest.second, LTSrc.second))
3108 if (
auto KindCost = Entry->Cost[
CostKind])
3109 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3113 LTDest.second, LTSrc.second))
3114 if (
auto KindCost = Entry->Cost[
CostKind])
3115 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3119 LTDest.second, LTSrc.second))
3120 if (
auto KindCost = Entry->Cost[
CostKind])
3121 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3126 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3127 Type *ExtSrc = Src->getWithNewBitWidth(32);
3133 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3143 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3144 Type *TruncDst = Dst->getWithNewBitWidth(32);
3154 return Cost == 0 ? 0 :
N;
3174 MVT MTy = LT.second;
3177 assert(ISD &&
"Invalid opcode");
3180 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3193 Pred = cast<CmpInst>(
I)->getPredicate();
3195 bool CmpWithConstant =
false;
3196 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3197 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3202 ExtraCost = CmpWithConstant ? 0 : 1;
3207 ExtraCost = CmpWithConstant ? 0 : 1;
3213 ExtraCost = CmpWithConstant ? 1 : 2;
3224 ExtraCost = CmpWithConstant ? 2 : 3;
3231 if (CondTy && !ST->
hasAVX())
3400 if (ST->useSLMArithCosts())
3402 if (
auto KindCost = Entry->Cost[
CostKind])
3403 return LT.first * (ExtraCost + *KindCost);
3407 if (
auto KindCost = Entry->Cost[
CostKind])
3408 return LT.first * (ExtraCost + *KindCost);
3412 if (
auto KindCost = Entry->Cost[
CostKind])
3413 return LT.first * (ExtraCost + *KindCost);
3417 if (
auto KindCost = Entry->Cost[
CostKind])
3418 return LT.first * (ExtraCost + *KindCost);
3422 if (
auto KindCost = Entry->Cost[
CostKind])
3423 return LT.first * (ExtraCost + *KindCost);
3427 if (
auto KindCost = Entry->Cost[
CostKind])
3428 return LT.first * (ExtraCost + *KindCost);
3432 if (
auto KindCost = Entry->Cost[
CostKind])
3433 return LT.first * (ExtraCost + *KindCost);
3437 if (
auto KindCost = Entry->Cost[
CostKind])
3438 return LT.first * (ExtraCost + *KindCost);
3442 if (
auto KindCost = Entry->Cost[
CostKind])
3443 return LT.first * (ExtraCost + *KindCost);
3447 if (
auto KindCost = Entry->Cost[
CostKind])
3448 return LT.first * (ExtraCost + *KindCost);
3473 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3474 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3475 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3476 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3477 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3478 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3479 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3480 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3481 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3482 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3483 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3484 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3485 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3486 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3487 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3509 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3510 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3511 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3512 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3513 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3514 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3515 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3516 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3517 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3518 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3519 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3520 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3522 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3523 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3524 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3525 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3526 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3527 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3530 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3531 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3553 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3554 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3555 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3556 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3557 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3558 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3559 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3560 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3561 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3562 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3563 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3564 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3565 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3569 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3570 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3571 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3572 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3573 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3574 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3575 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3576 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3577 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3578 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3579 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3580 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3581 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3582 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3583 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3584 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3585 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3586 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3595 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3596 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3597 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3598 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3600 {
ISD::SMULO, MVT::v64i8, { 8, 21, 17, 18 } },
3602 {
ISD::UMULO, MVT::v64i8, { 8, 15, 15, 16 } },
3607 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3608 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3609 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3610 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3615 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3616 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3617 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3618 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3619 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3620 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3621 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3622 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3623 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3631 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3632 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3633 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3634 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3635 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3636 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3637 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3638 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3639 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3640 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3641 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3642 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3643 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3644 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3645 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3646 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3647 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3648 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3649 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3650 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3651 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3652 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3653 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3654 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3669 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3670 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3671 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3672 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3673 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3674 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3675 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3676 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3677 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3678 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3679 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3680 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3681 {
ISD::SMULO, MVT::v8i64, { 44, 44, 81, 93 } },
3682 {
ISD::SMULO, MVT::v16i32, { 5, 12, 9, 11 } },
3683 {
ISD::SMULO, MVT::v32i16, { 6, 12, 17, 17 } },
3684 {
ISD::SMULO, MVT::v64i8, { 22, 28, 42, 42 } },
3693 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3694 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3695 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3696 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3697 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3698 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3699 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3700 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3701 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3702 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3703 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3704 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3705 {
ISD::UMULO, MVT::v8i64, { 52, 52, 95, 104} },
3706 {
ISD::UMULO, MVT::v16i32, { 5, 12, 8, 10 } },
3707 {
ISD::UMULO, MVT::v32i16, { 5, 13, 16, 16 } },
3708 {
ISD::UMULO, MVT::v64i8, { 18, 24, 30, 30 } },
3735 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3738 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3739 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3755 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3756 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3757 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3758 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3759 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3760 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3761 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3762 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3763 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3764 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3765 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3766 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3767 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3768 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3769 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3770 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3781 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3782 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3783 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3784 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3785 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3786 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3787 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3788 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3803 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3804 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3805 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3806 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3807 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3808 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3809 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3810 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3811 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3812 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3813 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3814 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3815 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3816 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3819 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3820 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3821 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3822 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3823 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3824 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3825 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3826 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3833 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3834 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3835 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3836 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3837 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3838 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3839 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3840 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3841 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3842 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3843 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
3844 {
ISD::SMULO, MVT::v2i64, { 8, 8, 13, 15 } },
3845 {
ISD::SMULO, MVT::v8i32, { 8, 20, 13, 24 } },
3846 {
ISD::SMULO, MVT::v4i32, { 5, 15, 11, 12 } },
3847 {
ISD::SMULO, MVT::v16i16, { 4, 14, 8, 14 } },
3849 {
ISD::SMULO, MVT::v32i8, { 9, 15, 18, 35 } },
3850 {
ISD::SMULO, MVT::v16i8, { 6, 22, 14, 21 } },
3862 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3863 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3864 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3865 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3866 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3867 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3868 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3869 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3870 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3871 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3872 {
ISD::UMULO, MVT::v4i64, { 24, 24, 39, 43 } },
3873 {
ISD::UMULO, MVT::v2i64, { 10, 10, 15, 19 } },
3874 {
ISD::UMULO, MVT::v8i32, { 8, 11, 13, 23 } },
3875 {
ISD::UMULO, MVT::v4i32, { 5, 12, 11, 12 } },
3876 {
ISD::UMULO, MVT::v16i16, { 4, 6, 8, 13 } },
3878 {
ISD::UMULO, MVT::v32i8, { 9, 13, 17, 33 } },
3879 {
ISD::UMULO, MVT::v16i8, { 6, 19, 13, 20 } },
3893 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
3895 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
3896 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
3899 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
3900 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
3901 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
3902 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
3915 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
3917 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
3918 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
3919 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
3920 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
3921 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
3922 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
3923 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
3924 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
3925 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
3926 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
3927 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
3928 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
3929 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
3930 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
3931 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
3932 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
3933 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
3934 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
3935 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
3936 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
3937 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
3938 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
3939 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
3940 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
3946 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
3947 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
3948 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3949 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3950 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3951 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
3952 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
3953 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3954 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3955 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3956 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
3957 {
ISD::SMULO, MVT::v2i64, { 9, 9, 13, 17 } },
3958 {
ISD::SMULO, MVT::v8i32, { 15, 20, 24, 29 } },
3959 {
ISD::SMULO, MVT::v4i32, { 7, 15, 11, 13 } },
3960 {
ISD::SMULO, MVT::v16i16, { 8, 14, 14, 15 } },
3962 {
ISD::SMULO, MVT::v32i8, { 20, 20, 37, 39 } },
3963 {
ISD::SMULO, MVT::v16i8, { 9, 22, 18, 21 } },
3974 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
3975 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
3976 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
3977 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
3978 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
3979 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
3980 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
3981 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
3982 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
3983 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
3984 {
ISD::UMULO, MVT::v4i64, { 24, 26, 39, 45 } },
3985 {
ISD::UMULO, MVT::v2i64, { 10, 12, 15, 20 } },
3986 {
ISD::UMULO, MVT::v8i32, { 14, 15, 23, 28 } },
3987 {
ISD::UMULO, MVT::v4i32, { 7, 12, 11, 13 } },
3988 {
ISD::UMULO, MVT::v16i16, { 7, 11, 13, 14 } },
3990 {
ISD::UMULO, MVT::v32i8, { 19, 19, 35, 37 } },
3991 {
ISD::UMULO, MVT::v16i8, { 9, 19, 17, 20 } },
4005 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
4006 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
4008 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
4009 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
4034 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
4036 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
4043 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
4045 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
4053 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
4056 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
4061 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
4062 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4063 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4064 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4065 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4066 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4067 {
ISD::SMULO, MVT::v2i64, { 9, 11, 13, 17 } },
4068 {
ISD::SMULO, MVT::v4i32, { 20, 24, 13, 19 } },
4070 {
ISD::SMULO, MVT::v16i8, { 13, 22, 24, 25 } },
4075 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
4076 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4077 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4078 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
4079 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4080 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4081 {
ISD::UMULO, MVT::v2i64, { 14, 20, 15, 20 } },
4082 {
ISD::UMULO, MVT::v4i32, { 19, 22, 12, 18 } },
4084 {
ISD::UMULO, MVT::v16i8, { 13, 19, 18, 20 } },
4087 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
4088 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
4089 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
4097 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
4098 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
4099 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
4100 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
4101 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
4102 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
4103 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
4104 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
4105 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
4106 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
4107 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
4108 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
4111 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
4112 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
4113 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
4114 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
4119 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
4122 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
4123 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
4124 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
4125 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
4126 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
4127 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
4128 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
4129 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
4130 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
4131 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4132 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4133 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4138 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4139 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4140 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4141 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4142 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4143 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4144 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4145 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4146 {
ISD::SMULO, MVT::v2i64, { 30, 33, 13, 23 } },
4147 {
ISD::SMULO, MVT::v4i32, { 20, 24, 23, 23 } },
4149 {
ISD::SMULO, MVT::v16i8, { 13, 23, 24, 25 } },
4158 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4159 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4160 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4161 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4162 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4163 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4164 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4165 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4166 {
ISD::UMULO, MVT::v2i64, { 30, 33, 15, 29 } },
4167 {
ISD::UMULO, MVT::v4i32, { 19, 22, 14, 18 } },
4169 {
ISD::UMULO, MVT::v16i8, { 13, 19, 20, 20 } },
4177 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4183 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4186 {
ISD::CTTZ, MVT::i64, { 1, 1, 1, 1 } },
4189 {
ISD::CTTZ, MVT::i32, { 1, 1, 1, 1 } },
4190 {
ISD::CTTZ, MVT::i16, { 2, 1, 1, 1 } },
4194 {
ISD::CTLZ, MVT::i64, { 1, 1, 1, 1 } },
4197 {
ISD::CTLZ, MVT::i32, { 1, 1, 1, 1 } },
4198 {
ISD::CTLZ, MVT::i16, { 2, 1, 1, 1 } },
4210 {
ISD::ABS, MVT::i64, { 1, 2, 3, 3 } },
4213 {
ISD::CTLZ, MVT::i64, { 3, 2, 6, 6 } },
4215 {
ISD::CTTZ, MVT::i64, { 2, 2, 5, 5 } },
4218 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4219 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4221 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4226 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4227 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4228 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4229 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4236 {
ISD::ABS, MVT::i32, { 1, 2, 3, 3 } },
4237 {
ISD::ABS, MVT::i16, { 2, 2, 3, 3 } },
4238 {
ISD::ABS, MVT::i8, { 2, 4, 4, 3 } },
4244 {
ISD::CTLZ, MVT::i32, { 3, 2, 6, 6 } },
4245 {
ISD::CTLZ, MVT::i16, { 3, 2, 6, 6 } },
4250 {
ISD::CTTZ, MVT::i32, { 2, 2, 3, 3 } },
4251 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 3 } },
4259 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4260 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4262 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4263 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4268 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4269 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4283 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4284 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4286 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4287 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4289 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4290 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4292 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4293 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4316 case Intrinsic::abs:
4319 case Intrinsic::bitreverse:
4322 case Intrinsic::bswap:
4325 case Intrinsic::ctlz:
4328 case Intrinsic::ctpop:
4331 case Intrinsic::cttz:
4334 case Intrinsic::fshl:
4338 if (Args[0] == Args[1]) {
4349 case Intrinsic::fshr:
4354 if (Args[0] == Args[1]) {
4365 case Intrinsic::lrint:
4366 case Intrinsic::llrint:
4375 case Intrinsic::maxnum:
4376 case Intrinsic::minnum:
4380 case Intrinsic::sadd_sat:
4383 case Intrinsic::smax:
4386 case Intrinsic::smin:
4389 case Intrinsic::ssub_sat:
4392 case Intrinsic::uadd_sat:
4395 case Intrinsic::umax:
4398 case Intrinsic::umin:
4401 case Intrinsic::usub_sat:
4404 case Intrinsic::sqrt:
4407 case Intrinsic::sadd_with_overflow:
4408 case Intrinsic::ssub_with_overflow:
4411 OpTy =
RetTy->getContainedType(0);
4413 case Intrinsic::uadd_with_overflow:
4414 case Intrinsic::usub_with_overflow:
4417 OpTy =
RetTy->getContainedType(0);
4419 case Intrinsic::smul_with_overflow:
4421 OpTy =
RetTy->getContainedType(0);
4423 case Intrinsic::umul_with_overflow:
4425 OpTy =
RetTy->getContainedType(0);
4430 auto adjustTableCost = [&](
int ISD,
unsigned Cost,
4431 std::pair<InstructionCost, MVT> LT,
4434 MVT MTy = LT.second;
4441 return LegalizationCost * 1;
4446 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4448 if (
II->hasOneUse() && isa<StoreInst>(
II->user_back()))
4450 if (
auto *LI = dyn_cast<LoadInst>(
II->getOperand(0))) {
4451 if (LI->hasOneUse())
4458 return LegalizationCost * (int)
Cost;
4463 MVT MTy = LT.second;
4466 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4467 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4470 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4471 if (Cst->isAllOnesValue())
4479 if (ST->useGLMDivSqrtCosts())
4481 if (
auto KindCost = Entry->Cost[
CostKind])
4482 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4484 if (ST->useSLMArithCosts())
4486 if (
auto KindCost = Entry->Cost[
CostKind])
4487 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4490 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4491 if (
auto KindCost = Entry->Cost[
CostKind])
4492 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4494 if (ST->hasBITALG())
4495 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4496 if (
auto KindCost = Entry->Cost[
CostKind])
4497 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4499 if (ST->hasVPOPCNTDQ())
4500 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4501 if (
auto KindCost = Entry->Cost[
CostKind])
4502 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4506 if (
auto KindCost = Entry->Cost[
CostKind])
4507 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4511 if (
auto KindCost = Entry->Cost[
CostKind])
4512 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4516 if (
auto KindCost = Entry->Cost[
CostKind])
4517 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4521 if (
auto KindCost = Entry->Cost[
CostKind])
4522 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4526 if (
auto KindCost = Entry->Cost[
CostKind])
4527 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4531 if (
auto KindCost = Entry->Cost[
CostKind])
4532 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4536 if (
auto KindCost = Entry->Cost[
CostKind])
4537 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4541 if (
auto KindCost = Entry->Cost[
CostKind])
4542 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4546 if (
auto KindCost = Entry->Cost[
CostKind])
4547 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4551 if (
auto KindCost = Entry->Cost[
CostKind])
4552 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4556 if (
auto KindCost = Entry->Cost[
CostKind])
4557 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4561 if (
auto KindCost = Entry->Cost[
CostKind])
4562 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4567 if (
auto KindCost = Entry->Cost[
CostKind])
4568 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4571 if (
auto KindCost = Entry->Cost[
CostKind])
4572 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4575 if (ST->hasLZCNT()) {
4578 if (
auto KindCost = Entry->Cost[
CostKind])
4579 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4582 if (
auto KindCost = Entry->Cost[
CostKind])
4583 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4586 if (ST->hasPOPCNT()) {
4589 if (
auto KindCost = Entry->Cost[
CostKind])
4590 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4593 if (
auto KindCost = Entry->Cost[
CostKind])
4594 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4599 if (
auto KindCost = Entry->Cost[
CostKind])
4600 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4603 if (
auto KindCost = Entry->Cost[
CostKind])
4604 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4627 if (
Index == -1U && (Opcode == Instruction::ExtractElement ||
4628 Opcode == Instruction::InsertElement)) {
4633 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4638 if (Opcode == Instruction::ExtractElement) {
4644 if (Opcode == Instruction::InsertElement) {
4652 if (
Index != -1U && (Opcode == Instruction::ExtractElement ||
4653 Opcode == Instruction::InsertElement)) {
4655 if (Opcode == Instruction::ExtractElement &&
4657 cast<FixedVectorType>(Val)->getNumElements() > 1)
4664 if (!LT.second.isVector())
4668 unsigned SizeInBits = LT.second.getSizeInBits();
4669 unsigned NumElts = LT.second.getVectorNumElements();
4670 unsigned SubNumElts = NumElts;
4675 if (SizeInBits > 128) {
4676 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4677 unsigned NumSubVecs = SizeInBits / 128;
4678 SubNumElts = NumElts / NumSubVecs;
4679 if (SubNumElts <=
Index) {
4680 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4681 Index %= SubNumElts;
4685 MVT MScalarTy = LT.second.getScalarType();
4686 auto IsCheapPInsrPExtrInsertPS = [&]() {
4689 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4691 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4692 Opcode == Instruction::InsertElement);
4700 (Opcode != Instruction::InsertElement || !Op0 ||
4701 isa<UndefValue>(Op0)))
4702 return RegisterFileMoveCost;
4704 if (Opcode == Instruction::InsertElement &&
4705 isa_and_nonnull<UndefValue>(Op0)) {
4707 if (isa_and_nonnull<LoadInst>(Op1))
4708 return RegisterFileMoveCost;
4709 if (!IsCheapPInsrPExtrInsertPS()) {
4712 return 2 + RegisterFileMoveCost;
4714 return 1 + RegisterFileMoveCost;
4719 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4720 return 1 + RegisterFileMoveCost;
4724 assert(ISD &&
"Unexpected vector opcode");
4725 if (ST->useSLMArithCosts())
4727 return Entry->Cost + RegisterFileMoveCost;
4730 if (IsCheapPInsrPExtrInsertPS())
4731 return 1 + RegisterFileMoveCost;
4740 if (Opcode == Instruction::InsertElement) {
4741 auto *SubTy = cast<VectorType>(Val);
4749 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4753 RegisterFileMoveCost;
4758 bool Insert,
bool Extract,
4761 cast<FixedVectorType>(Ty)->getNumElements() &&
4762 "Vector size mismatch");
4765 MVT MScalarTy = LT.second.getScalarType();
4766 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4769 constexpr unsigned LaneBitWidth = 128;
4770 assert((LegalVectorBitWidth < LaneBitWidth ||
4771 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4774 const int NumLegalVectors = *LT.first.getValue();
4775 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4780 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4782 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4785 if (LegalVectorBitWidth <= LaneBitWidth) {
4801 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4802 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4803 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4804 unsigned NumLegalElts =
4805 LT.second.getVectorNumElements() * NumLegalVectors;
4807 "Vector has been legalized to smaller element count");
4808 assert((NumLegalElts % NumLanesTotal) == 0 &&
4809 "Unexpected elts per lane");
4810 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4812 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4816 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4818 NumEltsPerLane, NumEltsPerLane *
I);
4819 if (LaneEltMask.
isZero())
4830 APInt AffectedLanes =
4833 AffectedLanes, NumLegalVectors,
true);
4834 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4835 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4836 unsigned I = NumLegalLanes * LegalVec + Lane;
4839 if (!AffectedLanes[
I] ||
4840 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4847 }
else if (LT.second.isVector()) {
4858 unsigned NumElts = LT.second.getVectorNumElements();
4861 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
4870 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
4871 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
4872 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
4876 if (LT.second.isVector()) {
4877 unsigned NumLegalElts =
4878 LT.second.getVectorNumElements() * NumLegalVectors;
4880 "Vector has been legalized to smaller element count");
4884 if (LegalVectorBitWidth > LaneBitWidth) {
4885 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4886 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4887 assert((NumLegalElts % NumLanesTotal) == 0 &&
4888 "Unexpected elts per lane");
4889 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4893 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4897 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4899 NumEltsPerLane,
I * NumEltsPerLane);
4900 if (LaneEltMask.
isZero())
4905 LaneTy, LaneEltMask,
false, Extract,
CostKind);
4922 int VF,
const APInt &DemandedDstElts,
4928 auto bailout = [&]() {
4938 unsigned PromEltTyBits = EltTyBits;
4939 switch (EltTyBits) {
4970 int NumDstElements = VF * ReplicationFactor;
4984 if (PromEltTyBits != EltTyBits) {
4990 Instruction::SExt, PromSrcVecTy, SrcVecTy,
4997 ReplicationFactor, VF,
5003 "We expect that the legalization doesn't affect the element width, "
5004 "doesn't coalesce/split elements.");
5007 unsigned NumDstVectors =
5008 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
5017 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
5018 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
5023 return NumDstVectorsDemanded * SingleShuffleCost;
5034 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
5037 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
5038 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
5045 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
5055 auto *VTy = dyn_cast<FixedVectorType>(Src);
5060 if (Opcode == Instruction::Store && OpInfo.
isConstant())
5066 if (!VTy || !LT.second.isVector()) {
5068 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
5071 bool IsLoad = Opcode == Instruction::Load;
5073 Type *EltTy = VTy->getElementType();
5078 const unsigned SrcNumElt = VTy->getNumElements();
5081 int NumEltRemaining = SrcNumElt;
5083 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
5085 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
5088 const unsigned XMMBits = 128;
5089 if (XMMBits % EltTyBits != 0)
5093 const int NumEltPerXMM = XMMBits / EltTyBits;
5097 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
5098 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
5100 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
5104 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
5106 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
5107 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
5108 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
5109 "Unless we haven't halved the op size yet, "
5110 "we have less than two op's sized units of work left.");
5112 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
5116 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
5117 "After halving sizes, the vector elt count is no longer a multiple "
5118 "of number of elements per operation?");
5119 auto *CoalescedVecTy =
5120 CurrNumEltPerOp == 1
5124 EltTyBits * CurrNumEltPerOp),
5125 CurrVecTy->getNumElements() / CurrNumEltPerOp);
5128 "coalesciing elements doesn't change vector width.");
5130 while (NumEltRemaining > 0) {
5131 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
5135 if (NumEltRemaining < CurrNumEltPerOp &&
5136 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
5137 CurrOpSizeBytes != 1)
5140 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
5143 if (SubVecEltsLeft == 0) {
5144 SubVecEltsLeft += CurrVecTy->getNumElements();
5149 VTy, std::nullopt,
CostKind, NumEltDone(),
5157 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5158 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
5159 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
5160 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5161 APInt DemandedElts =
5163 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
5164 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5174 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5176 else if (CurrOpSizeBytes < 4)
5181 SubVecEltsLeft -= CurrNumEltPerOp;
5182 NumEltRemaining -= CurrNumEltPerOp;
5187 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5196 bool IsLoad = (Instruction::Load == Opcode);
5197 bool IsStore = (Instruction::Store == Opcode);
5199 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5204 unsigned NumElem = SrcVTy->getNumElements();
5212 MaskTy, DemandedElts,
false,
true,
CostKind);
5217 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5219 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5223 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5231 if (Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64)
5233 return Cost + LT.first;
5235 if (VT.isSimple() && Ty != VT.getSimpleVT() &&
5236 LT.second.getVectorNumElements() == NumElem)
5253 return Cost + LT.first * (IsLoad ? 2 : 8);
5256 return Cost + LT.first;
5264 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5268 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5270 return getGEPCost(BaseGEP->getSourceElementType(),
5271 BaseGEP->getPointerOperand(), Indices,
nullptr,
5286 const unsigned NumVectorInstToHideOverhead = 10;
5299 return NumVectorInstToHideOverhead;
5309 std::optional<FastMathFlags> FMF,
5350 assert(ISD &&
"Invalid opcode");
5358 if (ST->useSLMArithCosts())
5373 MVT MTy = LT.second;
5375 auto *ValVTy = cast<FixedVectorType>(ValTy);
5388 if (LT.first != 1 && MTy.
isVector() &&
5394 ArithmeticCost *= LT.first - 1;
5397 if (ST->useSLMArithCosts())
5399 return ArithmeticCost + Entry->Cost;
5403 return ArithmeticCost + Entry->Cost;
5407 return ArithmeticCost + Entry->Cost;
5456 if (ValVTy->getElementType()->isIntegerTy(1)) {
5458 if (LT.first != 1 && MTy.
isVector() &&
5464 ArithmeticCost *= LT.first - 1;
5468 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5469 return ArithmeticCost + Entry->Cost;
5472 return ArithmeticCost + Entry->Cost;
5475 return ArithmeticCost + Entry->Cost;
5478 return ArithmeticCost + Entry->Cost;
5483 unsigned NumVecElts = ValVTy->getNumElements();
5484 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5494 if (LT.first != 1 && MTy.
isVector() &&
5500 ReductionCost *= LT.first - 1;
5506 while (NumVecElts > 1) {
5508 unsigned Size = NumVecElts * ScalarSize;
5517 }
else if (
Size == 128) {
5520 if (ValVTy->isFloatingPointTy())
5527 std::nullopt,
CostKind, 0,
nullptr);
5528 }
else if (
Size == 64) {
5531 if (ValVTy->isFloatingPointTy())
5538 std::nullopt,
CostKind, 0,
nullptr);
5544 Instruction::LShr, ShiftTy,
CostKind,
5571 MVT MTy = LT.second;
5575 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5579 "Expected float point or integer vector type.");
5580 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5648 auto *ValVTy = cast<FixedVectorType>(ValTy);
5649 unsigned NumVecElts = ValVTy->getNumElements();
5653 if (LT.first != 1 && MTy.
isVector() &&
5659 MinMaxCost *= LT.first - 1;
5665 return MinMaxCost + Entry->Cost;
5669 return MinMaxCost + Entry->Cost;
5673 return MinMaxCost + Entry->Cost;
5677 return MinMaxCost + Entry->Cost;
5689 while (NumVecElts > 1) {
5691 unsigned Size = NumVecElts * ScalarSize;
5699 }
else if (
Size == 128) {
5708 std::nullopt,
CostKind, 0,
nullptr);
5709 }
else if (
Size == 64) {
5717 std::nullopt,
CostKind, 0,
nullptr);
5770 if (BitSize % 64 != 0)
5771 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5776 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5782 return std::max<InstructionCost>(1,
Cost);
5797 unsigned ImmIdx = ~0U;
5801 case Instruction::GetElementPtr:
5808 case Instruction::Store:
5811 case Instruction::ICmp:
5817 if (
Idx == 1 && Imm.getBitWidth() == 64) {
5818 uint64_t ImmVal = Imm.getZExtValue();
5819 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5824 case Instruction::And:
5828 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.isIntN(32))
5832 case Instruction::Add:
5833 case Instruction::Sub:
5835 if (
Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
5839 case Instruction::UDiv:
5840 case Instruction::SDiv:
5841 case Instruction::URem:
5842 case Instruction::SRem:
5847 case Instruction::Mul:
5848 case Instruction::Or:
5849 case Instruction::Xor:
5853 case Instruction::Shl:
5854 case Instruction::LShr:
5855 case Instruction::AShr:
5859 case Instruction::Trunc:
5860 case Instruction::ZExt:
5861 case Instruction::SExt:
5862 case Instruction::IntToPtr:
5863 case Instruction::PtrToInt:
5864 case Instruction::BitCast:
5865 case Instruction::PHI:
5866 case Instruction::Call:
5867 case Instruction::Select:
5868 case Instruction::Ret:
5869 case Instruction::Load:
5873 if (
Idx == ImmIdx) {
5898 case Intrinsic::sadd_with_overflow:
5899 case Intrinsic::uadd_with_overflow:
5900 case Intrinsic::ssub_with_overflow:
5901 case Intrinsic::usub_with_overflow:
5902 case Intrinsic::smul_with_overflow:
5903 case Intrinsic::umul_with_overflow:
5904 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
5907 case Intrinsic::experimental_stackmap:
5908 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5911 case Intrinsic::experimental_patchpoint_void:
5912 case Intrinsic::experimental_patchpoint:
5913 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
5924 return Opcode == Instruction::PHI ? 0 : 1;
5929int X86TTIImpl::getGatherOverhead()
const {
5942int X86TTIImpl::getScatterOverhead()
const {
5956 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
5957 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
5967 if (IndexSize < 64 || !
GEP)
5970 unsigned NumOfVarIndices = 0;
5971 const Value *Ptrs =
GEP->getPointerOperand();
5974 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
5975 if (isa<Constant>(
GEP->getOperand(
I)))
5977 Type *IndxTy =
GEP->getOperand(
I)->getType();
5978 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
5979 IndxTy = IndexVTy->getElementType();
5981 !isa<SExtInst>(
GEP->getOperand(
I))) ||
5982 ++NumOfVarIndices > 1)
5985 return (
unsigned)32;
5990 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
5991 ? getIndexSizeInBits(
Ptr,
DL)
5999 *std::max(IdxsLT.first, SrcLT.first).getValue();
6000 if (SplitFactor > 1) {
6004 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
6014 const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
6015 : getScatterOverhead();
6023 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
6026 if ((Opcode == Instruction::Load &&
6029 Align(Alignment)))) ||
6030 (Opcode == Instruction::Store &&
6033 Align(Alignment)))))
6039 if (!PtrTy &&
Ptr->getType()->isVectorTy())
6040 PtrTy = dyn_cast<PointerType>(
6041 cast<VectorType>(
Ptr->getType())->getElementType());
6042 assert(PtrTy &&
"Unexpected type for Ptr argument");
6044 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
6060 return ST->hasMacroFusion() || ST->hasBranchFusion();
6067 if (isa<VectorType>(DataTy) && cast<FixedVectorType>(DataTy)->
getNumElements() == 1)
6079 if (ScalarTy->
isHalfTy() && ST->hasBWI())
6089 return IntWidth == 32 || IntWidth == 64 ||
6090 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
6102 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
6119 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6141 if (!isa<VectorType>(DataTy))
6151 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6160 return IntWidth == 32 || IntWidth == 64 ||
6161 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6168bool X86TTIImpl::supportsGather()
const {
6182 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6183 return NumElts == 1 ||
6184 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6199 return IntWidth == 32 || IntWidth == 64;
6203 if (!supportsGather() || !ST->preferGather())
6218 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6219 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6224 for (
int Lane : seq<int>(0, NumElements)) {
6225 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6227 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6229 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6233 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6235 return ST->
hasSSE3() && NumElements % 4 == 0;
6237 return ST->
hasSSE3() && NumElements % 2 == 0;
6243 if (!ST->
hasAVX512() || !ST->preferScatter())
6256 if (
I->getOpcode() == Instruction::FDiv)
6272 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6274 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6277 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6278 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6279 if (RealCallerBits == RealCalleeBits)
6284 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6288 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6290 if (CB->isInlineAsm())
6294 for (
Value *Arg : CB->args())
6295 Types.push_back(Arg->getType());
6296 if (!CB->getType()->isVoidTy())
6297 Types.push_back(CB->getType());
6300 auto IsSimpleTy = [](
Type *Ty) {
6301 return !Ty->isVectorTy() && !Ty->isAggregateType();
6303 if (
all_of(Types, IsSimpleTy))
6306 if (
Function *NestedCallee = CB->getCalledFunction()) {
6308 if (NestedCallee->isIntrinsic())
6343 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6352 Options.AllowOverlappingLoads =
true;
6357 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6358 Options.LoadSizes.push_back(64);
6359 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6360 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6362 if (ST->is64Bit()) {
6363 Options.LoadSizes.push_back(8);
6365 Options.LoadSizes.push_back(4);
6366 Options.LoadSizes.push_back(2);
6367 Options.LoadSizes.push_back(1);
6372 return supportsGather();
6383 return !(ST->isAtom());
6403 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6409 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6422 if (UseMaskedMemOp) {
6424 for (
unsigned Index : Indices) {
6425 assert(
Index < Factor &&
"Invalid index for interleaved memory op");
6426 for (
unsigned Elm = 0; Elm < VF; Elm++)
6427 DemandedLoadStoreElts.
setBit(
Index + Elm * Factor);
6434 UseMaskForGaps ? DemandedLoadStoreElts
6443 if (UseMaskForGaps) {
6449 if (Opcode == Instruction::Load) {
6456 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6457 {3, MVT::v16i8, 12},
6458 {3, MVT::v32i8, 14},
6459 {3, MVT::v64i8, 22},
6462 if (
const auto *Entry =
6464 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6474 ShuffleKind, SingleMemOpTy, std::nullopt,
CostKind, 0,
nullptr);
6476 unsigned NumOfLoadsInInterleaveGrp =
6477 Indices.
size() ? Indices.
size() : Factor;
6486 unsigned NumOfUnfoldedLoads =
6487 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6490 unsigned NumOfShufflesPerResult =
6491 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6498 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6501 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6508 assert(Opcode == Instruction::Store &&
6509 "Expected Store Instruction at this point");
6511 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6512 {3, MVT::v16i8, 12},
6513 {3, MVT::v32i8, 14},
6514 {3, MVT::v64i8, 26},
6517 {4, MVT::v16i8, 11},
6518 {4, MVT::v32i8, 14},
6522 if (
const auto *Entry =
6524 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6529 unsigned NumOfSources = Factor;
6532 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6536 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6539 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6547 bool UseMaskForCond,
bool UseMaskForGaps) {
6548 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6550 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6551 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6556 return ST->hasBWI();
6558 return ST->hasBF16();
6561 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6563 Opcode, VecTy, Factor, Indices, Alignment,
6566 if (UseMaskForCond || UseMaskForGaps)
6569 UseMaskForCond, UseMaskForGaps);
6589 unsigned VF = VecTy->getNumElements() / Factor;
6590 Type *ScalarTy = VecTy->getElementType();
6622 {2, MVT::v16i16, 9},
6623 {2, MVT::v32i16, 18},
6626 {2, MVT::v16i32, 8},
6627 {2, MVT::v32i32, 16},
6631 {2, MVT::v16i64, 16},
6632 {2, MVT::v32i64, 32},
6637 {3, MVT::v16i8, 11},
6638 {3, MVT::v32i8, 14},
6643 {3, MVT::v16i16, 28},
6644 {3, MVT::v32i16, 56},
6649 {3, MVT::v16i32, 14},
6650 {3, MVT::v32i32, 32},
6654 {3, MVT::v8i64, 10},
6655 {3, MVT::v16i64, 20},
6660 {4, MVT::v16i8, 24},
6661 {4, MVT::v32i8, 56},
6664 {4, MVT::v4i16, 17},
6665 {4, MVT::v8i16, 33},
6666 {4, MVT::v16i16, 75},
6667 {4, MVT::v32i16, 150},
6671 {4, MVT::v8i32, 16},
6672 {4, MVT::v16i32, 32},
6673 {4, MVT::v32i32, 68},
6677 {4, MVT::v8i64, 20},
6678 {4, MVT::v16i64, 40},
6683 {6, MVT::v16i8, 43},
6684 {6, MVT::v32i8, 82},
6686 {6, MVT::v2i16, 13},
6688 {6, MVT::v8i16, 39},
6689 {6, MVT::v16i16, 106},
6690 {6, MVT::v32i16, 212},
6693 {6, MVT::v4i32, 15},
6694 {6, MVT::v8i32, 31},
6695 {6, MVT::v16i32, 64},
6698 {6, MVT::v4i64, 18},
6699 {6, MVT::v8i64, 36},
6704 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6718 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6723 {2, MVT::v16i16, 4},
6724 {2, MVT::v32i16, 8},
6728 {2, MVT::v16i32, 8},
6729 {2, MVT::v32i32, 16},
6734 {2, MVT::v16i64, 16},
6735 {2, MVT::v32i64, 32},
6740 {3, MVT::v16i8, 11},
6741 {3, MVT::v32i8, 13},
6745 {3, MVT::v8i16, 12},
6746 {3, MVT::v16i16, 27},
6747 {3, MVT::v32i16, 54},
6751 {3, MVT::v8i32, 11},
6752 {3, MVT::v16i32, 22},
6753 {3, MVT::v32i32, 48},
6757 {3, MVT::v8i64, 12},
6758 {3, MVT::v16i64, 24},
6764 {4, MVT::v32i8, 12},
6768 {4, MVT::v8i16, 10},
6769 {4, MVT::v16i16, 32},
6770 {4, MVT::v32i16, 64},
6774 {4, MVT::v8i32, 16},
6775 {4, MVT::v16i32, 32},
6776 {4, MVT::v32i32, 64},
6780 {4, MVT::v8i64, 20},
6781 {4, MVT::v16i64, 40},
6786 {6, MVT::v16i8, 27},
6787 {6, MVT::v32i8, 90},
6789 {6, MVT::v2i16, 10},
6790 {6, MVT::v4i16, 15},
6791 {6, MVT::v8i16, 21},
6792 {6, MVT::v16i16, 58},
6793 {6, MVT::v32i16, 90},
6796 {6, MVT::v4i32, 12},
6797 {6, MVT::v8i32, 33},
6798 {6, MVT::v16i32, 66},
6801 {6, MVT::v4i64, 15},
6802 {6, MVT::v8i64, 30},
6805 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6816 if (Opcode == Instruction::Load) {
6817 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6821 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6825 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6827 return GetDiscountedCost(Entry);
6830 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6832 return GetDiscountedCost(Entry);
6835 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6837 return GetDiscountedCost(Entry);
6839 assert(Opcode == Instruction::Store &&
6840 "Expected Store Instruction at this point");
6842 "Interleaved store only supports fully-interleaved groups.");
6844 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6846 return MemOpCosts + Entry->Cost;
6849 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6851 return MemOpCosts + Entry->Cost;
6856 UseMaskForCond, UseMaskForGaps);
6861 bool HasBaseReg, int64_t Scale,
6862 unsigned AddrSpace)
const {
6890 return AM.
Scale != 0;
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
A parsed version of the target data layout string, together with methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getBranchMispredictPenalty() const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMULO
Same for multiplication.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treats -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the smallest power of two that is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.