519 : ST(&_ST), MRI(&_MRI) {
521 addRulesForGOpcs({G_ADD, G_SUB},
Standard)
531 addRulesForGOpcs({G_UADDO, G_USUBO},
Standard)
535 addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE},
Standard)
539 addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT},
Standard)
547 bool HasVecMulU64 = ST->hasVectorMulU64();
559 bool hasMulHi = ST->hasScalarMulHiInsts();
560 addRulesForGOpcs({G_UMULH, G_SMULH},
Standard)
565 addRulesForGOpcs({G_AMDGPU_MAD_U64_U32},
Standard)
569 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
570 addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32},
Standard)
574 addRulesForGOpcs({G_XOR, G_OR, G_AND},
StandardB)
594 addRulesForGOpcs({G_LSHR},
Standard)
604 addRulesForGOpcs({G_ASHR},
Standard)
614 addRulesForGOpcs({G_FSHR},
Standard)
618 addRulesForGOpcs({G_BSWAP},
Standard)
626 addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
627 G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
635 addRulesForGOpcs({G_UBFX, G_SBFX},
Standard)
641 addRulesForGOpcs({G_SMIN, G_SMAX},
Standard)
649 addRulesForGOpcs({G_UMIN, G_UMAX},
Standard)
660 addRulesForGOpcs({G_CONSTANT})
663 addRulesForGOpcs({G_FREEZE})
670 addRulesForGOpcs({G_UNMERGE_VALUES})
675 addRulesForGOpcs({G_PHI})
681 addRulesForGOpcs({G_EXTRACT_VECTOR_ELT})
696 addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
697 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
698 G_AMDGPU_INTRIN_IMAGE_STORE,
699 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
714 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
716 addRulesForGOpcs({G_ICMP})
735 addRulesForGOpcs({G_BRCOND})
739 addRulesForGOpcs({G_BR}).
Any({{
_}, {{}, {
None}}});
749 addRulesForGOpcs({G_ANYEXT})
761 bool Has16bitCmp = ST->has16BitInsts();
765 addRulesForGOpcs({G_TRUNC})
782 addRulesForGOpcs({G_ZEXT})
797 addRulesForGOpcs({G_SEXT})
812 addRulesForGOpcs({G_SEXT_INREG})
818 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT},
Standard)
824 addRulesForGOpcs({G_ASSERT_ALIGN},
Standard)
836 addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
837 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
838 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
839 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
840 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
848 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
849 bool HasAtomicBufferGlobalPkAddF16Insts =
850 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
851 ST->hasAtomicBufferGlobalPkAddF16Insts();
852 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
853 addRulesForGOpcs({G_ATOMICRMW_FADD})
861 HasAtomicFlatPkAdd16Insts)
863 HasAtomicBufferGlobalPkAddF16Insts)
865 HasAtomicDsPkAdd16Insts);
867 addRulesForGOpcs({G_ATOMIC_CMPXCHG})
873 addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
879 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP},
Standard)
885 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
886 G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
887 G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
888 G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
889 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
890 G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
891 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
896 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
897 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
898 bool usesTrue16 = ST->useRealTrue16Insts();
901 return (*
MI.memoperands_begin())->getAlign() >=
Align(16);
905 return (*
MI.memoperands_begin())->getAlign() >=
Align(4);
909 return (*
MI.memoperands_begin())->isAtomic();
925 return (*
MI.memoperands_begin())->isVolatile();
929 return (*
MI.memoperands_begin())->isInvariant();
944 return MemSize == 16 || MemSize == 8;
952 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
953 (isConst || isInvMMO || isNoClobberMMO);
957 addRulesForGOpcs({G_LOAD})
1064 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD})
1084 addRulesForGOpcs({G_STORE})
1120 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1121 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1132 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1133 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1139 {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1144 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1157 {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1166 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1167 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1168 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1169 G_AMDGPU_TBUFFER_STORE_FORMAT,
1170 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1182 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
1191 addRulesForGOpcs({G_PTR_ADD})
1197 addRulesForGOpcs({G_INTTOPTR})
1205 addRulesForGOpcs({G_PTRTOINT})
1215 addRulesForGOpcs({G_PTRMASK})
1223 addRulesForGOpcs({G_BITREVERSE},
Standard)
1229 addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_UNDEF,
1236 addRulesForGOpcs({G_FENCE}).
Any({{{}}, {{}, {}}});
1238 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER},
Standard)
1243 addRulesForGOpcs({G_GLOBAL_VALUE})
1250 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).
Any({{
UniP5}, {{
SgprP5}, {}}});
1252 addRulesForGOpcs({G_SI_CALL})
1258 bool hasSALUFloat = ST->hasSALUFloatInsts();
1260 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL},
Standard)
1274 addRulesForGOpcs({G_FSUB, G_STRICT_FSUB},
Standard)
1282 addRulesForGOpcs({G_FMAD},
Standard)
1288 addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP},
Standard)
1296 addRulesForGOpcs({G_FMA, G_STRICT_FMA},
Standard)
1314 addRulesForGOpcs({G_AMDGPU_FMED3},
Standard)
1323 addRulesForGOpcs({G_AMDGPU_SMED3},
Standard)
1331 addRulesForGOpcs({G_FNEG, G_FABS},
Standard)
1346 addRulesForGOpcs({G_FCANONICALIZE},
Standard)
1358 bool hasPST = ST->hasPseudoScalarTrans();
1359 addRulesForGOpcs({G_FSQRT},
Standard)
1364 addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
1376 addRulesForGOpcs({G_UITOFP, G_SITOFP})
1388 addRulesForGOpcs({G_FPEXT})
1395 addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32},
Standard)
1399 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY},
Standard)
1403 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1405 addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM},
Standard)
1417 addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
1430 addRulesForGOpcs({G_FPTRUNC})
1439 addRulesForGOpcs({G_IS_FPCLASS})
1447 addRulesForGOpcs({G_FCMP},
Standard)
1461 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1477 addRulesForIOpcs({amdgcn_s_setreg})
1480 addRulesForIOpcs({amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1483 addRulesForIOpcs({amdgcn_s_sendmsg_rtn})
1487 addRulesForIOpcs({amdgcn_s_memrealtime, amdgcn_s_memtime},
Standard)
1490 addRulesForIOpcs({amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1491 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1496 addRulesForIOpcs({amdgcn_endpgm,
1499 amdgcn_s_barrier_signal,
1500 amdgcn_s_barrier_wait,
1501 amdgcn_s_monitor_sleep,
1505 amdgcn_s_setprio_inc_wg,
1507 amdgcn_s_ttracedata_imm,
1508 amdgcn_s_wait_asynccnt,
1509 amdgcn_s_wait_bvhcnt,
1510 amdgcn_s_wait_dscnt,
1511 amdgcn_s_wait_event,
1512 amdgcn_s_wait_event_export_ready,
1513 amdgcn_s_wait_expcnt,
1514 amdgcn_s_wait_kmcnt,
1515 amdgcn_s_wait_loadcnt,
1516 amdgcn_s_wait_samplecnt,
1517 amdgcn_s_wait_storecnt,
1518 amdgcn_s_wait_tensorcnt,
1520 amdgcn_wave_barrier})
1521 .
Any({{}, {{}, {}}});
1523 addRulesForIOpcs({amdgcn_init_exec_from_input})
1528 addRulesForIOpcs({amdgcn_s_sleep_var})
1531 addRulesForIOpcs({amdgcn_s_prefetch_data})
1534 addRulesForIOpcs({amdgcn_class})
1543 addRulesForIOpcs({amdgcn_end_cf})
1547 addRulesForIOpcs({amdgcn_if_break},
Standard)
1551 addRulesForIOpcs({amdgcn_exp})
1555 addRulesForIOpcs({amdgcn_exp_compr})
1558 addRulesForIOpcs({amdgcn_exp_row})
1564 addRulesForIOpcs({amdgcn_lds_direct_load},
StandardB)
1567 addRulesForIOpcs({amdgcn_lds_param_load},
Standard)
1570 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi},
Standard)
1573 addRulesForIOpcs({amdgcn_readfirstlane})
1579 addRulesForIOpcs({amdgcn_readlane},
StandardB)
1582 addRulesForIOpcs({amdgcn_writelane},
StandardB)
1587 addRulesForIOpcs({amdgcn_permlane16, amdgcn_permlanex16},
Standard)
1592 addRulesForIOpcs({amdgcn_perm},
Standard)
1596 addRulesForIOpcs({amdgcn_wave_reduce_umax, amdgcn_wave_reduce_umin},
Standard)
1602 addRulesForIOpcs({amdgcn_bitop3, amdgcn_fmad_ftz},
Standard)
1608 addRulesForIOpcs({amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1609 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1610 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1615 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24},
Standard)
1621 addRulesForIOpcs({amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
1622 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
1628 addRulesForIOpcs({amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
1634 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract},
Standard)
1642 addRulesForIOpcs({amdgcn_prng_b32})
1646 addRulesForIOpcs({amdgcn_sffbh},
Standard)
1650 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe},
Standard)
1656 addRulesForIOpcs({amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
1657 amdgcn_cvt_pknorm_u16, amdgcn_cvt_pkrtz},
1662 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1666 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1670 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1674 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num},
Standard)
1677 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1681 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1684 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1688 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1691 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1694 addRulesForIOpcs({amdgcn_global_load_lds})
1697 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1713 addRulesForIOpcs({amdgcn_wqm_demote}).
Any({{}, {{}, {
IntrId,
Vcc}}});
1715 addRulesForIOpcs({amdgcn_inverse_ballot})
1719 addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
1722 addRulesForIOpcs({amdgcn_mov_dpp, amdgcn_mov_dpp8},
StandardB)
1726 addRulesForIOpcs({amdgcn_update_dpp},
StandardB)
1730 addRulesForIOpcs({amdgcn_sin, amdgcn_cos},
Standard)
1736 addRulesForIOpcs({amdgcn_trig_preop},
Standard)
1740 addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
1745 addRulesForIOpcs({amdgcn_ds_append, amdgcn_ds_consume},
Standard)
1750 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn},
Standard)
1753 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn},
Standard)
1756 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn},
Standard)
1759 addRulesForIOpcs({amdgcn_ds_gws_sema_p, amdgcn_ds_gws_sema_v,
1760 amdgcn_ds_gws_sema_release_all})
1764 {amdgcn_ds_gws_barrier, amdgcn_ds_gws_init, amdgcn_ds_gws_sema_br})
1767 addRulesForIOpcs({amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap},
Standard)
1770 addRulesForIOpcs({amdgcn_ds_swizzle},
Standard)
1774 addRulesForIOpcs({amdgcn_permlane16_var, amdgcn_permlanex16_var},
Standard)
1777 addRulesForIOpcs({amdgcn_permlane16_swap, amdgcn_permlane32_swap},
Standard)
1780 addRulesForIOpcs({amdgcn_permlane64},
StandardB)
1783 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
1786 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
1789 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
1792 addRulesForIOpcs({amdgcn_interp_p1},
Standard)
1795 addRulesForIOpcs({amdgcn_interp_p1_f16},
Standard)
1798 addRulesForIOpcs({amdgcn_interp_p2},
Standard)
1801 addRulesForIOpcs({amdgcn_interp_p2_f16},
Standard)
1805 addRulesForIOpcs({amdgcn_interp_mov},
Standard)
1808 addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
1809 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
1814 addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
1819 addRulesForIOpcs({amdgcn_div_fmas},
Standard)
1825 addRulesForIOpcs({amdgcn_div_fixup},
Standard)
1833 addRulesForIOpcs({amdgcn_div_scale},
Standard)
1839 addRulesForIOpcs({amdgcn_fdot2, amdgcn_sdot2, amdgcn_udot2},
Standard)
1843 addRulesForIOpcs({amdgcn_fdot2_f16_f16},
Standard)
1847 addRulesForIOpcs({amdgcn_sudot4, amdgcn_sudot8},
Standard)
1854 amdgcn_wmma_f32_16x16x16_f16, amdgcn_wmma_f32_16x16x16_bf16,
1855 amdgcn_wmma_f16_16x16x16_f16, amdgcn_wmma_bf16_16x16x16_bf16,
1856 amdgcn_wmma_f16_16x16x16_f16_tied, amdgcn_wmma_bf16_16x16x16_bf16_tied,
1857 amdgcn_wmma_i32_16x16x16_iu8, amdgcn_wmma_i32_16x16x16_iu4,
1859 amdgcn_wmma_f32_16x16x16_fp8_fp8, amdgcn_wmma_f32_16x16x16_fp8_bf8,
1860 amdgcn_wmma_f32_16x16x16_bf8_fp8, amdgcn_wmma_f32_16x16x16_bf8_bf8,
1861 amdgcn_wmma_i32_16x16x32_iu4,
1863 amdgcn_wmma_f32_16x16x4_f32, amdgcn_wmma_f32_16x16x32_bf16,
1864 amdgcn_wmma_f32_16x16x32_f16, amdgcn_wmma_f16_16x16x32_f16,
1865 amdgcn_wmma_bf16_16x16x32_bf16, amdgcn_wmma_bf16f32_16x16x32_bf16,
1866 amdgcn_wmma_f32_16x16x64_fp8_fp8, amdgcn_wmma_f32_16x16x64_fp8_bf8,
1867 amdgcn_wmma_f32_16x16x64_bf8_fp8, amdgcn_wmma_f32_16x16x64_bf8_bf8,
1868 amdgcn_wmma_f16_16x16x64_fp8_fp8, amdgcn_wmma_f16_16x16x64_fp8_bf8,
1869 amdgcn_wmma_f16_16x16x64_bf8_fp8, amdgcn_wmma_f16_16x16x64_bf8_bf8,
1870 amdgcn_wmma_f16_16x16x128_fp8_fp8, amdgcn_wmma_f16_16x16x128_fp8_bf8,
1871 amdgcn_wmma_f16_16x16x128_bf8_fp8, amdgcn_wmma_f16_16x16x128_bf8_bf8,
1872 amdgcn_wmma_f32_16x16x128_fp8_fp8, amdgcn_wmma_f32_16x16x128_fp8_bf8,
1873 amdgcn_wmma_f32_16x16x128_bf8_fp8, amdgcn_wmma_f32_16x16x128_bf8_bf8,
1874 amdgcn_wmma_i32_16x16x64_iu8, amdgcn_wmma_f32_16x16x128_f8f6f4,
1875 amdgcn_wmma_scale_f32_16x16x128_f8f6f4,
1876 amdgcn_wmma_scale16_f32_16x16x128_f8f6f4, amdgcn_wmma_f32_32x16x128_f4,
1877 amdgcn_wmma_scale_f32_32x16x128_f4, amdgcn_wmma_scale16_f32_32x16x128_f4,
1879 amdgcn_swmmac_f32_16x16x32_f16, amdgcn_swmmac_f32_16x16x32_bf16,
1880 amdgcn_swmmac_f16_16x16x32_f16, amdgcn_swmmac_bf16_16x16x32_bf16,
1881 amdgcn_swmmac_i32_16x16x32_iu8, amdgcn_swmmac_i32_16x16x32_iu4,
1882 amdgcn_swmmac_i32_16x16x64_iu4, amdgcn_swmmac_f32_16x16x32_fp8_fp8,
1883 amdgcn_swmmac_f32_16x16x32_fp8_bf8, amdgcn_swmmac_f32_16x16x32_bf8_fp8,
1884 amdgcn_swmmac_f32_16x16x32_bf8_bf8,
1886 amdgcn_swmmac_f32_16x16x64_f16, amdgcn_swmmac_f32_16x16x64_bf16,
1887 amdgcn_swmmac_f16_16x16x64_f16, amdgcn_swmmac_bf16_16x16x64_bf16,
1888 amdgcn_swmmac_bf16f32_16x16x64_bf16, amdgcn_swmmac_f32_16x16x128_fp8_fp8,
1889 amdgcn_swmmac_f32_16x16x128_fp8_bf8, amdgcn_swmmac_f32_16x16x128_bf8_fp8,
1890 amdgcn_swmmac_f32_16x16x128_bf8_bf8, amdgcn_swmmac_f16_16x16x128_fp8_fp8,
1891 amdgcn_swmmac_f16_16x16x128_fp8_bf8, amdgcn_swmmac_f16_16x16x128_bf8_fp8,
1892 amdgcn_swmmac_f16_16x16x128_bf8_bf8, amdgcn_swmmac_i32_16x16x128_iu8})