530 : ST(&_ST), MRI(&_MRI) {
532 addRulesForGOpcs({G_ADD, G_SUB},
Standard)
542 addRulesForGOpcs({G_UADDO, G_USUBO},
Standard)
546 addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE},
Standard)
550 addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT},
Standard)
558 bool HasVecMulU64 = ST->hasVMulU64Inst();
570 bool hasMulHi = ST->hasScalarMulHiInsts();
571 addRulesForGOpcs({G_UMULH, G_SMULH},
Standard)
576 addRulesForGOpcs({G_AMDGPU_MAD_U64_U32},
Standard)
580 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
581 addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32},
Standard)
585 addRulesForGOpcs({G_XOR, G_OR, G_AND},
StandardB)
605 addRulesForGOpcs({G_LSHR},
Standard)
615 addRulesForGOpcs({G_ASHR},
Standard)
625 addRulesForGOpcs({G_FSHR},
Standard)
629 addRulesForGOpcs({G_BSWAP},
Standard)
637 addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
638 G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
646 addRulesForGOpcs({G_UBFX, G_SBFX},
Standard)
652 addRulesForGOpcs({G_SMIN, G_SMAX},
Standard)
660 addRulesForGOpcs({G_UMIN, G_UMAX},
Standard)
668 addRulesForGOpcs({G_IMPLICIT_DEF})
673 addRulesForGOpcs({G_CONSTANT},
Standard)
681 addRulesForGOpcs({G_FCONSTANT},
Standard)
686 addRulesForGOpcs({G_FREEZE})
693 addRulesForGOpcs({G_BITCAST})
697 addRulesForGOpcs({G_UNMERGE_VALUES})
702 addRulesForGOpcs({G_BUILD_VECTOR})
708 addRulesForGOpcs({G_MERGE_VALUES, G_CONCAT_VECTORS})
712 addRulesForGOpcs({G_PHI})
718 addRulesForGOpcs({G_EXTRACT_VECTOR_ELT})
729 addRulesForGOpcs({G_INSERT_VECTOR_ELT})
745 addRulesForGOpcs({G_AMDGPU_BVH_INTERSECT_RAY, G_AMDGPU_BVH_DUAL_INTERSECT_RAY,
746 G_AMDGPU_BVH8_INTERSECT_RAY})
753 addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
754 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
755 G_AMDGPU_INTRIN_IMAGE_STORE,
756 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
771 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
773 addRulesForGOpcs({G_ICMP})
792 addRulesForGOpcs({G_BRCOND})
796 addRulesForGOpcs({G_BR}).
Any({{
_}, {{}, {
None}}});
806 addRulesForGOpcs({G_ANYEXT})
818 bool Has16bitCmp = ST->has16BitInsts();
825 addRulesForGOpcs({G_TRUNC})
841 addRulesForGOpcs({G_ZEXT})
856 addRulesForGOpcs({G_SEXT})
871 addRulesForGOpcs({G_SEXT_INREG})
877 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT},
Standard)
883 addRulesForGOpcs({G_ASSERT_ALIGN},
Standard)
895 addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
896 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
897 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
898 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
899 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
907 addRulesForGOpcs({G_ATOMICRMW_USUB_SAT, G_ATOMICRMW_USUB_COND})
912 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
913 bool HasAtomicBufferGlobalPkAddF16Insts =
914 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
915 ST->hasAtomicBufferGlobalPkAddF16Insts();
916 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
917 addRulesForGOpcs({G_ATOMICRMW_FADD})
925 HasAtomicFlatPkAdd16Insts)
927 HasAtomicBufferGlobalPkAddF16Insts)
929 HasAtomicDsPkAdd16Insts);
931 addRulesForGOpcs({G_ATOMIC_CMPXCHG})
937 addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
943 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP},
Standard)
949 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
950 G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
951 G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
952 G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
953 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
954 G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
955 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
960 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
961 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
962 bool usesTrue16 = ST->useRealTrue16Insts();
965 return (*
MI.memoperands_begin())->getAlign() >=
Align(16);
969 return (*
MI.memoperands_begin())->getAlign() >=
Align(4);
973 return (*
MI.memoperands_begin())->isAtomic();
989 return (*
MI.memoperands_begin())->isVolatile();
993 return (*
MI.memoperands_begin())->isInvariant();
1008 return MemSize == 16 || MemSize == 8;
1016 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
1017 (isConst || isInvMMO || isNoClobberMMO);
1021 addRulesForGOpcs({G_LOAD})
1128 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD})
1148 addRulesForGOpcs({G_STORE})
1184 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1185 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1196 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1197 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1203 {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1208 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1221 {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1230 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1231 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1232 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1233 G_AMDGPU_TBUFFER_STORE_FORMAT,
1234 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1246 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
1255 addRulesForGOpcs({G_PTR_ADD})
1261 addRulesForGOpcs({G_INTTOPTR})
1269 addRulesForGOpcs({G_PTRTOINT})
1279 addRulesForGOpcs({G_PTRMASK})
1285 addRulesForGOpcs({G_ABS},
Standard)
1293 addRulesForGOpcs({G_BITREVERSE},
Standard)
1299 addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_POISON,
1300 G_CTTZ_ZERO_POISON})
1306 addRulesForGOpcs({G_CTPOP})
1312 addRulesForGOpcs({G_FENCE}).
Any({{{}}, {{}, {}}});
1314 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER},
Standard)
1319 addRulesForGOpcs({G_GLOBAL_VALUE})
1326 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).
Any({{
UniP5}, {{
SgprP5}, {}}});
1328 addRulesForGOpcs({G_SI_CALL})
1334 bool hasSALUFloat = ST->hasSALUFloatInsts();
1336 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL},
Standard)
1350 addRulesForGOpcs({G_FSUB, G_STRICT_FSUB},
Standard)
1358 addRulesForGOpcs({G_FMAD},
Standard)
1364 addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP},
Standard)
1372 addRulesForGOpcs({G_FMA, G_STRICT_FMA},
Standard)
1390 addRulesForGOpcs({G_AMDGPU_FMED3},
Standard)
1399 addRulesForGOpcs({G_AMDGPU_SMED3},
Standard)
1407 addRulesForGOpcs({G_FNEG, G_FABS},
Standard)
1422 addRulesForGOpcs({G_FCANONICALIZE},
Standard)
1434 bool hasPST = ST->hasPseudoScalarTrans();
1435 addRulesForGOpcs({G_FSQRT},
Standard)
1440 addRulesForGOpcs({G_FPTOUI, G_FPTOSI, G_FPTOUI_SAT, G_FPTOSI_SAT})
1452 addRulesForGOpcs({G_UITOFP, G_SITOFP})
1464 addRulesForGOpcs({G_AMDGPU_S_BUFFER_PREFETCH})
1467 addRulesForGOpcs({G_FPEXT})
1474 addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32},
Standard)
1478 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY},
Standard)
1482 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1484 addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM},
Standard)
1496 addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM,
1497 G_FMINIMUMNUM, G_FMAXIMUMNUM},
1510 addRulesForGOpcs({G_FPTRUNC})
1519 addRulesForGOpcs({G_IS_FPCLASS})
1527 addRulesForGOpcs({G_FCMP},
Standard)
1541 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1551 addRulesForGOpcs({G_AMDGPU_GLOBAL_LOAD_MONITOR, G_AMDGPU_FLAT_LOAD_MONITOR},
1562 addRulesForIOpcs({returnaddress}).
Any({{
UniP0}, {{
SgprP0}, {}}});
1568 addRulesForIOpcs({amdgcn_s_setreg})
1571 addRulesForIOpcs({amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1574 addRulesForIOpcs({amdgcn_s_sendmsg_rtn})
1578 addRulesForIOpcs({amdgcn_s_memrealtime, amdgcn_s_memtime},
Standard)
1581 addRulesForIOpcs({amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1582 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1587 addRulesForIOpcs({amdgcn_asyncmark,
1591 amdgcn_s_barrier_leave,
1592 amdgcn_s_barrier_signal,
1593 amdgcn_s_barrier_wait,
1594 amdgcn_s_monitor_sleep,
1598 amdgcn_s_setprio_inc_wg,
1600 amdgcn_s_ttracedata_imm,
1601 amdgcn_s_wait_asynccnt,
1602 amdgcn_s_wait_bvhcnt,
1603 amdgcn_s_wait_dscnt,
1604 amdgcn_s_wait_event,
1605 amdgcn_s_wait_event_export_ready,
1606 amdgcn_s_wait_expcnt,
1607 amdgcn_s_wait_kmcnt,
1608 amdgcn_s_wait_loadcnt,
1609 amdgcn_s_wait_samplecnt,
1610 amdgcn_s_wait_storecnt,
1611 amdgcn_s_wait_tensorcnt,
1614 amdgcn_wait_asyncmark,
1615 amdgcn_wave_barrier})
1616 .
Any({{}, {{}, {}}});
1618 addRulesForIOpcs({amdgcn_init_exec_from_input})
1623 addRulesForIOpcs({amdgcn_s_sleep_var})
1626 addRulesForIOpcs({amdgcn_s_barrier_join, amdgcn_s_wakeup_barrier})
1629 addRulesForIOpcs({amdgcn_s_barrier_signal_var, amdgcn_s_barrier_init})
1632 addRulesForIOpcs({amdgcn_s_barrier_signal_isfirst})
1636 {amdgcn_s_get_named_barrier_state, amdgcn_s_get_barrier_state},
Standard)
1639 addRulesForIOpcs({amdgcn_flat_prefetch}).
Any({{}, {{}, {
IntrId,
VgprP0}}});
1641 addRulesForIOpcs({amdgcn_global_prefetch}).
Any({{}, {{}, {
IntrId,
VgprP1}}});
1643 addRulesForIOpcs({amdgcn_s_prefetch_data})
1646 addRulesForIOpcs({amdgcn_class})
1655 addRulesForIOpcs({amdgcn_end_cf})
1659 addRulesForIOpcs({amdgcn_if_break},
Standard)
1663 addRulesForIOpcs({amdgcn_exp})
1667 addRulesForIOpcs({amdgcn_exp_compr})
1670 addRulesForIOpcs({amdgcn_exp_row})
1676 addRulesForIOpcs({amdgcn_lds_direct_load},
StandardB)
1679 addRulesForIOpcs({amdgcn_lds_param_load},
Standard)
1682 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi},
Standard)
1685 addRulesForIOpcs({amdgcn_readfirstlane})
1691 addRulesForIOpcs({amdgcn_readlane},
StandardB)
1694 addRulesForIOpcs({amdgcn_writelane},
StandardB)
1699 addRulesForIOpcs({amdgcn_add_max_i32, amdgcn_add_max_u32, amdgcn_add_min_i32,
1700 amdgcn_add_min_u32},
1705 addRulesForIOpcs({amdgcn_pk_add_max_i16, amdgcn_pk_add_max_u16,
1706 amdgcn_pk_add_min_i16, amdgcn_pk_add_min_u16},
1711 addRulesForIOpcs({amdgcn_permlane16, amdgcn_permlanex16},
Standard)
1716 addRulesForIOpcs({amdgcn_permlane_bcast, amdgcn_permlane_up,
1717 amdgcn_permlane_down, amdgcn_permlane_xor},
1723 addRulesForIOpcs({amdgcn_permlane_idx_gen},
Standard)
1726 addRulesForIOpcs({amdgcn_perm},
Standard)
1731 {amdgcn_wave_reduce_add, amdgcn_wave_reduce_and, amdgcn_wave_reduce_fadd,
1732 amdgcn_wave_reduce_fmax, amdgcn_wave_reduce_fmin,
1733 amdgcn_wave_reduce_fsub, amdgcn_wave_reduce_max, amdgcn_wave_reduce_min,
1734 amdgcn_wave_reduce_or, amdgcn_wave_reduce_sub, amdgcn_wave_reduce_umax,
1735 amdgcn_wave_reduce_umin, amdgcn_wave_reduce_xor},
1742 addRulesForIOpcs({amdgcn_bitop3, amdgcn_fmad_ftz},
Standard)
1748 addRulesForIOpcs({amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1749 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1750 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1755 addRulesForIOpcs({amdgcn_rsq, amdgcn_rsq_clamp},
Standard)
1765 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24},
Standard)
1771 addRulesForIOpcs({amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
1772 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
1778 addRulesForIOpcs({amdgcn_cvt_sr_bf8_f32, amdgcn_cvt_sr_fp8_f32,
1779 amdgcn_cvt_sr_fp8_f32_e5m3, amdgcn_cvt_pk_bf8_f32,
1780 amdgcn_cvt_pk_fp8_f32, amdgcn_cvt_pk_fp8_f32_e5m3},
1785 addRulesForIOpcs({amdgcn_cvt_off_f32_i4, amdgcn_cvt_f32_bf8,
1786 amdgcn_cvt_f32_fp8, amdgcn_cvt_f32_fp8_e5m3},
1791 addRulesForIOpcs({amdgcn_cvt_pk_f32_bf8, amdgcn_cvt_pk_f32_fp8})
1795 addRulesForIOpcs({amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
1801 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract},
Standard)
1809 addRulesForIOpcs({amdgcn_prng_b32})
1813 addRulesForIOpcs({amdgcn_sffbh},
Standard)
1817 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe},
Standard)
1823 addRulesForIOpcs({amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
1824 amdgcn_cvt_pknorm_u16, amdgcn_cvt_pkrtz},
1829 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f16,
1830 amdgcn_cvt_scalef32_sr_pk32_fp6_f16,
1831 amdgcn_cvt_scalef32_sr_pk32_bf6_bf16,
1832 amdgcn_cvt_scalef32_sr_pk32_fp6_bf16},
1836 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f32,
1837 amdgcn_cvt_scalef32_sr_pk32_fp6_f32},
1841 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1847 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1853 addRulesForIOpcs({amdgcn_global_load_tr4_b64})
1857 addRulesForIOpcs({amdgcn_global_load_tr6_b96})
1861 addRulesForIOpcs({amdgcn_ds_load_tr4_b64, amdgcn_ds_load_tr8_b64})
1864 addRulesForIOpcs({amdgcn_ds_load_tr6_b96})
1867 addRulesForIOpcs({amdgcn_ds_load_tr16_b128})
1870 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1874 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num},
Standard)
1877 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1881 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1884 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1888 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1891 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1894 addRulesForIOpcs({amdgcn_global_load_lds})
1897 addRulesForIOpcs({amdgcn_global_load_async_to_lds_b8,
1898 amdgcn_global_load_async_to_lds_b32,
1899 amdgcn_global_load_async_to_lds_b64,
1900 amdgcn_global_load_async_to_lds_b128,
1901 amdgcn_global_store_async_from_lds_b8,
1902 amdgcn_global_store_async_from_lds_b32,
1903 amdgcn_global_store_async_from_lds_b64,
1904 amdgcn_global_store_async_from_lds_b128})
1907 addRulesForIOpcs({amdgcn_cluster_load_b32})
1913 addRulesForIOpcs({amdgcn_cluster_load_b64})
1919 addRulesForIOpcs({amdgcn_cluster_load_b128})
1926 addRulesForIOpcs({amdgcn_cluster_load_async_to_lds_b8,
1927 amdgcn_cluster_load_async_to_lds_b32,
1928 amdgcn_cluster_load_async_to_lds_b64,
1929 amdgcn_cluster_load_async_to_lds_b128})
1932 addRulesForIOpcs({amdgcn_perm_pk16_b4_u4},
StandardB)
1936 addRulesForIOpcs({amdgcn_perm_pk16_b6_u4},
StandardB)
1940 addRulesForIOpcs({amdgcn_perm_pk16_b8_u4},
StandardB)
1944 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1960 addRulesForIOpcs({amdgcn_kill, amdgcn_wqm_demote})
1963 addRulesForIOpcs({amdgcn_ballot},
Standard)
1967 addRulesForIOpcs({amdgcn_inverse_ballot})
1971 addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
1974 addRulesForIOpcs({amdgcn_mov_dpp, amdgcn_mov_dpp8},
StandardB)
1978 addRulesForIOpcs({amdgcn_update_dpp},
StandardB)
1982 addRulesForIOpcs({amdgcn_sin, amdgcn_cos},
Standard)
1988 addRulesForIOpcs({amdgcn_trig_preop},
Standard)
1992 addRulesForIOpcs({amdgcn_exp2},
Standard)
2000 addRulesForIOpcs({amdgcn_rcp, amdgcn_sqrt},
Standard)
2010 addRulesForIOpcs({amdgcn_log},
Standard)
2018 addRulesForIOpcs({amdgcn_ds_atomic_async_barrier_arrive_b64})
2021 addRulesForIOpcs({amdgcn_ds_atomic_barrier_arrive_rtn_b64},
Standard)
2024 addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
2029 addRulesForIOpcs({amdgcn_ds_append, amdgcn_ds_consume},
Standard)
2034 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn},
Standard)
2037 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn},
Standard)
2040 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn},
Standard)
2043 addRulesForIOpcs({amdgcn_ds_gws_sema_p, amdgcn_ds_gws_sema_v,
2044 amdgcn_ds_gws_sema_release_all})
2048 {amdgcn_ds_gws_barrier, amdgcn_ds_gws_init, amdgcn_ds_gws_sema_br})
2051 addRulesForIOpcs({amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap},
Standard)
2054 addRulesForIOpcs({amdgcn_ds_swizzle},
Standard)
2058 addRulesForIOpcs({amdgcn_permlane16_var, amdgcn_permlanex16_var},
Standard)
2061 addRulesForIOpcs({amdgcn_permlane16_swap, amdgcn_permlane32_swap},
Standard)
2064 addRulesForIOpcs({amdgcn_permlane64},
StandardB)
2067 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
2070 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
2073 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
2076 addRulesForIOpcs({amdgcn_interp_p1},
Standard)
2079 addRulesForIOpcs({amdgcn_interp_p1_f16},
Standard)
2082 addRulesForIOpcs({amdgcn_interp_p2},
Standard)
2085 addRulesForIOpcs({amdgcn_interp_p2_f16},
Standard)
2089 addRulesForIOpcs({amdgcn_interp_mov},
Standard)
2092 addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
2093 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
2098 addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
2103 addRulesForIOpcs({amdgcn_div_fmas},
Standard)
2109 addRulesForIOpcs({amdgcn_div_fixup},
Standard)
2117 addRulesForIOpcs({amdgcn_div_scale},
Standard)
2123 addRulesForIOpcs({amdgcn_fdot2, amdgcn_sdot2, amdgcn_udot2},
Standard)
2127 addRulesForIOpcs({amdgcn_fdot2_f16_f16},
Standard)
2131 addRulesForIOpcs({amdgcn_sudot4, amdgcn_sudot8},
Standard)
2135 addRulesForIOpcs({amdgcn_s_alloc_vgpr})
2138 addRulesForIOpcs({amdgcn_sat_pk4_i4_i8, amdgcn_sat_pk4_u4_u8},
Standard)
2143 bool HasGFX90AInsts = ST->hasGFX90AInsts();
2144 addRulesForIOpcs({amdgcn_mfma_f32_32x32x1f32, amdgcn_mfma_f32_16x16x1f32,
2145 amdgcn_mfma_f32_4x4x1f32, amdgcn_mfma_f32_32x32x2f32,
2146 amdgcn_mfma_f32_16x16x4f32, amdgcn_mfma_f32_32x32x4f16,
2147 amdgcn_mfma_f32_16x16x4f16, amdgcn_mfma_f32_4x4x4f16,
2148 amdgcn_mfma_f32_32x32x8f16, amdgcn_mfma_f32_16x16x16f16,
2149 amdgcn_mfma_i32_32x32x4i8, amdgcn_mfma_i32_16x16x4i8,
2150 amdgcn_mfma_i32_4x4x4i8, amdgcn_mfma_i32_32x32x8i8,
2151 amdgcn_mfma_i32_16x16x16i8, amdgcn_mfma_f32_32x32x2bf16,
2152 amdgcn_mfma_f32_16x16x2bf16, amdgcn_mfma_f32_4x4x2bf16,
2153 amdgcn_mfma_f32_32x32x4bf16, amdgcn_mfma_f32_16x16x8bf16})
2161 amdgcn_wmma_f32_16x16x16_f16, amdgcn_wmma_f32_16x16x16_bf16,
2162 amdgcn_wmma_f16_16x16x16_f16, amdgcn_wmma_bf16_16x16x16_bf16,
2163 amdgcn_wmma_f16_16x16x16_f16_tied, amdgcn_wmma_bf16_16x16x16_bf16_tied,
2164 amdgcn_wmma_i32_16x16x16_iu8, amdgcn_wmma_i32_16x16x16_iu4,
2166 amdgcn_wmma_f32_16x16x16_fp8_fp8, amdgcn_wmma_f32_16x16x16_fp8_bf8,
2167 amdgcn_wmma_f32_16x16x16_bf8_fp8, amdgcn_wmma_f32_16x16x16_bf8_bf8,
2168 amdgcn_wmma_i32_16x16x32_iu4,
2170 amdgcn_wmma_f32_16x16x4_f32, amdgcn_wmma_f32_16x16x32_bf16,
2171 amdgcn_wmma_f32_16x16x32_f16, amdgcn_wmma_f16_16x16x32_f16,
2172 amdgcn_wmma_bf16_16x16x32_bf16, amdgcn_wmma_bf16f32_16x16x32_bf16,
2173 amdgcn_wmma_f32_16x16x64_fp8_fp8, amdgcn_wmma_f32_16x16x64_fp8_bf8,
2174 amdgcn_wmma_f32_16x16x64_bf8_fp8, amdgcn_wmma_f32_16x16x64_bf8_bf8,
2175 amdgcn_wmma_f16_16x16x64_fp8_fp8, amdgcn_wmma_f16_16x16x64_fp8_bf8,
2176 amdgcn_wmma_f16_16x16x64_bf8_fp8, amdgcn_wmma_f16_16x16x64_bf8_bf8,
2177 amdgcn_wmma_f16_16x16x128_fp8_fp8, amdgcn_wmma_f16_16x16x128_fp8_bf8,
2178 amdgcn_wmma_f16_16x16x128_bf8_fp8, amdgcn_wmma_f16_16x16x128_bf8_bf8,
2179 amdgcn_wmma_f32_16x16x128_fp8_fp8, amdgcn_wmma_f32_16x16x128_fp8_bf8,
2180 amdgcn_wmma_f32_16x16x128_bf8_fp8, amdgcn_wmma_f32_16x16x128_bf8_bf8,
2181 amdgcn_wmma_i32_16x16x64_iu8, amdgcn_wmma_f32_16x16x128_f8f6f4,
2182 amdgcn_wmma_scale_f32_16x16x128_f8f6f4,
2183 amdgcn_wmma_scale16_f32_16x16x128_f8f6f4, amdgcn_wmma_f32_32x16x128_f4,
2184 amdgcn_wmma_scale_f32_32x16x128_f4, amdgcn_wmma_scale16_f32_32x16x128_f4,
2186 amdgcn_swmmac_f32_16x16x32_f16, amdgcn_swmmac_f32_16x16x32_bf16,
2187 amdgcn_swmmac_f16_16x16x32_f16, amdgcn_swmmac_bf16_16x16x32_bf16,
2188 amdgcn_swmmac_i32_16x16x32_iu8, amdgcn_swmmac_i32_16x16x32_iu4,
2189 amdgcn_swmmac_i32_16x16x64_iu4, amdgcn_swmmac_f32_16x16x32_fp8_fp8,
2190 amdgcn_swmmac_f32_16x16x32_fp8_bf8, amdgcn_swmmac_f32_16x16x32_bf8_fp8,
2191 amdgcn_swmmac_f32_16x16x32_bf8_bf8,
2193 amdgcn_swmmac_f32_16x16x64_f16, amdgcn_swmmac_f32_16x16x64_bf16,
2194 amdgcn_swmmac_f16_16x16x64_f16, amdgcn_swmmac_bf16_16x16x64_bf16,
2195 amdgcn_swmmac_bf16f32_16x16x64_bf16, amdgcn_swmmac_f32_16x16x128_fp8_fp8,
2196 amdgcn_swmmac_f32_16x16x128_fp8_bf8, amdgcn_swmmac_f32_16x16x128_bf8_fp8,
2197 amdgcn_swmmac_f32_16x16x128_bf8_bf8, amdgcn_swmmac_f16_16x16x128_fp8_fp8,
2198 amdgcn_swmmac_f16_16x16x128_fp8_bf8, amdgcn_swmmac_f16_16x16x128_bf8_fp8,
2199 amdgcn_swmmac_f16_16x16x128_bf8_bf8, amdgcn_swmmac_i32_16x16x128_iu8})