586 : ST(&_ST), MRI(&_MRI) {
588 addRulesForGOpcs({G_ADD, G_SUB},
Standard)
600 addRulesForGOpcs({G_UADDO, G_USUBO},
Standard)
604 addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE},
Standard)
608 addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT},
Standard)
616 bool HasVecMulU64 = ST->hasVMulU64Inst();
628 bool hasMulHi = ST->hasScalarMulHiInsts();
629 addRulesForGOpcs({G_UMULH, G_SMULH},
Standard)
634 addRulesForGOpcs({G_AMDGPU_MAD_U64_U32},
Standard)
638 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
639 addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32},
Standard)
643 addRulesForGOpcs({G_XOR, G_OR, G_AND},
StandardB)
663 addRulesForGOpcs({G_LSHR},
Standard)
673 addRulesForGOpcs({G_ASHR},
Standard)
683 addRulesForGOpcs({G_FSHR},
Standard)
687 addRulesForGOpcs({G_BSWAP},
Standard)
695 addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
696 G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
704 addRulesForGOpcs({G_UBFX, G_SBFX},
Standard)
710 addRulesForGOpcs({G_SMIN, G_SMAX},
Standard)
720 addRulesForGOpcs({G_UMIN, G_UMAX},
Standard)
730 addRulesForGOpcs({G_IMPLICIT_DEF})
735 addRulesForGOpcs({G_CONSTANT},
Standard)
743 addRulesForGOpcs({G_FCONSTANT},
Standard)
748 addRulesForGOpcs({G_FREEZE})
755 addRulesForGOpcs({G_BITCAST})
759 addRulesForGOpcs({G_UNMERGE_VALUES})
764 addRulesForGOpcs({G_BUILD_VECTOR, G_MERGE_VALUES})
770 addRulesForGOpcs({G_CONCAT_VECTORS})
774 addRulesForGOpcs({G_PHI})
780 addRulesForGOpcs({G_EXTRACT_VECTOR_ELT})
791 addRulesForGOpcs({G_INSERT_VECTOR_ELT})
807 addRulesForGOpcs({G_AMDGPU_BVH_INTERSECT_RAY, G_AMDGPU_BVH_DUAL_INTERSECT_RAY,
808 G_AMDGPU_BVH8_INTERSECT_RAY})
815 addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
816 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
817 G_AMDGPU_INTRIN_IMAGE_STORE,
818 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
833 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
835 addRulesForGOpcs({G_ICMP})
854 addRulesForGOpcs({G_BRCOND})
858 addRulesForGOpcs({G_BR}).
Any({{
_}, {{}, {
None}}});
868 addRulesForGOpcs({G_ANYEXT})
880 bool Has16bitCmp = ST->has16BitInsts();
887 addRulesForGOpcs({G_TRUNC})
903 addRulesForGOpcs({G_ZEXT})
918 addRulesForGOpcs({G_SEXT})
933 addRulesForGOpcs({G_SEXT_INREG})
939 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT},
Standard)
945 addRulesForGOpcs({G_ASSERT_ALIGN},
Standard)
957 addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
958 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
959 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
960 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
961 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
969 addRulesForGOpcs({G_ATOMICRMW_USUB_SAT, G_ATOMICRMW_USUB_COND})
974 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
975 bool HasAtomicBufferGlobalPkAddF16Insts =
976 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
977 ST->hasAtomicBufferGlobalPkAddF16Insts();
978 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
979 addRulesForGOpcs({G_ATOMICRMW_FADD})
987 HasAtomicFlatPkAdd16Insts)
989 HasAtomicBufferGlobalPkAddF16Insts)
991 HasAtomicDsPkAdd16Insts);
993 addRulesForGOpcs({G_ATOMIC_CMPXCHG})
999 addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
1005 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP},
Standard)
1011 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
1012 G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
1013 G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
1014 G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
1015 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
1016 G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
1017 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
1022 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
1023 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
1024 bool usesTrue16 = ST->useRealTrue16Insts();
1027 return (*
MI.memoperands_begin())->getAlign() >=
Align(16);
1031 return (*
MI.memoperands_begin())->getAlign() >=
Align(4);
1035 return (*
MI.memoperands_begin())->isAtomic();
1051 return (*
MI.memoperands_begin())->isVolatile();
1055 return (*
MI.memoperands_begin())->isInvariant();
1070 return MemSize == 16 || MemSize == 8;
1078 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
1079 (isConst || isInvMMO || isNoClobberMMO);
1083 addRulesForGOpcs({G_LOAD})
1190 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD})
1215 addRulesForGOpcs({G_STORE})
1251 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1252 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1263 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1264 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1270 {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1275 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1288 {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1297 addRulesForGOpcs({G_AMDGPU_S_BUFFER_LOAD})
1347 addRulesForGOpcs({G_AMDGPU_S_BUFFER_LOAD_SBYTE, G_AMDGPU_S_BUFFER_LOAD_UBYTE,
1348 G_AMDGPU_S_BUFFER_LOAD_SSHORT,
1349 G_AMDGPU_S_BUFFER_LOAD_USHORT})
1358 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1359 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1360 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1361 G_AMDGPU_TBUFFER_STORE_FORMAT,
1362 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1374 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
1383 addRulesForGOpcs({G_PTR_ADD})
1389 addRulesForGOpcs({G_INTTOPTR})
1397 addRulesForGOpcs({G_PTRTOINT})
1407 addRulesForGOpcs({G_PTRMASK})
1413 addRulesForGOpcs({G_DYN_STACKALLOC})
1417 addRulesForGOpcs({G_ABS},
Standard)
1425 addRulesForGOpcs({G_BITREVERSE},
Standard)
1431 addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_POISON,
1432 G_CTTZ_ZERO_POISON})
1438 addRulesForGOpcs({G_CTPOP})
1444 addRulesForGOpcs({G_FENCE}).
Any({{{}}, {{}, {}}});
1446 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER},
Standard)
1449 addRulesForGOpcs({G_GET_ROUNDING},
Standard)
1452 addRulesForGOpcs({G_SET_ROUNDING},
Standard)
1458 addRulesForGOpcs({G_GLOBAL_VALUE})
1465 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).
Any({{
UniP5}, {{
SgprP5}, {}}});
1469 addRulesForGOpcs({G_SI_CALL})
1475 bool hasSALUFloat = ST->hasSALUFloatInsts();
1477 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL},
Standard)
1495 addRulesForGOpcs({G_FSUB, G_STRICT_FSUB},
Standard)
1503 addRulesForGOpcs({G_FMAD},
Standard)
1509 addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP},
Standard)
1517 addRulesForGOpcs({G_FMA, G_STRICT_FMA},
Standard)
1537 addRulesForGOpcs({G_AMDGPU_FMED3},
Standard)
1546 addRulesForGOpcs({G_AMDGPU_SMED3},
Standard)
1554 addRulesForGOpcs({G_FNEG, G_FABS},
Standard)
1569 addRulesForGOpcs({G_FCANONICALIZE},
Standard)
1583 bool hasPST = ST->hasPseudoScalarTrans();
1584 addRulesForGOpcs({G_FSQRT},
Standard)
1589 addRulesForGOpcs({G_FPTOUI, G_FPTOSI, G_FPTOUI_SAT, G_FPTOSI_SAT})
1606 addRulesForGOpcs({G_UITOFP, G_SITOFP})
1618 addRulesForGOpcs({G_AMDGPU_S_BUFFER_PREFETCH})
1623 return MI.getOperand(3).getImm() != 0;
1626 bool HasSMemPF = ST->hasSafeSmemPrefetch();
1627 bool HasVMemPF = ST->hasVmemPrefInsts();
1628 addRulesForGOpcs({G_PREFETCH})
1634 !HasSMemPF && !HasVMemPF)
1646 addRulesForGOpcs({G_FPEXT})
1653 addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32},
Standard)
1657 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY},
Standard)
1661 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1663 addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM},
Standard)
1675 addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM,
1676 G_FMINIMUMNUM, G_FMAXIMUMNUM},
1689 addRulesForGOpcs({G_FPTRUNC})
1698 addRulesForGOpcs({G_INTRINSIC_FPTRUNC_ROUND})
1707 addRulesForGOpcs({G_IS_FPCLASS})
1715 addRulesForGOpcs({G_FCMP},
Standard)
1729 addRulesForGOpcs({G_INTRINSIC_ROUNDEVEN, G_FEXP2, G_FLOG2},
Standard)
1737 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_FFLOOR, G_FCEIL},
Standard)
1747 addRulesForGOpcs({G_AMDGPU_GLOBAL_LOAD_MONITOR, G_AMDGPU_FLAT_LOAD_MONITOR},
1756 addRulesForGOpcs({G_AMDGPU_WHOLE_WAVE_FUNC_SETUP})
1759 addRulesForGOpcs({G_AMDGPU_WHOLE_WAVE_FUNC_RETURN}).
Any({{}, {{}, {
Vcc}}});
1763 addRulesForIOpcs({returnaddress}).
Any({{
UniP0}, {{
SgprP0}, {}}});
1767 addRulesForIOpcs({amdgcn_icmp})
1776 addRulesForIOpcs({amdgcn_fcmp})
1789 addRulesForIOpcs({amdgcn_s_setreg})
1792 addRulesForIOpcs({amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1795 addRulesForIOpcs({amdgcn_s_sendmsg_rtn})
1799 addRulesForIOpcs({amdgcn_s_memrealtime, amdgcn_s_memtime},
Standard)
1802 addRulesForIOpcs({amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1803 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1808 addRulesForIOpcs({amdgcn_asyncmark,
1813 amdgcn_s_barrier_leave,
1814 amdgcn_s_barrier_signal,
1815 amdgcn_s_barrier_wait,
1816 amdgcn_s_monitor_sleep,
1820 amdgcn_s_setprio_inc_wg,
1822 amdgcn_s_ttracedata_imm,
1823 amdgcn_s_wait_asynccnt,
1824 amdgcn_s_wait_bvhcnt,
1825 amdgcn_s_wait_dscnt,
1826 amdgcn_s_wait_event,
1827 amdgcn_s_wait_event_export_ready,
1828 amdgcn_s_wait_expcnt,
1829 amdgcn_s_wait_kmcnt,
1830 amdgcn_s_wait_loadcnt,
1831 amdgcn_s_wait_samplecnt,
1832 amdgcn_s_wait_storecnt,
1833 amdgcn_s_wait_tensorcnt,
1835 amdgcn_sched_barrier,
1836 amdgcn_sched_group_barrier,
1838 amdgcn_wait_asyncmark,
1839 amdgcn_wave_barrier})
1840 .
Any({{}, {{}, {}}});
1842 addRulesForIOpcs({amdgcn_init_exec_from_input})
1847 addRulesForIOpcs({amdgcn_s_sleep_var})
1850 addRulesForIOpcs({amdgcn_s_barrier_join, amdgcn_s_wakeup_barrier})
1853 addRulesForIOpcs({amdgcn_s_barrier_signal_var, amdgcn_s_barrier_init})
1856 addRulesForIOpcs({amdgcn_s_barrier_signal_isfirst})
1860 {amdgcn_s_get_named_barrier_state, amdgcn_s_get_barrier_state},
Standard)
1863 addRulesForIOpcs({amdgcn_flat_prefetch}).
Any({{}, {{}, {
IntrId,
VgprP0}}});
1865 addRulesForIOpcs({amdgcn_global_prefetch}).
Any({{}, {{}, {
IntrId,
VgprP1}}});
1867 addRulesForIOpcs({amdgcn_s_prefetch_data, amdgcn_s_prefetch_inst})
1870 addRulesForIOpcs({amdgcn_class})
1879 addRulesForIOpcs({amdgcn_end_cf})
1883 addRulesForIOpcs({amdgcn_if_break},
Standard)
1887 addRulesForIOpcs({amdgcn_exp})
1891 addRulesForIOpcs({amdgcn_exp_compr})
1894 addRulesForIOpcs({amdgcn_exp_row})
1900 addRulesForIOpcs({amdgcn_lds_direct_load},
StandardB)
1903 addRulesForIOpcs({amdgcn_lds_param_load},
Standard)
1906 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi},
Standard)
1909 addRulesForIOpcs({amdgcn_readfirstlane})
1915 addRulesForIOpcs({amdgcn_readlane},
StandardB)
1918 addRulesForIOpcs({amdgcn_s_quadmask, amdgcn_s_wqm},
StandardB)
1922 addRulesForIOpcs({amdgcn_writelane},
StandardB)
1927 addRulesForIOpcs({amdgcn_add_max_i32, amdgcn_add_max_u32, amdgcn_add_min_i32,
1928 amdgcn_add_min_u32},
1933 addRulesForIOpcs({amdgcn_pk_add_max_i16, amdgcn_pk_add_max_u16,
1934 amdgcn_pk_add_min_i16, amdgcn_pk_add_min_u16},
1939 addRulesForIOpcs({amdgcn_permlane16, amdgcn_permlanex16},
Standard)
1944 addRulesForIOpcs({amdgcn_permlane_bcast, amdgcn_permlane_up,
1945 amdgcn_permlane_down, amdgcn_permlane_xor},
1951 addRulesForIOpcs({amdgcn_permlane_idx_gen},
Standard)
1954 addRulesForIOpcs({amdgcn_perm},
Standard)
1959 {amdgcn_wave_reduce_add, amdgcn_wave_reduce_and, amdgcn_wave_reduce_fadd,
1960 amdgcn_wave_reduce_fmax, amdgcn_wave_reduce_fmin,
1961 amdgcn_wave_reduce_fsub, amdgcn_wave_reduce_max, amdgcn_wave_reduce_min,
1962 amdgcn_wave_reduce_or, amdgcn_wave_reduce_sub, amdgcn_wave_reduce_umax,
1963 amdgcn_wave_reduce_umin, amdgcn_wave_reduce_xor},
1970 addRulesForIOpcs({amdgcn_wave_shuffle},
Standard)
1974 addRulesForIOpcs({amdgcn_bitop3, amdgcn_fmad_ftz},
Standard)
1980 addRulesForIOpcs({amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1981 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1982 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1987 addRulesForIOpcs({amdgcn_rsq, amdgcn_rsq_clamp},
Standard)
1997 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24},
Standard)
2003 addRulesForIOpcs({amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
2004 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
2010 addRulesForIOpcs({amdgcn_cvt_sr_bf8_f32, amdgcn_cvt_sr_fp8_f32,
2011 amdgcn_cvt_sr_fp8_f32_e5m3, amdgcn_cvt_pk_bf8_f32,
2012 amdgcn_cvt_pk_fp8_f32, amdgcn_cvt_pk_fp8_f32_e5m3},
2017 addRulesForIOpcs({amdgcn_cvt_off_f32_i4, amdgcn_cvt_f32_bf8,
2018 amdgcn_cvt_f32_fp8, amdgcn_cvt_f32_fp8_e5m3},
2023 addRulesForIOpcs({amdgcn_cvt_pk_f32_bf8, amdgcn_cvt_pk_f32_fp8})
2027 addRulesForIOpcs({amdgcn_cvt_f16_bf8, amdgcn_cvt_f16_fp8},
Standard)
2031 addRulesForIOpcs({amdgcn_cvt_pk_f16_bf8, amdgcn_cvt_pk_f16_fp8},
Standard)
2035 addRulesForIOpcs({amdgcn_cvt_pk_bf8_f16, amdgcn_cvt_pk_fp8_f16},
Standard)
2039 addRulesForIOpcs({amdgcn_cvt_sr_bf8_f16, amdgcn_cvt_sr_fp8_f16},
Standard)
2043 addRulesForIOpcs({amdgcn_cvt_sr_pk_f16_f32},
Standard)
2047 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_fp8_f16})
2050 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_fp8_f32})
2053 addRulesForIOpcs({amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
2059 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract},
Standard)
2067 addRulesForIOpcs({amdgcn_prng_b32})
2071 addRulesForIOpcs({amdgcn_sffbh},
Standard)
2075 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe},
Standard)
2081 addRulesForIOpcs({amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
2082 amdgcn_cvt_pknorm_u16},
2087 addRulesForIOpcs({amdgcn_cvt_pkrtz},
Standard)
2092 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f16,
2093 amdgcn_cvt_scalef32_sr_pk32_fp6_f16,
2094 amdgcn_cvt_scalef32_sr_pk32_bf6_bf16,
2095 amdgcn_cvt_scalef32_sr_pk32_fp6_bf16},
2099 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f32,
2100 amdgcn_cvt_scalef32_sr_pk32_fp6_f32},
2104 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk_fp4_f16},
Standard)
2108 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk_fp4_f32},
Standard)
2113 {amdgcn_cvt_scalef32_2xpk16_fp6_f32, amdgcn_cvt_scalef32_2xpk16_bf6_f32})
2119 addRulesForIOpcs({amdgcn_cvt_scalef32_f16_fp8, amdgcn_cvt_scalef32_f16_bf8},
2124 addRulesForIOpcs({amdgcn_cvt_scalef32_f32_fp8, amdgcn_cvt_scalef32_f32_bf8},
2130 {amdgcn_cvt_scalef32_pk16_bf6_f16, amdgcn_cvt_scalef32_pk16_fp6_f16},
2136 {amdgcn_cvt_scalef32_pk16_bf6_f32, amdgcn_cvt_scalef32_pk16_fp6_f32},
2142 {amdgcn_cvt_scalef32_pk8_bf8_f16, amdgcn_cvt_scalef32_pk8_fp8_f16},
2148 {amdgcn_cvt_scalef32_pk8_bf8_f32, amdgcn_cvt_scalef32_pk8_fp8_f32},
2153 addRulesForIOpcs({amdgcn_cvt_scalef32_pk8_fp4_f16},
Standard)
2157 addRulesForIOpcs({amdgcn_cvt_scalef32_pk8_fp4_f32},
Standard)
2161 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk16_bf6_f16,
2162 amdgcn_cvt_scalef32_sr_pk16_fp6_f16},
2168 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk16_bf6_f32,
2169 amdgcn_cvt_scalef32_sr_pk16_fp6_f32},
2176 {amdgcn_cvt_scalef32_sr_pk8_bf8_f16, amdgcn_cvt_scalef32_sr_pk8_fp8_f16},
2183 {amdgcn_cvt_scalef32_sr_pk8_bf8_f32, amdgcn_cvt_scalef32_sr_pk8_fp8_f32},
2189 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk8_fp4_f16},
Standard)
2193 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk8_fp4_f32},
Standard)
2198 {amdgcn_cvt_scale_pk16_f16_bf6, amdgcn_cvt_scale_pk16_f16_fp6},
Standard)
2203 {amdgcn_cvt_scale_pk16_f32_bf6, amdgcn_cvt_scale_pk16_f32_fp6},
Standard)
2207 addRulesForIOpcs({amdgcn_cvt_scale_pk8_f16_bf8, amdgcn_cvt_scale_pk8_f16_fp8},
2212 addRulesForIOpcs({amdgcn_cvt_scale_pk8_f16_fp4},
Standard)
2216 addRulesForIOpcs({amdgcn_cvt_scale_pk8_f32_bf8, amdgcn_cvt_scale_pk8_f32_fp8},
2221 addRulesForIOpcs({amdgcn_cvt_scale_pk8_f32_fp4},
Standard)
2226 {amdgcn_cvt_scalef32_pk32_bf6_f16, amdgcn_cvt_scalef32_pk32_fp6_f16},
2232 {amdgcn_cvt_scalef32_pk_fp8_f32, amdgcn_cvt_scalef32_pk_bf8_f32},
2239 {amdgcn_cvt_scalef32_pk_f32_fp8, amdgcn_cvt_scalef32_pk_f32_bf8},
2245 {amdgcn_cvt_scalef32_pk_fp8_f16, amdgcn_cvt_scalef32_pk_bf8_f16},
2250 addRulesForIOpcs({amdgcn_cvt_scalef32_pk_f32_fp4},
Standard)
2254 addRulesForIOpcs({amdgcn_cvt_scalef32_pk_fp4_f32},
Standard)
2258 addRulesForIOpcs({amdgcn_cvt_scalef32_pk_f16_fp4,
2259 amdgcn_cvt_scalef32_pk_f16_fp8,
2260 amdgcn_cvt_scalef32_pk_f16_bf8},
2266 {amdgcn_cvt_scalef32_pk32_f32_fp6, amdgcn_cvt_scalef32_pk32_f32_bf6},
2272 {amdgcn_cvt_scalef32_pk32_f16_fp6, amdgcn_cvt_scalef32_pk32_f16_bf6},
2277 addRulesForIOpcs({amdgcn_cvt_scalef32_pk_fp4_f16},
Standard)
2281 addRulesForIOpcs({amdgcn_global_load_tr_b64})
2287 addRulesForIOpcs({amdgcn_global_load_tr_b128})
2293 addRulesForIOpcs({amdgcn_global_load_tr4_b64})
2297 addRulesForIOpcs({amdgcn_global_load_tr6_b96})
2301 addRulesForIOpcs({amdgcn_ds_load_tr4_b64, amdgcn_ds_load_tr8_b64})
2304 addRulesForIOpcs({amdgcn_ds_load_tr6_b96})
2307 addRulesForIOpcs({amdgcn_ds_load_tr16_b128})
2310 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
2314 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num},
Standard)
2317 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
2321 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
2324 addRulesForIOpcs({amdgcn_raw_buffer_load_async_lds})
2327 addRulesForIOpcs({amdgcn_struct_buffer_load_async_lds})
2332 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
2336 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
2339 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_async_lds})
2342 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_async_lds})
2346 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
2350 {amdgcn_global_load_lds, amdgcn_load_to_lds, amdgcn_load_async_to_lds})
2353 addRulesForIOpcs({amdgcn_global_load_async_to_lds_b8,
2354 amdgcn_global_load_async_to_lds_b32,
2355 amdgcn_global_load_async_to_lds_b64,
2356 amdgcn_global_load_async_to_lds_b128,
2357 amdgcn_global_store_async_from_lds_b8,
2358 amdgcn_global_store_async_from_lds_b32,
2359 amdgcn_global_store_async_from_lds_b64,
2360 amdgcn_global_store_async_from_lds_b128})
2363 addRulesForIOpcs({amdgcn_global_load_async_lds})
2366 addRulesForIOpcs({amdgcn_tensor_load_to_lds, amdgcn_tensor_store_from_lds})
2373 addRulesForIOpcs({amdgcn_cluster_load_b32})
2379 addRulesForIOpcs({amdgcn_cluster_load_b64})
2385 addRulesForIOpcs({amdgcn_cluster_load_b128})
2392 addRulesForIOpcs({amdgcn_cluster_load_async_to_lds_b8,
2393 amdgcn_cluster_load_async_to_lds_b32,
2394 amdgcn_cluster_load_async_to_lds_b64,
2395 amdgcn_cluster_load_async_to_lds_b128})
2398 addRulesForIOpcs({amdgcn_perm_pk16_b4_u4},
StandardB)
2402 addRulesForIOpcs({amdgcn_perm_pk16_b6_u4},
StandardB)
2406 addRulesForIOpcs({amdgcn_perm_pk16_b8_u4},
StandardB)
2410 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
2428 addRulesForIOpcs({amdgcn_kill, amdgcn_wqm_demote})
2431 addRulesForIOpcs({amdgcn_set_inactive},
StandardB)
2434 addRulesForIOpcs({amdgcn_set_inactive_chain_arg},
Standard)
2437 addRulesForIOpcs({amdgcn_cvt_sr_bf16_f32, amdgcn_cvt_sr_f16_f32},
Standard)
2440 addRulesForIOpcs({amdgcn_ballot},
Standard)
2444 addRulesForIOpcs({amdgcn_inverse_ballot})
2448 addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
2451 addRulesForIOpcs({amdgcn_mov_dpp, amdgcn_mov_dpp8},
StandardB)
2455 addRulesForIOpcs({amdgcn_update_dpp},
StandardB)
2459 addRulesForIOpcs({amdgcn_sin, amdgcn_cos},
Standard)
2465 addRulesForIOpcs({amdgcn_trig_preop},
Standard)
2469 addRulesForIOpcs({amdgcn_exp2},
Standard)
2477 addRulesForIOpcs({amdgcn_rcp, amdgcn_sqrt},
Standard)
2487 addRulesForIOpcs({amdgcn_log},
Standard)
2495 addRulesForIOpcs({amdgcn_ds_atomic_async_barrier_arrive_b64})
2498 addRulesForIOpcs({amdgcn_ds_atomic_barrier_arrive_rtn_b64},
Standard)
2501 addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
2506 addRulesForIOpcs({amdgcn_ds_append, amdgcn_ds_consume},
Standard)
2511 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn},
Standard)
2514 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn},
Standard)
2517 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn},
Standard)
2520 addRulesForIOpcs({amdgcn_ds_gws_sema_p, amdgcn_ds_gws_sema_v,
2521 amdgcn_ds_gws_sema_release_all})
2525 {amdgcn_ds_gws_barrier, amdgcn_ds_gws_init, amdgcn_ds_gws_sema_br})
2528 addRulesForIOpcs({amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap},
Standard)
2531 addRulesForIOpcs({amdgcn_ds_swizzle},
Standard)
2535 addRulesForIOpcs({amdgcn_permlane16_var, amdgcn_permlanex16_var},
Standard)
2538 addRulesForIOpcs({amdgcn_permlane16_swap, amdgcn_permlane32_swap},
Standard)
2541 addRulesForIOpcs({amdgcn_permlane64},
StandardB)
2544 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
2547 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
2550 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
2553 addRulesForIOpcs({amdgcn_interp_p1},
Standard)
2556 addRulesForIOpcs({amdgcn_interp_p1_f16},
Standard)
2559 addRulesForIOpcs({amdgcn_interp_p2},
Standard)
2562 addRulesForIOpcs({amdgcn_interp_p2_f16},
Standard)
2566 addRulesForIOpcs({amdgcn_interp_mov},
Standard)
2569 addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
2570 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
2575 addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
2580 addRulesForIOpcs({amdgcn_frexp_exp})
2588 addRulesForIOpcs({amdgcn_div_fmas},
Standard)
2594 addRulesForIOpcs({amdgcn_div_fixup},
Standard)
2602 addRulesForIOpcs({amdgcn_div_scale},
Standard)
2608 addRulesForIOpcs({amdgcn_fdot2, amdgcn_sdot2, amdgcn_udot2},
Standard)
2612 addRulesForIOpcs({amdgcn_fdot2_f16_f16},
Standard)
2616 addRulesForIOpcs({amdgcn_sudot4, amdgcn_sudot8},
Standard)
2620 addRulesForIOpcs({amdgcn_s_alloc_vgpr})
2623 addRulesForIOpcs({amdgcn_sat_pk4_i4_i8, amdgcn_sat_pk4_u4_u8},
Standard)
2627 bool HasGFX90AInsts = ST->hasGFX90AInsts();
2630 addRulesForIOpcs({amdgcn_mfma_f32_32x32x1f32, amdgcn_mfma_f32_16x16x1f32,
2631 amdgcn_mfma_f32_4x4x1f32, amdgcn_mfma_f32_32x32x2f32,
2632 amdgcn_mfma_f32_16x16x4f32, amdgcn_mfma_f32_32x32x4f16,
2633 amdgcn_mfma_f32_16x16x4f16, amdgcn_mfma_f32_4x4x4f16,
2634 amdgcn_mfma_f32_32x32x8f16, amdgcn_mfma_f32_16x16x16f16,
2635 amdgcn_mfma_i32_32x32x4i8, amdgcn_mfma_i32_16x16x4i8,
2636 amdgcn_mfma_i32_4x4x4i8, amdgcn_mfma_i32_32x32x8i8,
2637 amdgcn_mfma_i32_16x16x16i8, amdgcn_mfma_f32_32x32x2bf16,
2638 amdgcn_mfma_f32_16x16x2bf16, amdgcn_mfma_f32_4x4x2bf16,
2639 amdgcn_mfma_f32_32x32x4bf16, amdgcn_mfma_f32_16x16x8bf16})
2651 amdgcn_mfma_f32_32x32x4bf16_1k,
2652 amdgcn_mfma_f32_16x16x4bf16_1k,
2653 amdgcn_mfma_f32_4x4x4bf16_1k,
2654 amdgcn_mfma_f32_32x32x8bf16_1k,
2655 amdgcn_mfma_f32_16x16x16bf16_1k,
2656 amdgcn_mfma_f64_16x16x4f64,
2657 amdgcn_mfma_f64_4x4x4f64,
2658 amdgcn_mfma_i32_16x16x32_i8,
2659 amdgcn_mfma_i32_32x32x16_i8,
2660 amdgcn_mfma_f32_16x16x8_xf32,
2661 amdgcn_mfma_f32_32x32x4_xf32,
2662 amdgcn_mfma_f32_16x16x32_bf8_bf8,
2663 amdgcn_mfma_f32_16x16x32_bf8_fp8,
2664 amdgcn_mfma_f32_16x16x32_fp8_bf8,
2665 amdgcn_mfma_f32_16x16x32_fp8_fp8,
2666 amdgcn_mfma_f32_32x32x16_bf8_bf8,
2667 amdgcn_mfma_f32_32x32x16_bf8_fp8,
2668 amdgcn_mfma_f32_32x32x16_fp8_bf8,
2669 amdgcn_mfma_f32_32x32x16_fp8_fp8,
2671 amdgcn_mfma_f32_16x16x32_f16,
2672 amdgcn_mfma_f32_32x32x16_f16,
2673 amdgcn_mfma_i32_16x16x64_i8,
2674 amdgcn_mfma_i32_32x32x32_i8,
2684 amdgcn_smfmac_f32_16x16x32_f16, amdgcn_smfmac_f32_32x32x16_f16,
2685 amdgcn_smfmac_f32_16x16x32_bf16, amdgcn_smfmac_f32_32x32x16_bf16,
2686 amdgcn_smfmac_i32_16x16x64_i8, amdgcn_smfmac_i32_32x32x32_i8,
2687 amdgcn_smfmac_f32_16x16x64_bf8_bf8, amdgcn_smfmac_f32_16x16x64_bf8_fp8,
2688 amdgcn_smfmac_f32_16x16x64_fp8_bf8, amdgcn_smfmac_f32_16x16x64_fp8_fp8,
2689 amdgcn_smfmac_f32_32x32x32_bf8_bf8, amdgcn_smfmac_f32_32x32x32_bf8_fp8,
2690 amdgcn_smfmac_f32_32x32x32_fp8_bf8, amdgcn_smfmac_f32_32x32x32_fp8_fp8,
2692 amdgcn_smfmac_f32_16x16x64_f16, amdgcn_smfmac_f32_32x32x32_f16,
2693 amdgcn_smfmac_i32_16x16x128_i8, amdgcn_smfmac_i32_32x32x64_i8,
2694 amdgcn_smfmac_f32_16x16x128_bf8_bf8, amdgcn_smfmac_f32_16x16x128_bf8_fp8,
2695 amdgcn_smfmac_f32_16x16x128_fp8_bf8, amdgcn_smfmac_f32_16x16x128_fp8_fp8,
2696 amdgcn_smfmac_f32_32x32x64_bf8_bf8, amdgcn_smfmac_f32_32x32x64_bf8_fp8,
2697 amdgcn_smfmac_f32_32x32x64_fp8_bf8, amdgcn_smfmac_f32_32x32x64_fp8_fp8})
2702 addRulesForIOpcs({amdgcn_mfma_scale_f32_32x32x64_f8f6f4,
2703 amdgcn_mfma_scale_f32_16x16x128_f8f6f4})
2712 amdgcn_wmma_f32_16x16x16_f16, amdgcn_wmma_f32_16x16x16_bf16,
2713 amdgcn_wmma_f16_16x16x16_f16, amdgcn_wmma_bf16_16x16x16_bf16,
2714 amdgcn_wmma_f16_16x16x16_f16_tied, amdgcn_wmma_bf16_16x16x16_bf16_tied,
2715 amdgcn_wmma_i32_16x16x16_iu8, amdgcn_wmma_i32_16x16x16_iu4,
2717 amdgcn_wmma_f32_16x16x16_fp8_fp8, amdgcn_wmma_f32_16x16x16_fp8_bf8,
2718 amdgcn_wmma_f32_16x16x16_bf8_fp8, amdgcn_wmma_f32_16x16x16_bf8_bf8,
2719 amdgcn_wmma_i32_16x16x32_iu4,
2721 amdgcn_wmma_f32_16x16x4_f32, amdgcn_wmma_f32_16x16x32_bf16,
2722 amdgcn_wmma_f32_16x16x32_f16, amdgcn_wmma_f16_16x16x32_f16,
2723 amdgcn_wmma_bf16_16x16x32_bf16, amdgcn_wmma_bf16f32_16x16x32_bf16,
2724 amdgcn_wmma_f32_16x16x64_fp8_fp8, amdgcn_wmma_f32_16x16x64_fp8_bf8,
2725 amdgcn_wmma_f32_16x16x64_bf8_fp8, amdgcn_wmma_f32_16x16x64_bf8_bf8,
2726 amdgcn_wmma_f16_16x16x64_fp8_fp8, amdgcn_wmma_f16_16x16x64_fp8_bf8,
2727 amdgcn_wmma_f16_16x16x64_bf8_fp8, amdgcn_wmma_f16_16x16x64_bf8_bf8,
2728 amdgcn_wmma_f16_16x16x128_fp8_fp8, amdgcn_wmma_f16_16x16x128_fp8_bf8,
2729 amdgcn_wmma_f16_16x16x128_bf8_fp8, amdgcn_wmma_f16_16x16x128_bf8_bf8,
2730 amdgcn_wmma_f32_16x16x128_fp8_fp8, amdgcn_wmma_f32_16x16x128_fp8_bf8,
2731 amdgcn_wmma_f32_16x16x128_bf8_fp8, amdgcn_wmma_f32_16x16x128_bf8_bf8,
2732 amdgcn_wmma_i32_16x16x64_iu8, amdgcn_wmma_f32_16x16x128_f8f6f4,
2733 amdgcn_wmma_scale_f32_16x16x128_f8f6f4,
2734 amdgcn_wmma_scale16_f32_16x16x128_f8f6f4, amdgcn_wmma_f32_32x16x128_f4,
2735 amdgcn_wmma_scale_f32_32x16x128_f4, amdgcn_wmma_scale16_f32_32x16x128_f4,
2737 amdgcn_wmma_f64_16x16x4_f64,
2739 amdgcn_swmmac_f32_16x16x32_f16, amdgcn_swmmac_f32_16x16x32_bf16,
2740 amdgcn_swmmac_f16_16x16x32_f16, amdgcn_swmmac_bf16_16x16x32_bf16,
2741 amdgcn_swmmac_i32_16x16x32_iu8, amdgcn_swmmac_i32_16x16x32_iu4,
2742 amdgcn_swmmac_i32_16x16x64_iu4, amdgcn_swmmac_f32_16x16x32_fp8_fp8,
2743 amdgcn_swmmac_f32_16x16x32_fp8_bf8, amdgcn_swmmac_f32_16x16x32_bf8_fp8,
2744 amdgcn_swmmac_f32_16x16x32_bf8_bf8,
2746 amdgcn_swmmac_f32_16x16x64_f16, amdgcn_swmmac_f32_16x16x64_bf16,
2747 amdgcn_swmmac_f16_16x16x64_f16, amdgcn_swmmac_bf16_16x16x64_bf16,
2748 amdgcn_swmmac_bf16f32_16x16x64_bf16, amdgcn_swmmac_f32_16x16x128_fp8_fp8,
2749 amdgcn_swmmac_f32_16x16x128_fp8_bf8, amdgcn_swmmac_f32_16x16x128_bf8_fp8,
2750 amdgcn_swmmac_f32_16x16x128_bf8_bf8, amdgcn_swmmac_f16_16x16x128_fp8_fp8,
2751 amdgcn_swmmac_f16_16x16x128_fp8_bf8, amdgcn_swmmac_f16_16x16x128_bf8_fp8,
2752 amdgcn_swmmac_f16_16x16x128_bf8_bf8, amdgcn_swmmac_i32_16x16x128_iu8})