519 : ST(&_ST), MRI(&_MRI) {
521 addRulesForGOpcs({G_ADD, G_SUB},
Standard)
531 addRulesForGOpcs({G_UADDO, G_USUBO},
Standard)
535 addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE},
Standard)
539 addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT},
Standard)
547 bool HasVecMulU64 = ST->hasVectorMulU64();
559 bool hasMulHi = ST->hasScalarMulHiInsts();
560 addRulesForGOpcs({G_UMULH, G_SMULH},
Standard)
565 addRulesForGOpcs({G_AMDGPU_MAD_U64_U32},
Standard)
569 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
570 addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32},
Standard)
574 addRulesForGOpcs({G_XOR, G_OR, G_AND},
StandardB)
594 addRulesForGOpcs({G_LSHR},
Standard)
604 addRulesForGOpcs({G_ASHR},
Standard)
614 addRulesForGOpcs({G_FSHR},
Standard)
618 addRulesForGOpcs({G_BSWAP},
Standard)
626 addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
627 G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
635 addRulesForGOpcs({G_UBFX, G_SBFX},
Standard)
641 addRulesForGOpcs({G_SMIN, G_SMAX},
Standard)
649 addRulesForGOpcs({G_UMIN, G_UMAX},
Standard)
660 addRulesForGOpcs({G_CONSTANT})
663 addRulesForGOpcs({G_FREEZE})
670 addRulesForGOpcs({G_UNMERGE_VALUES})
675 addRulesForGOpcs({G_PHI})
681 addRulesForGOpcs({G_EXTRACT_VECTOR_ELT})
696 addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
697 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
698 G_AMDGPU_INTRIN_IMAGE_STORE,
699 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
714 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
716 addRulesForGOpcs({G_ICMP})
735 addRulesForGOpcs({G_BRCOND})
739 addRulesForGOpcs({G_BR}).
Any({{
_}, {{}, {
None}}});
749 addRulesForGOpcs({G_ANYEXT})
761 bool Has16bitCmp = ST->has16BitInsts();
765 addRulesForGOpcs({G_TRUNC})
782 addRulesForGOpcs({G_ZEXT})
797 addRulesForGOpcs({G_SEXT})
812 addRulesForGOpcs({G_SEXT_INREG})
818 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT},
Standard)
824 addRulesForGOpcs({G_ASSERT_ALIGN},
Standard)
836 addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
837 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
838 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
839 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
840 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
848 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
849 bool HasAtomicBufferGlobalPkAddF16Insts =
850 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
851 ST->hasAtomicBufferGlobalPkAddF16Insts();
852 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
853 addRulesForGOpcs({G_ATOMICRMW_FADD})
861 HasAtomicFlatPkAdd16Insts)
863 HasAtomicBufferGlobalPkAddF16Insts)
865 HasAtomicDsPkAdd16Insts);
867 addRulesForGOpcs({G_ATOMIC_CMPXCHG})
873 addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
879 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP},
Standard)
885 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
886 G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
887 G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
888 G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
889 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
890 G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
891 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
896 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
897 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
898 bool usesTrue16 = ST->useRealTrue16Insts();
901 return (*
MI.memoperands_begin())->getAlign() >=
Align(16);
905 return (*
MI.memoperands_begin())->getAlign() >=
Align(4);
909 return (*
MI.memoperands_begin())->isAtomic();
925 return (*
MI.memoperands_begin())->isVolatile();
929 return (*
MI.memoperands_begin())->isInvariant();
944 return MemSize == 16 || MemSize == 8;
952 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
953 (isConst || isInvMMO || isNoClobberMMO);
957 addRulesForGOpcs({G_LOAD})
1064 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD})
1084 addRulesForGOpcs({G_STORE})
1120 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1121 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1132 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1133 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1139 {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1144 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1157 {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1166 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1167 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1168 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1169 G_AMDGPU_TBUFFER_STORE_FORMAT,
1170 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1182 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
1191 addRulesForGOpcs({G_PTR_ADD})
1197 addRulesForGOpcs({G_INTTOPTR})
1205 addRulesForGOpcs({G_PTRTOINT})
1215 addRulesForGOpcs({G_PTRMASK})
1223 addRulesForGOpcs({G_BITREVERSE},
Standard)
1229 addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_UNDEF,
1236 addRulesForGOpcs({G_FENCE}).
Any({{{}}, {{}, {}}});
1238 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER},
Standard)
1243 addRulesForGOpcs({G_GLOBAL_VALUE})
1250 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).
Any({{
UniP5}, {{
SgprP5}, {}}});
1252 addRulesForGOpcs({G_SI_CALL})
1258 bool hasSALUFloat = ST->hasSALUFloatInsts();
1260 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL},
Standard)
1274 addRulesForGOpcs({G_FSUB, G_STRICT_FSUB},
Standard)
1282 addRulesForGOpcs({G_FMAD},
Standard)
1288 addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP},
Standard)
1296 addRulesForGOpcs({G_FMA, G_STRICT_FMA},
Standard)
1314 addRulesForGOpcs({G_AMDGPU_FMED3},
Standard)
1323 addRulesForGOpcs({G_AMDGPU_SMED3},
Standard)
1331 addRulesForGOpcs({G_FNEG, G_FABS},
Standard)
1346 addRulesForGOpcs({G_FCANONICALIZE},
Standard)
1358 bool hasPST = ST->hasPseudoScalarTrans();
1359 addRulesForGOpcs({G_FSQRT},
Standard)
1364 addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
1376 addRulesForGOpcs({G_UITOFP, G_SITOFP})
1388 addRulesForGOpcs({G_FPEXT})
1395 addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32},
Standard)
1399 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY},
Standard)
1403 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1405 addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM},
Standard)
1417 addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
1430 addRulesForGOpcs({G_FPTRUNC})
1439 addRulesForGOpcs({G_IS_FPCLASS})
1447 addRulesForGOpcs({G_FCMP},
Standard)
1461 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1477 addRulesForIOpcs({amdgcn_s_setreg})
1480 addRulesForIOpcs({amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1483 addRulesForIOpcs({amdgcn_s_sendmsg_rtn})
1487 addRulesForIOpcs({amdgcn_s_memrealtime, amdgcn_s_memtime},
Standard)
1490 addRulesForIOpcs({amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1491 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1496 addRulesForIOpcs({amdgcn_endpgm,
1498 amdgcn_s_barrier_signal,
1499 amdgcn_s_barrier_wait,
1504 amdgcn_s_ttracedata_imm,
1505 amdgcn_s_wait_asynccnt,
1506 amdgcn_s_wait_bvhcnt,
1507 amdgcn_s_wait_dscnt,
1508 amdgcn_s_wait_event,
1509 amdgcn_s_wait_event_export_ready,
1510 amdgcn_s_wait_expcnt,
1511 amdgcn_s_wait_kmcnt,
1512 amdgcn_s_wait_loadcnt,
1513 amdgcn_s_wait_samplecnt,
1514 amdgcn_s_wait_storecnt,
1515 amdgcn_s_wait_tensorcnt,
1517 amdgcn_wave_barrier})
1518 .
Any({{}, {{}, {}}});
1522 addRulesForIOpcs({amdgcn_s_sleep_var})
1525 addRulesForIOpcs({amdgcn_s_prefetch_data})
1528 addRulesForIOpcs({amdgcn_class})
1537 addRulesForIOpcs({amdgcn_end_cf})
1541 addRulesForIOpcs({amdgcn_if_break},
Standard)
1545 addRulesForIOpcs({amdgcn_exp})
1549 addRulesForIOpcs({amdgcn_exp_compr})
1552 addRulesForIOpcs({amdgcn_exp_row})
1558 addRulesForIOpcs({amdgcn_lds_direct_load},
StandardB)
1561 addRulesForIOpcs({amdgcn_lds_param_load},
Standard)
1564 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi},
Standard)
1567 addRulesForIOpcs({amdgcn_readfirstlane})
1573 addRulesForIOpcs({amdgcn_readlane},
StandardB)
1576 addRulesForIOpcs({amdgcn_writelane},
StandardB)
1581 addRulesForIOpcs({amdgcn_permlane16, amdgcn_permlanex16},
Standard)
1586 addRulesForIOpcs({amdgcn_perm},
Standard)
1590 addRulesForIOpcs({amdgcn_wave_reduce_umax, amdgcn_wave_reduce_umin},
Standard)
1596 addRulesForIOpcs({amdgcn_bitop3, amdgcn_fmad_ftz},
Standard)
1602 addRulesForIOpcs({amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1603 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1604 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1609 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24},
Standard)
1615 addRulesForIOpcs({amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
1616 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
1622 addRulesForIOpcs({amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
1628 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract},
Standard)
1636 addRulesForIOpcs({amdgcn_prng_b32})
1640 addRulesForIOpcs({amdgcn_sffbh},
Standard)
1644 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe},
Standard)
1650 addRulesForIOpcs({amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
1651 amdgcn_cvt_pknorm_u16, amdgcn_cvt_pkrtz},
1656 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1660 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1664 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1668 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num},
Standard)
1671 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1675 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1678 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1682 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1685 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1688 addRulesForIOpcs({amdgcn_global_load_lds})
1691 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1707 addRulesForIOpcs({amdgcn_wqm_demote}).
Any({{}, {{}, {
IntrId,
Vcc}}});
1709 addRulesForIOpcs({amdgcn_inverse_ballot})
1713 addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
1716 addRulesForIOpcs({amdgcn_mov_dpp, amdgcn_mov_dpp8},
StandardB)
1720 addRulesForIOpcs({amdgcn_update_dpp},
StandardB)
1724 addRulesForIOpcs({amdgcn_sin, amdgcn_cos},
Standard)
1730 addRulesForIOpcs({amdgcn_trig_preop},
Standard)
1734 addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
1739 addRulesForIOpcs({amdgcn_ds_append, amdgcn_ds_consume},
Standard)
1744 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn},
Standard)
1747 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn},
Standard)
1750 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn},
Standard)
1753 addRulesForIOpcs({amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap},
Standard)
1756 addRulesForIOpcs({amdgcn_ds_swizzle},
Standard)
1760 addRulesForIOpcs({amdgcn_permlane16_var, amdgcn_permlanex16_var},
Standard)
1763 addRulesForIOpcs({amdgcn_permlane16_swap, amdgcn_permlane32_swap},
Standard)
1766 addRulesForIOpcs({amdgcn_permlane64},
StandardB)
1769 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
1772 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
1775 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
1778 addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
1779 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
1784 addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
1789 addRulesForIOpcs({amdgcn_div_fmas},
Standard)
1795 addRulesForIOpcs({amdgcn_div_fixup},
Standard)
1803 addRulesForIOpcs({amdgcn_div_scale},
Standard)
1809 addRulesForIOpcs({amdgcn_udot2, amdgcn_sdot2},
Standard)
1813 addRulesForIOpcs({amdgcn_sudot4, amdgcn_sudot8},
Standard)