14#include "llvm/IR/IntrinsicsAMDGPU.h" 
   25      TII(*
STI.getInstrInfo()) {}
 
 
   29  switch (
MI.getOpcode()) {
 
   35  case AMDGPU::G_FMINNUM:
 
   36  case AMDGPU::G_FMAXNUM:
 
   37  case AMDGPU::G_FMINNUM_IEEE:
 
   38  case AMDGPU::G_FMAXNUM_IEEE:
 
   39  case AMDGPU::G_FMINIMUM:
 
   40  case AMDGPU::G_FMAXIMUM:
 
   43  case AMDGPU::G_INTRINSIC_TRUNC:
 
   44  case AMDGPU::G_FPTRUNC:
 
   46  case AMDGPU::G_FNEARBYINT:
 
   47  case AMDGPU::G_INTRINSIC_ROUND:
 
   48  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
 
   49  case AMDGPU::G_FCANONICALIZE:
 
   50  case AMDGPU::G_AMDGPU_RCP_IFLAG:
 
   51  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
 
   52  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
 
   54  case AMDGPU::G_INTRINSIC: {
 
   56    switch (IntrinsicID) {
 
   57    case Intrinsic::amdgcn_rcp:
 
   58    case Intrinsic::amdgcn_rcp_legacy:
 
   59    case Intrinsic::amdgcn_sin:
 
   60    case Intrinsic::amdgcn_fmul_legacy:
 
   61    case Intrinsic::amdgcn_fmed3:
 
   62    case Intrinsic::amdgcn_fma_legacy:
 
 
   80         MRI.getType(
MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
 
 
   86  if (!
MI.memoperands().empty())
 
   89  switch (
MI.getOpcode()) {
 
   91  case AMDGPU::G_SELECT:
 
   94  case TargetOpcode::INLINEASM:
 
   95  case TargetOpcode::INLINEASM_BR:
 
   96  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
 
   97  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
 
   98  case AMDGPU::G_BITCAST:
 
   99  case AMDGPU::G_ANYEXT:
 
  100  case AMDGPU::G_BUILD_VECTOR:
 
  101  case AMDGPU::G_BUILD_VECTOR_TRUNC:
 
  104  case AMDGPU::G_INTRINSIC:
 
  105  case AMDGPU::G_INTRINSIC_CONVERGENT: {
 
  107    switch (IntrinsicID) {
 
  108    case Intrinsic::amdgcn_interp_p1:
 
  109    case Intrinsic::amdgcn_interp_p2:
 
  110    case Intrinsic::amdgcn_interp_mov:
 
  111    case Intrinsic::amdgcn_interp_p1_f16:
 
  112    case Intrinsic::amdgcn_interp_p2_f16:
 
  113    case Intrinsic::amdgcn_div_scale:
 
 
  131  unsigned NumMayIncreaseSize = 0;
 
 
  154                            APInt(64, 0x3fc45f306dc9c882));
 
 
  164  std::optional<FPValueAndVReg> FPValReg;
 
  166    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
 
  170    if (ST.hasInv2PiInlineImm() && 
isInv2Pi(FPValReg->Value))
 
 
  178  case AMDGPU::G_FMAXNUM:
 
  179    return AMDGPU::G_FMINNUM;
 
  180  case AMDGPU::G_FMINNUM:
 
  181    return AMDGPU::G_FMAXNUM;
 
  182  case AMDGPU::G_FMAXNUM_IEEE:
 
  183    return AMDGPU::G_FMINNUM_IEEE;
 
  184  case AMDGPU::G_FMINNUM_IEEE:
 
  185    return AMDGPU::G_FMAXNUM_IEEE;
 
  186  case AMDGPU::G_FMAXIMUM:
 
  187    return AMDGPU::G_FMINIMUM;
 
  188  case AMDGPU::G_FMINIMUM:
 
  189    return AMDGPU::G_FMAXIMUM;
 
  190  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
 
  191    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
 
  192  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
 
  193    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
 
 
  202  MatchInfo = 
MRI.getVRegDef(Src);
 
  208  if (
MRI.hasOneNonDBGUse(Src)) {
 
  219  case AMDGPU::G_FMINNUM:
 
  220  case AMDGPU::G_FMAXNUM:
 
  221  case AMDGPU::G_FMINNUM_IEEE:
 
  222  case AMDGPU::G_FMAXNUM_IEEE:
 
  223  case AMDGPU::G_FMINIMUM:
 
  224  case AMDGPU::G_FMAXIMUM:
 
  225  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
 
  226  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
 
  236  case AMDGPU::G_FPEXT:
 
  237  case AMDGPU::G_INTRINSIC_TRUNC:
 
  238  case AMDGPU::G_FPTRUNC:
 
  239  case AMDGPU::G_FRINT:
 
  240  case AMDGPU::G_FNEARBYINT:
 
  241  case AMDGPU::G_INTRINSIC_ROUND:
 
  242  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
 
  244  case AMDGPU::G_FCANONICALIZE:
 
  245  case AMDGPU::G_AMDGPU_RCP_IFLAG:
 
  247  case AMDGPU::G_INTRINSIC:
 
  248  case AMDGPU::G_INTRINSIC_CONVERGENT: {
 
  250    switch (IntrinsicID) {
 
  251    case Intrinsic::amdgcn_rcp:
 
  252    case Intrinsic::amdgcn_rcp_legacy:
 
  253    case Intrinsic::amdgcn_sin:
 
  254    case Intrinsic::amdgcn_fmul_legacy:
 
  255    case Intrinsic::amdgcn_fmed3:
 
  257    case Intrinsic::amdgcn_fma_legacy:
 
 
  287      Reg = 
Builder.buildFNeg(
MRI.getType(Reg), Reg).getReg(0);
 
  300      YReg = 
Builder.buildFNeg(
MRI.getType(YReg), YReg).getReg(0);
 
  305  Builder.setInstrAndDebugLoc(*MatchInfo);
 
  318  case AMDGPU::G_FMINNUM:
 
  319  case AMDGPU::G_FMAXNUM:
 
  320  case AMDGPU::G_FMINNUM_IEEE:
 
  321  case AMDGPU::G_FMAXNUM_IEEE:
 
  322  case AMDGPU::G_FMINIMUM:
 
  323  case AMDGPU::G_FMAXIMUM:
 
  324  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
 
  325  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
 
  337  case AMDGPU::G_FPEXT:
 
  338  case AMDGPU::G_INTRINSIC_TRUNC:
 
  339  case AMDGPU::G_FRINT:
 
  340  case AMDGPU::G_FNEARBYINT:
 
  341  case AMDGPU::G_INTRINSIC_ROUND:
 
  342  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
 
  344  case AMDGPU::G_FCANONICALIZE:
 
  345  case AMDGPU::G_AMDGPU_RCP_IFLAG:
 
  346  case AMDGPU::G_FPTRUNC:
 
  349  case AMDGPU::G_INTRINSIC:
 
  350  case AMDGPU::G_INTRINSIC_CONVERGENT: {
 
  352    switch (IntrinsicID) {
 
  353    case Intrinsic::amdgcn_rcp:
 
  354    case Intrinsic::amdgcn_rcp_legacy:
 
  355    case Intrinsic::amdgcn_sin:
 
  358    case Intrinsic::amdgcn_fmul_legacy:
 
  361    case Intrinsic::amdgcn_fmed3:
 
  366    case Intrinsic::amdgcn_fma_legacy:
 
  382  if (
MRI.hasOneNonDBGUse(MatchInfoDst)) {
 
  398    Builder.setInstrAndDebugLoc(*NextInst);
 
  399    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, 
MI.getFlags());
 
  402  MI.eraseFromParent();
 
 
  410  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
 
  411    Register SrcReg = Def->getOperand(1).getReg();
 
  415  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
 
  416    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
 
  417    bool LosesInfo = 
true;
 
 
  429  assert(
MI.getOpcode() == TargetOpcode::G_FPTRUNC);
 
 
  448  LLT Ty = 
MRI.getType(Src0);
 
  449  auto A1 = 
Builder.buildFMinNumIEEE(Ty, Src0, Src1);
 
  450  auto B1 = 
Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
 
  451  auto C1 = 
Builder.buildFMaxNumIEEE(Ty, A1, Src2);
 
  452  Builder.buildFMinNumIEEE(
MI.getOperand(0), B1, C1);
 
  453  MI.eraseFromParent();
 
 
  459  assert(
MI.getOpcode() == TargetOpcode::G_FMUL);
 
  464  LLT DestTy = 
MRI.getType(Dst);
 
  476  const auto SelectTrueVal =
 
  480  const auto SelectFalseVal =
 
  485  if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
 
  489  if (ScalarDestTy == 
LLT::float32() && 
TII.isInlineConstant(*SelectTrueVal) &&
 
  490      TII.isInlineConstant(*SelectFalseVal))
 
  493  int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
 
  494  if (SelectTrueLog2Val == INT_MIN)
 
  496  int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
 
  497  if (SelectFalseLog2Val == INT_MIN)
 
  502    auto NewSel = 
Builder.buildSelect(
 
  503        IntDestTy, SelectCondReg,
 
  504        Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
 
  505        Builder.buildConstant(IntDestTy, SelectFalseLog2Val));
 
  508    if (SelectTrueVal->isNegative()) {
 
  510          Builder.buildFNeg(DestTy, XReg, 
MRI.getVRegDef(XReg)->getFlags());
 
  511      Builder.buildFLdexp(Dst, NegX, NewSel, 
MI.getFlags());
 
  513      Builder.buildFLdexp(Dst, XReg, NewSel, 
MI.getFlags());
 
 
  525  const uint64_t Val = Res->Value.getZExtValue();
 
  526  unsigned MaskIdx = 0;
 
  527  unsigned MaskLen = 0;
 
  532  return MaskLen >= 32 && ((MaskIdx == 0) || (MaskIdx == 64 - MaskLen));
 
 
unsigned const MachineRegisterInfo * MRI
 
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
 
static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI)
 
static bool isInv2Pi(const APFloat &APF)
 
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI, Register Reg)
 
static bool mayIgnoreSignedZero(MachineInstr &MI)
 
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg, MachineRegisterInfo &MRI)
 
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned CostThreshold=4)
 
static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI, const MachineRegisterInfo &MRI)
returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3...
 
static unsigned inverseMinMax(unsigned Opc)
 
static LLVM_READNONE bool fnegFoldsIntoMI(const MachineInstr &MI)
 
This contains common combine transformations that may be used in a combine pass.
 
Provides AMDGPU specific target descriptions.
 
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
 
AMD GCN specific subclass of TargetSubtarget.
 
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
 
Contains matchers for matching SSA Machine Instructions.
 
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
 
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
 
AMDGPUCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT, MachineDominatorTree *MDT, const LegalizerInfo *LI, const GCNSubtarget &STI)
 
bool matchConstantIs32BitMask(Register Reg) const
 
bool matchCombineFmulWithSelectToFldexp(MachineInstr &MI, MachineInstr &Sel, std::function< void(MachineIRBuilder &)> &MatchInfo) const
 
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
 
bool matchExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2) const
 
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo) const
 
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo) const
 
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2) const
 
static const fltSemantics & IEEEsingle()
 
static const fltSemantics & IEEEdouble()
 
static constexpr roundingMode rmNearestTiesToEven
 
static const fltSemantics & IEEEhalf()
 
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
 
bool bitwiseIsEqual(const APFloat &RHS) const
 
Class for arbitrary precision integers.
 
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
 
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
 
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
 
MachineRegisterInfo & MRI
 
MachineDominatorTree * MDT
 
GISelChangeObserver & Observer
 
MachineIRBuilder & Builder
 
Abstract class that contains various methods for clients to notify about changes.
 
static constexpr LLT float64()
Get a 64-bit IEEE double value.
 
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
 
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
 
static constexpr LLT float16()
Get a 16-bit IEEE half value.
 
constexpr LLT getScalarType() const
 
static constexpr LLT float32()
Get a 32-bit IEEE float value.
 
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
 
Helper class to build MachineInstr.
 
Representation of each machine instruction.
 
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
 
const MachineOperand & getOperand(unsigned i) const
 
MachineOperand class - Representation of each machine instruction operand.
 
Register getReg() const
getReg - Returns the register number.
 
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
 
Wrapper class representing virtual and physical registers.
 
The instances of the Type class are immutable: once they are created, they are never changed.
 
A Use represents the edge between a Value definition and its users.
 
self_iterator getIterator()
 
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
 
operand_type_match m_Reg()
 
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
 
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
 
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
 
This is an optimization pass for GlobalISel generic memory operations.
 
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
 
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
 
DWARFExpression::Operation Op
 
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a float constant integer or a splat vector of float constant integers.
 
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
 
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
 
static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))