// Excerpts from the AMDGPU GlobalISel combiner helper (AMDGPUCombinerHelper.cpp).
#include "llvm/IR/IntrinsicsAMDGPU.h"

using namespace MIPatternMatch;
// Returns true if a G_FNEG of this instruction's result can be folded into
// the instruction itself (via source modifiers or an inverted opcode).
static LLVM_READNONE bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  // ...
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  // ...
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}
// Returns true if the operation will definitely need to use a 64-bit
// encoding, and thus will use a VOP3 encoding regardless of the source
// modifiers.
static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                                const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}
// Most FP instructions support source modifiers, with the exceptions below.
static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  // ...
  case AMDGPU::G_SELECT:
  // ...
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  // ...
    return false;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Count uses that may grow to the larger VOP3 encoding if handed a source
  // modifier; the walk over the non-debug uses is elided here.
  unsigned NumMayIncreaseSize = 0;
  // ...
}
static bool mayIgnoreSignedZero(MachineInstr &MI) {
  const TargetOptions &Options = MI.getMF()->getTarget().Options;
  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
}
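Why the nsz gate matters: folding a negate through an FP add changes which signed zero comes out. A minimal standalone demonstration in plain C++ (illustration only, not combiner code):

#include <cmath>
#include <cstdio>

int main() {
  double A = +0.0, B = -0.0;
  // Under round-to-nearest, (+0.0) + (-0.0) == +0.0, so the two forms below
  // agree in magnitude but disagree in the sign of zero.
  double NegOfAdd = -(A + B);     // -(+0.0) == -0.0
  double AddOfNegs = (-A) + (-B); // (-0.0) + (+0.0) == +0.0
  std::printf("signbit(-(A+B)) = %d, signbit((-A)+(-B)) = %d\n",
              (int)std::signbit(NegOfAdd), (int)std::signbit(AddOfNegs));
  return 0;
}

The two results compare equal under ==, so the fold is legal exactly when signed zeros may be ignored (NoSignedZerosFPMath or the per-instruction nsz flag).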
static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}
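The three constants are the IEEE-754 bit patterns of 1/(2*pi) in half, single, and double precision. A quick standalone check of two of the patterns (assumes a C++20 compiler for std::bit_cast):

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  // 0x3e22f983 reinterpreted as a float is the nearest float to
  // 1/(2*pi) ~= 0.15915494; likewise for the double pattern.
  float F = std::bit_cast<float>(std::uint32_t{0x3e22f983});
  double D = std::bit_cast<double>(std::uint64_t{0x3fc45f306dc9c882});
  std::printf("f32: %.9g  f64: %.17g\n", F, D);
  return 0;
}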
// +0.0 and 1/(2*pi) are free as inline immediates but their negations are
// not, so negating such a constant carries an extra cost.
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}
static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}
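The pairs above encode the order-reversing property of negation: -max(a, b) == min(-a, -b) and -min(a, b) == max(-a, -b). This is what lets the apply step push a negate through a min/max by negating the operands and flipping the opcode. A standalone sanity check:

#include <algorithm>
#include <cassert>

int main() {
  for (double A : {-2.0, 0.5, 3.0})
    for (double B : {-1.0, 0.25, 4.0}) {
      // Negation reverses ordering, so it exchanges min and max.
      assert(-std::max(A, B) == std::min(-A, -B));
      assert(-std::min(A, B) == std::max(-A, -B));
    }
  return 0;
}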
bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  // ... (single-use and multi-use profitability heuristics elided)
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  // ...
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  // ...
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}
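For orientation, a schematic sketch (not the verbatim source) of how control reaches the switch above: the combine fires on a G_FNEG, resolves the defining instruction of its source register, and then classifies that definition. The helper name and the elided use-count heuristics are illustrative:

// Sketch only. The real matchFoldableFneg also weighs multi-use cases via
// MRI.hasOneNonDBGUse() and allUsesHaveSourceMods() before committing.
static bool sketchMatchFneg(MachineInstr &FNeg, MachineInstr *&MatchInfo,
                            MachineRegisterInfo &MRI) {
  Register Src = FNeg.getOperand(1).getReg(); // the negated value
  MatchInfo = MRI.getVRegDef(Src);            // instruction producing it
  return MatchInfo && fnegFoldsIntoMI(*MatchInfo);
}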
void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  // ... (NegateOperand helper and two-source arithmetic cases elided)
  switch (MatchInfo->getOpcode()) {
  // ...
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    // Negate both operands and switch to the inverse min/max opcode.
    // ...
    break;
  }
  // ...
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  // ...
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    // Single-source ops: fold the negate into the one source operand.
    // ...
    break;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      // ...
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      // ...
      break;
    case Intrinsic::amdgcn_fmed3:
      // ...
      break;
    case Intrinsic::amdgcn_fma_legacy:
      // ...
      break;
    }
    break;
  }
  }
  // ...
  MI.eraseFromParent();
}
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
                                  Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
    Register SrcReg = Def->getOperand(1).getReg();
    return MRI.getType(SrcReg) == LLT::scalar(16);
  }

  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    // The constant is usable iff it converts to f16 exactly.
    return !LosesInfo;
  }

  return false;
}
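The G_FCONSTANT arm relies on APFloat::convert reporting, via its losesInfo out-parameter, whether a value is exactly representable in the target semantics. A standalone illustration of the idiom (requires linking against LLVM's Support library):

#include "llvm/ADT/APFloat.h"
#include <cstdio>

int main() {
  using llvm::APFloat;
  // 0.5 is exactly representable in IEEE half precision; 0.1 is not.
  for (double D : {0.5, 0.1}) {
    APFloat Val(D);
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    std::printf("%g -> losesInfo=%d\n", D, (int)LosesInfo);
  }
  return 0;
}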
bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) {
  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
  // ... (single-use and type checks elided)
  return isFPExtFromF16OrConst(MRI, Src0) &&
         isFPExtFromF16OrConst(MRI, Src1) && isFPExtFromF16OrConst(MRI, Src2);
}
void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) {
  // ... (builds the f16 min/max expansion, elided)
  MI.eraseFromParent();
}
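The elided apply body rebuilds the f16 median-of-three, presumably via the buildFMinNumIEEE / buildFMaxNumIEEE builders cross-referenced below, using the identity med3(x, y, z) == max(min(x, y), min(max(x, y), z)). The identity itself can be checked in plain C++:

#include <algorithm>
#include <cassert>

// Median of three expressed with two mins and two maxes, mirroring the
// min/max expansion described above.
static double Med3(double X, double Y, double Z) {
  return std::max(std::min(X, Y), std::min(std::max(X, Y), Z));
}

int main() {
  double V[3] = {1.0, 2.0, 3.0};
  // All six orderings of {1, 2, 3} must yield the median 2.
  do {
    assert(Med3(V[0], V[1], V[2]) == 2.0);
  } while (std::next_permutation(V, V + 3));
  return 0;
}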
Declarations referenced on this page:

static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI)
static bool isInv2Pi(const APFloat &APF)
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI, Register Reg)
static bool mayIgnoreSignedZero(MachineInstr &MI)
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg, MachineRegisterInfo &MRI)
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned CostThreshold = 4)
static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI, const MachineRegisterInfo &MRI)
    Returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3 encoding regardless of the source modifiers.
static unsigned inverseMinMax(unsigned Opc)
static LLVM_READNONE bool fnegFoldsIntoMI(const MachineInstr &MI)
From the included headers:
This contains common combine transformations that may be used in a combine pass.
Provides AMDGPU specific target descriptions.
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
Contains matchers for matching SSA Machine Instructions.
AMDGPUCombinerHelper members:
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo)
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo)
bool matchExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2)
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool bitwiseIsEqual(const APFloat &RHS) const
Class for arbitrary precision integers.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
Calls MachineRegisterInfo::replaceRegWith() and informs the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
MachineRegisterInfo & MRI
MachineIRBuilder & Builder
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
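Finally, a hedged sketch of how the matcher and builder APIs above compose in practice: reuse an existing negate when the operand is already a G_FNEG, otherwise materialize one. The helper name is illustrative, not from the source:

// Illustrative helper, assuming a MachineIRBuilder already positioned at the
// desired insertion point (e.g. via setInstrAndDebugLoc).
static Register negateOrReuse(Register Reg, MachineRegisterInfo &MRI,
                              MachineIRBuilder &Builder) {
  Register NegSrc;
  // If Reg is defined by G_FNEG %NegSrc, folding the negate just means using
  // %NegSrc directly; otherwise insert an explicit G_FNEG.
  if (mi_match(Reg, MRI, m_GFNeg(m_Reg(NegSrc))))
    return NegSrc;
  return Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
}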