14#include "llvm/IR/IntrinsicsAMDGPU.h"
18using namespace MIPatternMatch;
22 switch (
MI.getOpcode()) {
28 case AMDGPU::G_FMINNUM:
29 case AMDGPU::G_FMAXNUM:
30 case AMDGPU::G_FMINNUM_IEEE:
31 case AMDGPU::G_FMAXNUM_IEEE:
32 case AMDGPU::G_FMINIMUM:
33 case AMDGPU::G_FMAXIMUM:
36 case AMDGPU::G_INTRINSIC_TRUNC:
37 case AMDGPU::G_FPTRUNC:
39 case AMDGPU::G_FNEARBYINT:
40 case AMDGPU::G_INTRINSIC_ROUND:
41 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
42 case AMDGPU::G_FCANONICALIZE:
43 case AMDGPU::G_AMDGPU_RCP_IFLAG:
44 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
45 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
47 case AMDGPU::G_INTRINSIC: {
49 switch (IntrinsicID) {
50 case Intrinsic::amdgcn_rcp:
51 case Intrinsic::amdgcn_rcp_legacy:
52 case Intrinsic::amdgcn_sin:
53 case Intrinsic::amdgcn_fmul_legacy:
54 case Intrinsic::amdgcn_fmed3:
55 case Intrinsic::amdgcn_fma_legacy:
72 return MI.getNumOperands() > (isa<GIntrinsic>(
MI) ? 4u : 3u) ||
79 if (!
MI.memoperands().empty())
82 switch (
MI.getOpcode()) {
84 case AMDGPU::G_SELECT:
87 case TargetOpcode::INLINEASM:
88 case TargetOpcode::INLINEASM_BR:
89 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
90 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
91 case AMDGPU::G_BITCAST:
92 case AMDGPU::G_ANYEXT:
93 case AMDGPU::G_BUILD_VECTOR:
94 case AMDGPU::G_BUILD_VECTOR_TRUNC:
97 case AMDGPU::G_INTRINSIC:
98 case AMDGPU::G_INTRINSIC_CONVERGENT: {
100 switch (IntrinsicID) {
101 case Intrinsic::amdgcn_interp_p1:
102 case Intrinsic::amdgcn_interp_p2:
103 case Intrinsic::amdgcn_interp_mov:
104 case Intrinsic::amdgcn_interp_p1_f16:
105 case Intrinsic::amdgcn_interp_p2_f16:
106 case Intrinsic::amdgcn_div_scale:
124 unsigned NumMayIncreaseSize = 0;
140 return Options.NoSignedZerosFPMath ||
MI.getFlag(MachineInstr::MIFlag::FmNsz);
144 static const APFloat KF16(APFloat::IEEEhalf(),
APInt(16, 0x3118));
145 static const APFloat KF32(APFloat::IEEEsingle(),
APInt(32, 0x3e22f983));
146 static const APFloat KF64(APFloat::IEEEdouble(),
147 APInt(64, 0x3fc45f306dc9c882));
157 std::optional<FPValueAndVReg> FPValReg;
159 if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
163 if (ST.hasInv2PiInlineImm() &&
isInv2Pi(FPValReg->Value))
171 case AMDGPU::G_FMAXNUM:
172 return AMDGPU::G_FMINNUM;
173 case AMDGPU::G_FMINNUM:
174 return AMDGPU::G_FMAXNUM;
175 case AMDGPU::G_FMAXNUM_IEEE:
176 return AMDGPU::G_FMINNUM_IEEE;
177 case AMDGPU::G_FMINNUM_IEEE:
178 return AMDGPU::G_FMAXNUM_IEEE;
179 case AMDGPU::G_FMAXIMUM:
180 return AMDGPU::G_FMINIMUM;
181 case AMDGPU::G_FMINIMUM:
182 return AMDGPU::G_FMAXIMUM;
183 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
184 return AMDGPU::G_AMDGPU_FMIN_LEGACY;
185 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
186 return AMDGPU::G_AMDGPU_FMAX_LEGACY;
212 case AMDGPU::G_FMINNUM:
213 case AMDGPU::G_FMAXNUM:
214 case AMDGPU::G_FMINNUM_IEEE:
215 case AMDGPU::G_FMAXNUM_IEEE:
216 case AMDGPU::G_FMINIMUM:
217 case AMDGPU::G_FMAXIMUM:
218 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
219 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
229 case AMDGPU::G_FPEXT:
230 case AMDGPU::G_INTRINSIC_TRUNC:
231 case AMDGPU::G_FPTRUNC:
232 case AMDGPU::G_FRINT:
233 case AMDGPU::G_FNEARBYINT:
234 case AMDGPU::G_INTRINSIC_ROUND:
235 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
237 case AMDGPU::G_FCANONICALIZE:
238 case AMDGPU::G_AMDGPU_RCP_IFLAG:
240 case AMDGPU::G_INTRINSIC:
241 case AMDGPU::G_INTRINSIC_CONVERGENT: {
242 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
243 switch (IntrinsicID) {
244 case Intrinsic::amdgcn_rcp:
245 case Intrinsic::amdgcn_rcp_legacy:
246 case Intrinsic::amdgcn_sin:
247 case Intrinsic::amdgcn_fmul_legacy:
248 case Intrinsic::amdgcn_fmed3:
250 case Intrinsic::amdgcn_fma_legacy:
311 case AMDGPU::G_FMINNUM:
312 case AMDGPU::G_FMAXNUM:
313 case AMDGPU::G_FMINNUM_IEEE:
314 case AMDGPU::G_FMAXNUM_IEEE:
315 case AMDGPU::G_FMINIMUM:
316 case AMDGPU::G_FMAXIMUM:
317 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
318 case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
330 case AMDGPU::G_FPEXT:
331 case AMDGPU::G_INTRINSIC_TRUNC:
332 case AMDGPU::G_FRINT:
333 case AMDGPU::G_FNEARBYINT:
334 case AMDGPU::G_INTRINSIC_ROUND:
335 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
337 case AMDGPU::G_FCANONICALIZE:
338 case AMDGPU::G_AMDGPU_RCP_IFLAG:
339 case AMDGPU::G_FPTRUNC:
342 case AMDGPU::G_INTRINSIC:
343 case AMDGPU::G_INTRINSIC_CONVERGENT: {
344 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
345 switch (IntrinsicID) {
346 case Intrinsic::amdgcn_rcp:
347 case Intrinsic::amdgcn_rcp_legacy:
348 case Intrinsic::amdgcn_sin:
351 case Intrinsic::amdgcn_fmul_legacy:
354 case Intrinsic::amdgcn_fmed3:
359 case Intrinsic::amdgcn_fma_legacy:
395 MI.eraseFromParent();
403 if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
404 Register SrcReg = Def->getOperand(1).getReg();
408 if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
409 APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
410 bool LosesInfo =
true;
422 assert(
MI.getOpcode() == TargetOpcode::G_FPTRUNC);
446 MI.eraseFromParent();
unsigned const MachineRegisterInfo * MRI
static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI)
static bool isInv2Pi(const APFloat &APF)
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI, Register Reg)
static bool mayIgnoreSignedZero(MachineInstr &MI)
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg, MachineRegisterInfo &MRI)
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned CostThreshold=4)
static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI, const MachineRegisterInfo &MRI)
returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3...
static unsigned inverseMinMax(unsigned Opc)
static LLVM_READNONE bool fnegFoldsIntoMI(const MachineInstr &MI)
This contains common combine transformations that may be used in a combine pass.
Provides AMDGPU specific target descriptions.
static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
Contains matchers for matching SSA Machine Instructions.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2)
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo)
bool matchExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2)
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool bitwiseIsEqual(const APFloat &RHS) const
Class for arbitrary precision integers.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
MachineRegisterInfo & MRI
MachineIRBuilder & Builder
This class represents an Operation in the Expression.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
The instances of the Type class are immutable: once they are created, they are never changed.
A Use represents the edge between a Value definition and its users.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
This is an optimization pass for GlobalISel generic memory operations.
DWARFExpression::Operation Op
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf() LLVM_READNONE