14#include "llvm/IR/IntrinsicsAMDGPU.h"
18using namespace MIPatternMatch;
25 TII(*STI.getInstrInfo()) {}
29 switch (
MI.getOpcode()) {
35 case AMDGPU::G_FMINNUM:
36 case AMDGPU::G_FMAXNUM:
37 case AMDGPU::G_FMINNUM_IEEE:
38 case AMDGPU::G_FMAXNUM_IEEE:
39 case AMDGPU::G_FMINIMUM:
40 case AMDGPU::G_FMAXIMUM:
43 case AMDGPU::G_INTRINSIC_TRUNC:
44 case AMDGPU::G_FPTRUNC:
46 case AMDGPU::G_FNEARBYINT:
47 case AMDGPU::G_INTRINSIC_ROUND:
48 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
49 case AMDGPU::G_FCANONICALIZE:
50 case AMDGPU::G_AMDGPU_RCP_IFLAG:
51 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
52 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
54 case AMDGPU::G_INTRINSIC: {
56 switch (IntrinsicID) {
57 case Intrinsic::amdgcn_rcp:
58 case Intrinsic::amdgcn_rcp_legacy:
59 case Intrinsic::amdgcn_sin:
60 case Intrinsic::amdgcn_fmul_legacy:
61 case Intrinsic::amdgcn_fmed3:
62 case Intrinsic::amdgcn_fma_legacy:
79 return MI.getNumOperands() > (isa<GIntrinsic>(
MI) ? 4u : 3u) ||
86 if (!
MI.memoperands().empty())
89 switch (
MI.getOpcode()) {
91 case AMDGPU::G_SELECT:
94 case TargetOpcode::INLINEASM:
95 case TargetOpcode::INLINEASM_BR:
96 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
97 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
98 case AMDGPU::G_BITCAST:
99 case AMDGPU::G_ANYEXT:
100 case AMDGPU::G_BUILD_VECTOR:
101 case AMDGPU::G_BUILD_VECTOR_TRUNC:
104 case AMDGPU::G_INTRINSIC:
105 case AMDGPU::G_INTRINSIC_CONVERGENT: {
107 switch (IntrinsicID) {
108 case Intrinsic::amdgcn_interp_p1:
109 case Intrinsic::amdgcn_interp_p2:
110 case Intrinsic::amdgcn_interp_mov:
111 case Intrinsic::amdgcn_interp_p1_f16:
112 case Intrinsic::amdgcn_interp_p2_f16:
113 case Intrinsic::amdgcn_div_scale:
131 unsigned NumMayIncreaseSize = 0;
154 APInt(64, 0x3fc45f306dc9c882));
164 std::optional<FPValueAndVReg> FPValReg;
166 if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
170 if (ST.hasInv2PiInlineImm() &&
isInv2Pi(FPValReg->Value))
178 case AMDGPU::G_FMAXNUM:
179 return AMDGPU::G_FMINNUM;
180 case AMDGPU::G_FMINNUM:
181 return AMDGPU::G_FMAXNUM;
182 case AMDGPU::G_FMAXNUM_IEEE:
183 return AMDGPU::G_FMINNUM_IEEE;
184 case AMDGPU::G_FMINNUM_IEEE:
185 return AMDGPU::G_FMAXNUM_IEEE;
186 case AMDGPU::G_FMAXIMUM:
187 return AMDGPU::G_FMINIMUM;
188 case AMDGPU::G_FMINIMUM:
189 return AMDGPU::G_FMAXIMUM;
190 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
191 return AMDGPU::G_AMDGPU_FMIN_LEGACY;
192 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
193 return AMDGPU::G_AMDGPU_FMAX_LEGACY;
219 case AMDGPU::G_FMINNUM:
220 case AMDGPU::G_FMAXNUM:
221 case AMDGPU::G_FMINNUM_IEEE:
222 case AMDGPU::G_FMAXNUM_IEEE:
223 case AMDGPU::G_FMINIMUM:
224 case AMDGPU::G_FMAXIMUM:
225 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
226 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
236 case AMDGPU::G_FPEXT:
237 case AMDGPU::G_INTRINSIC_TRUNC:
238 case AMDGPU::G_FPTRUNC:
239 case AMDGPU::G_FRINT:
240 case AMDGPU::G_FNEARBYINT:
241 case AMDGPU::G_INTRINSIC_ROUND:
242 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
244 case AMDGPU::G_FCANONICALIZE:
245 case AMDGPU::G_AMDGPU_RCP_IFLAG:
247 case AMDGPU::G_INTRINSIC:
248 case AMDGPU::G_INTRINSIC_CONVERGENT: {
249 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
250 switch (IntrinsicID) {
251 case Intrinsic::amdgcn_rcp:
252 case Intrinsic::amdgcn_rcp_legacy:
253 case Intrinsic::amdgcn_sin:
254 case Intrinsic::amdgcn_fmul_legacy:
255 case Intrinsic::amdgcn_fmed3:
257 case Intrinsic::amdgcn_fma_legacy:
318 case AMDGPU::G_FMINNUM:
319 case AMDGPU::G_FMAXNUM:
320 case AMDGPU::G_FMINNUM_IEEE:
321 case AMDGPU::G_FMAXNUM_IEEE:
322 case AMDGPU::G_FMINIMUM:
323 case AMDGPU::G_FMAXIMUM:
324 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
325 case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
337 case AMDGPU::G_FPEXT:
338 case AMDGPU::G_INTRINSIC_TRUNC:
339 case AMDGPU::G_FRINT:
340 case AMDGPU::G_FNEARBYINT:
341 case AMDGPU::G_INTRINSIC_ROUND:
342 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
344 case AMDGPU::G_FCANONICALIZE:
345 case AMDGPU::G_AMDGPU_RCP_IFLAG:
346 case AMDGPU::G_FPTRUNC:
349 case AMDGPU::G_INTRINSIC:
350 case AMDGPU::G_INTRINSIC_CONVERGENT: {
351 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
352 switch (IntrinsicID) {
353 case Intrinsic::amdgcn_rcp:
354 case Intrinsic::amdgcn_rcp_legacy:
355 case Intrinsic::amdgcn_sin:
358 case Intrinsic::amdgcn_fmul_legacy:
361 case Intrinsic::amdgcn_fmed3:
366 case Intrinsic::amdgcn_fma_legacy:
402 MI.eraseFromParent();
410 if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
411 Register SrcReg = Def->getOperand(1).getReg();
415 if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
416 APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
417 bool LosesInfo =
true;
429 assert(
MI.getOpcode() == TargetOpcode::G_FPTRUNC);
453 MI.eraseFromParent();
459 assert(
MI.getOpcode() == TargetOpcode::G_FMUL);
476 const auto SelectTrueVal =
480 const auto SelectFalseVal =
485 if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
493 int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
494 if (SelectTrueLog2Val == INT_MIN)
496 int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
497 if (SelectFalseLog2Val == INT_MIN)
503 IntDestTy, SelectCondReg,
508 if (SelectTrueVal->isNegative()) {
unsigned const MachineRegisterInfo * MRI
static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI)
static bool isInv2Pi(const APFloat &APF)
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI, Register Reg)
static bool mayIgnoreSignedZero(MachineInstr &MI)
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg, MachineRegisterInfo &MRI)
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned CostThreshold=4)
static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI, const MachineRegisterInfo &MRI)
returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3...
static unsigned inverseMinMax(unsigned Opc)
static LLVM_READNONE bool fnegFoldsIntoMI(const MachineInstr &MI)
This contains common combine transformations that may be used in a combine pass.
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
AMDGPUCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI, const GCNSubtarget &STI)
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2)
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo)
bool matchExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2)
bool matchCombineFmulWithSelectToFldexp(MachineInstr &MI, MachineInstr &Sel, std::function< void(MachineIRBuilder &)> &MatchInfo)
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool bitwiseIsEqual(const APFloat &RHS) const
Class for arbitrary precision integers.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
MachineRegisterInfo & MRI
MachineIRBuilder & Builder
This class represents an Operation in the Expression.
Abstract class that contains various methods for clients to notify about changes.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr LLT getScalarType() const
static constexpr LLT float32()
Get a 32-bit IEEE float value.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Helper class to build MachineInstr.
MachineInstrBuilder buildFLdexp(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FLDEXP Src0, Src1.
MachineInstrBuilder buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
bool isInlineConstant(const APInt &Imm) const
The instances of the Type class are immutable: once they are created, they are never changed.
A Use represents the edge between a Value definition and its users.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
This is an optimization pass for GlobalISel generic memory operations.
DWARFExpression::Operation Op
std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a float constant integer or a splat vector of float constant integers.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE