LLVM 20.0.0git
AMDGPUCombinerHelper.cpp
//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

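/// \file
/// AMDGPU-specific combines used with the GlobalISel combiner: folding fneg
/// into instructions that can absorb it, re-expanding fmed3 that was promoted
/// to f32 back to f16 min/max, and combining fmul with a select of
/// power-of-two constants into fldexp.
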
#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace MIPatternMatch;

AMDGPUCombinerHelper::AMDGPUCombinerHelper(
    GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
    GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI,
    const GCNSubtarget &STI)
    : CombinerHelper(Observer, B, IsPreLegalize, KB, MDT, LI), STI(STI),
      TII(*STI.getInstrInfo()) {}

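// Returns true if an fneg of MI's result can instead be folded into MI itself
// by negating its sources (the min/max opcodes additionally switch to the
// inverse opcode); see matchFoldableFneg/applyFoldableFneg below.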
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  // Three or more source operands, or a 64-bit result, already force VOP3.
  return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}

// Most FP instructions support source modifiers.
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}

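// Returns true if every non-debug user of MI's result supports source
// modifiers, allowing at most CostThreshold users that would have to grow to
// a VOP3 encoding to take one.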
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
  // it is truly free to use a source modifier in all cases. If there are
  // multiple users, and using a source modifier for each of them would force
  // a switch to VOP3, there will be a code size increase. Try to avoid
  // increasing code size unless we know it will save on the instruction count.
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (!opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}

static bool mayIgnoreSignedZero(MachineInstr &MI) {
  const TargetOptions &Options = MI.getMF()->getTarget().Options;
  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
}

static bool isInv2Pi(const APFloat &APF) {
  // Bit patterns of 1.0 / (2.0 * pi) in half, single, and double precision.
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}

// +0.0 and 1.0 / (2.0 * pi) have inline immediates, but their negated values
// do not, so there is an additional cost to negate them. For example, folding
// an fneg through (fminnum x, +0.0) would require materializing -0.0.
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}

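// fneg (min x, y) is equivalent to max (fneg x), (fneg y) and vice versa, so
// folding an fneg through a min/max also requires switching to the inverse
// opcode.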
static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_FMAXIMUM:
    return AMDGPU::G_FMINIMUM;
  case AMDGPU::G_FMINIMUM:
    return AMDGPU::G_FMAXIMUM;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}

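// Illustrative MIR for the single-use case of the fneg fold (register names
// are made up, and the G_FADD case requires that signed zeros be ignorable):
//   %a:_(s32) = G_FADD %x, %y
//   %b:_(s32) = G_FNEG %a
// becomes
//   %nx:_(s32) = G_FNEG %x
//   %ny:_(s32) = G_FNEG %y
//   %a:_(s32) = G_FADD %nx, %ny
// with uses of %b rewritten to use %a, and the new G_FNEGs expected to be
// selected as source modifiers.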
bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  // Transform:
  // %A = inst %Op1, ...
  // %B = fneg %A
  //
  // into:
  //
  // (if %A has one use, specifically fneg above)
  // %B = inst (maybe fneg %Op1), ...
  //
  // (if %A has multiple uses)
  // %B = inst (maybe fneg %Op1), ...
  // %A = fneg %B

  // Replace register in operand with a register holding negated value.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Replace either register in operands with a register holding negated value.
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate appropriate operands so that resulting value of MatchInfo is
  // negated.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now has negated value so use that instead of old Dst.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
    // but replaceRegWith will replace defs as well. It is easier to replace
    // one def with a new register.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);

    // MatchInfo now has negated value so use that instead of old Dst.
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    // Recreate the non-negated value for other uses of the old MatchInfoDst.
    auto NextInst = ++MatchInfo->getIterator();
    Builder.setInstrAndDebugLoc(*NextInst);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}

// TODO: Should return converted value / extension source and avoid introducing
// intermediate fptruncs in the apply function.
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
                                  Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
    Register SrcReg = Def->getOperand(1).getReg();
    return MRI.getType(SrcReg) == LLT::scalar(16);
  }

  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return !LosesInfo;
  }

  return false;
}

bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) {
  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
  Register SrcReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32))
    return false;

  return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) &&
         isFPExtFromF16OrConst(MRI, Src2);
}

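// The expansion below computes the median at f16 using the identity
//   med3(a, b, c) == min(max(a, b), max(min(a, b), c)),
// after truncating each promoted source back to f16.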
void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) {
  // We expect fptrunc (fpext x) to fold out, and to constant fold any constant
  // sources.
  Src0 = Builder.buildFPTrunc(LLT::scalar(16), Src0).getReg(0);
  Src1 = Builder.buildFPTrunc(LLT::scalar(16), Src1).getReg(0);
  Src2 = Builder.buildFPTrunc(LLT::scalar(16), Src2).getReg(0);

  LLT Ty = MRI.getType(Src0);
  auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
  auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
  auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
  Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
  MI.eraseFromParent();
}

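// Combine fmul (x, select (cond, K0, K1)) into fldexp (x, select (cond,
// log2|K0|, log2|K1|)) when both constants are exact powers of two with
// matching signs; when they are negative, x is negated instead. Illustrative
// example (not taken from a test):
//   %k0:_(s32) = G_FCONSTANT float 8.0
//   %k1:_(s32) = G_FCONSTANT float 0.5
//   %s:_(s32) = G_SELECT %cond:_(s1), %k0, %k1
//   %r:_(s32) = G_FMUL %x, %s
// -->
//   %e0:_(s32) = G_CONSTANT i32 3
//   %e1:_(s32) = G_CONSTANT i32 -1
//   %e:_(s32) = G_SELECT %cond:_(s1), %e0, %e1
//   %r:_(s32) = G_FLDEXP %x, %e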
bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
    MachineInstr &MI, MachineInstr &Sel,
    std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FMUL);
  assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
  assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());

  Register Dst = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(Dst);
  LLT ScalarDestTy = DestTy.getScalarType();

  if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() &&
       ScalarDestTy != LLT::float16()) ||
      !MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
    return false;

  Register SelectCondReg = Sel.getOperand(1).getReg();
  MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
  MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());

  const auto SelectTrueVal =
      isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
  if (!SelectTrueVal)
    return false;
  const auto SelectFalseVal =
      isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
  if (!SelectFalseVal)
    return false;

  if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
    return false;

  // For f32, only non-inline constants should be transformed.
  if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
      TII.isInlineConstant(*SelectFalseVal))
    return false;

  int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
  if (SelectTrueLog2Val == INT_MIN)
    return false;
  int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
  if (SelectFalseLog2Val == INT_MIN)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
    LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
    auto NewSel = Builder.buildSelect(
        IntDestTy, SelectCondReg,
        Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
        Builder.buildConstant(IntDestTy, SelectFalseLog2Val));

    Register XReg = MI.getOperand(1).getReg();
    if (SelectTrueVal->isNegative()) {
      auto NegX =
          Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
      Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
    } else {
      Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
    }
  };

  return true;
}