LLVM 18.0.0git
AMDGPUPostLegalizerCombiner.cpp
//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS
#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

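// Note: the AMDGPUGenPostLegalizeGICombiner.inc sections included throughout
// this file are generated by TableGen from the target's combine-rule
// definitions (the post-legalizer rules in AMDGPUCombine.td); each
// GET_GICOMBINER_* guard pulls in a different slice of that generated code.
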
class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and sign extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(MachineInstr &MI,
                                   MachineInstr *&MatchInfo) const;
  void applyCombineSignExtendInReg(MachineInstr &MI,
                                   MachineInstr *&MatchInfo) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
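    // For example (an illustrative MIR sketch; register names are made up), a
    // logical right shift of an s64 by a constant of at least 32:
    //   %r:_(s64) = G_LSHR %x:_(s64), 40
    // becomes roughly
    //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
    //   %s:_(s32) = G_LSHR %hi, 8
    //   %zero:_(s32) = G_CONSTANT i32 0
    //   %r:_(s64) = G_MERGE_VALUES %s, %zero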
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

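// Match a G_SELECT whose condition is a single-use G_FCMP comparing the same
// two values that are being selected between, e.g. (illustrative MIR):
//   %c:_(s1) = G_FCMP floatpred(ogt), %x:_(s32), %y:_(s32)
//   %r:_(s32) = G_SELECT %c, %x, %y
// This is a max-like pattern that the apply step below turns into
// G_AMDGPU_FMAX_LEGACY.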
bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

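// Match an integer-to-float conversion whose source has at most its low 8
// bits set, so it can use the hardware's cvt_f32_ubyte0 instruction. For
// example (illustrative), converting a value masked with 0xff:
//   %b:_(s32) = G_AND %x, %mask255
//   %f:_(s32) = G_UITOFP %b
// qualifies because known-bits proves bits 8 and up are zero.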
bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

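// Fold rcp(sqrt(x)) and sqrt(rcp(x)) into amdgcn.rsq(x): both compositions
// compute 1/sqrt(x), and the contract flag checked below is what licenses
// fusing the two operations into one.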
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}

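// Fold a constant shift of the conversion source into the byte index selected
// by the opcode, e.g. (illustrative MIR):
//   %s:_(s32) = G_LSHR %x:_(s32), 16
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %s
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %x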
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.

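// For example (illustrative MIR), a sign-extended unsigned byte load:
//   %v:_(s32) = G_AMDGPU_BUFFER_LOAD_UBYTE ...
//   %r:_(s32) = G_SEXT_INREG %v, 8
// becomes a single signed load:
//   %r:_(s32) = G_AMDGPU_BUFFER_LOAD_SBYTE ...
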
// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
  Register Op0Reg = MI.getOperand(1).getReg();
  SubwordBufferLoad = MRI.getVRegDef(Op0Reg);

  if (!MRI.hasOneNonDBGUse(Op0Reg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
         SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
  // Modify the opcode and the destination of buffer_load_{u8, u16}:
  // Replace the opcode.
  unsigned Opc =
      SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
          ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
          : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
  SubwordBufferLoad->setDesc(TII.get(Opc));
  // Update the destination register of SubwordBufferLoad with the destination
  // register of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm