LLVM 19.0.0git
AMDGPUPostLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// after the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
28#include "llvm/IR/IntrinsicsAMDGPU.h"
30
31#define GET_GICOMBINER_DEPS
32#include "AMDGPUGenPreLegalizeGICombiner.inc"
33#undef GET_GICOMBINER_DEPS
34
35#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
36
37using namespace llvm;
38using namespace MIPatternMatch;
39
40namespace {
41#define GET_GICOMBINER_TYPES
42#include "AMDGPUGenPostLegalizeGICombiner.inc"
43#undef GET_GICOMBINER_TYPES
44
45class AMDGPUPostLegalizerCombinerImpl : public Combiner {
46protected:
47 const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
48 const GCNSubtarget &STI;
49 const SIInstrInfo &TII;
50 // TODO: Make CombinerHelper methods const.
51 mutable AMDGPUCombinerHelper Helper;
52
53public:
54 AMDGPUPostLegalizerCombinerImpl(
55 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
56 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
57 const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
58 const GCNSubtarget &STI, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI);
60
61 static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }
62
63 bool tryCombineAllImpl(MachineInstr &I) const;
64 bool tryCombineAll(MachineInstr &I) const override;
65
66 struct FMinFMaxLegacyInfo {
69 Register True;
70 Register False;
72 };
73
74 // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
75 bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
76 void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
77 const FMinFMaxLegacyInfo &Info) const;
78
79 bool matchUCharToFloat(MachineInstr &MI) const;
80 void applyUCharToFloat(MachineInstr &MI) const;
81
82 bool
83 matchRcpSqrtToRsq(MachineInstr &MI,
84 std::function<void(MachineIRBuilder &)> &MatchInfo) const;
85
86 bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
87 void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;
88
89 // FIXME: Should be able to have 2 separate matchdatas rather than custom
90 // struct boilerplate.
91 struct CvtF32UByteMatchInfo {
92 Register CvtVal;
93 unsigned ShiftOffset;
94 };
95
96 bool matchCvtF32UByteN(MachineInstr &MI,
97 CvtF32UByteMatchInfo &MatchInfo) const;
98 void applyCvtF32UByteN(MachineInstr &MI,
99 const CvtF32UByteMatchInfo &MatchInfo) const;
100
101 bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;
102
103 // Combine unsigned buffer load and signed extension instructions to generate
104 // signed buffer laod instructions.
105 bool matchCombineSignExtendInReg(
106 MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
107 void applyCombineSignExtendInReg(
108 MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
109
110 // Find the s_mul_u64 instructions where the higher bits are either
111 // zero-extended or sign-extended.
112 bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;
113 // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher
114 // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32
115 // bits are zero extended.
116 void applyCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;
117
118private:
119#define GET_GICOMBINER_CLASS_MEMBERS
120#define AMDGPUSubtarget GCNSubtarget
121#include "AMDGPUGenPostLegalizeGICombiner.inc"
122#undef GET_GICOMBINER_CLASS_MEMBERS
123#undef AMDGPUSubtarget
124};
125
126#define GET_GICOMBINER_IMPL
127#define AMDGPUSubtarget GCNSubtarget
128#include "AMDGPUGenPostLegalizeGICombiner.inc"
129#undef AMDGPUSubtarget
130#undef GET_GICOMBINER_IMPL
131
132AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
133 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
134 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
135 const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
136 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
137 : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
138 TII(*STI.getInstrInfo()),
139 Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
141#include "AMDGPUGenPostLegalizeGICombiner.inc"
143{
144}
145
146bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
147 if (tryCombineAllImpl(MI))
148 return true;
149
150 switch (MI.getOpcode()) {
151 case TargetOpcode::G_SHL:
152 case TargetOpcode::G_LSHR:
153 case TargetOpcode::G_ASHR:
154 // On some subtargets, 64-bit shift is a quarter rate instruction. In the
155 // common case, splitting this into a move and a 32-bit shift is faster and
156 // the same code size.
157 return Helper.tryCombineShiftToUnmerge(MI, 32);
158 }
159
160 return false;
161}
162
163bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
164 MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
165 // FIXME: Type predicate on pattern
166 if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
167 return false;
168
169 Register Cond = MI.getOperand(1).getReg();
170 if (!MRI.hasOneNonDBGUse(Cond) ||
171 !mi_match(Cond, MRI,
172 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
173 return false;
174
175 Info.True = MI.getOperand(2).getReg();
176 Info.False = MI.getOperand(3).getReg();
177
178 // TODO: Handle case where the the selected value is an fneg and the compared
179 // constant is the negation of the selected value.
180 if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
181 !(Info.LHS == Info.False && Info.RHS == Info.True))
182 return false;
183
184 switch (Info.Pred) {
193 return false;
194 default:
195 return true;
196 }
197}
198
199void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
200 MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
201 B.setInstrAndDebugLoc(MI);
202 auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
203 B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
204 };
205
206 switch (Info.Pred) {
209 if (Info.LHS == Info.True)
210 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
211 else
212 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
213 break;
215 case CmpInst::FCMP_OLT: {
216 // We need to permute the operands to get the correct NaN behavior. The
217 // selected operand is the second one based on the failing compare with NaN,
218 // so permute it based on the compare type the hardware uses.
219 if (Info.LHS == Info.True)
220 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
221 else
222 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
223 break;
224 }
226 case CmpInst::FCMP_UGT: {
227 if (Info.LHS == Info.True)
228 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
229 else
230 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
231 break;
232 }
234 case CmpInst::FCMP_OGE: {
235 if (Info.LHS == Info.True)
236 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
237 else
238 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
239 break;
240 }
241 default:
242 llvm_unreachable("predicate should not have matched");
243 }
244
245 MI.eraseFromParent();
246}
247
248bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
249 MachineInstr &MI) const {
250 Register DstReg = MI.getOperand(0).getReg();
251
252 // TODO: We could try to match extracting the higher bytes, which would be
253 // easier if i8 vectors weren't promoted to i32 vectors, particularly after
254 // types are legalized. v4i8 -> v4f32 is probably the only case to worry
255 // about in practice.
256 LLT Ty = MRI.getType(DstReg);
257 if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
258 Register SrcReg = MI.getOperand(1).getReg();
259 unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
260 assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
261 const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
262 return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
263 }
264
265 return false;
266}
267
268void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
269 MachineInstr &MI) const {
270 B.setInstrAndDebugLoc(MI);
271
272 const LLT S32 = LLT::scalar(32);
273
274 Register DstReg = MI.getOperand(0).getReg();
275 Register SrcReg = MI.getOperand(1).getReg();
276 LLT Ty = MRI.getType(DstReg);
277 LLT SrcTy = MRI.getType(SrcReg);
278 if (SrcTy != S32)
279 SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
280
281 if (Ty == S32) {
282 B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
283 MI.getFlags());
284 } else {
285 auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
286 MI.getFlags());
287 B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
288 }
289
290 MI.eraseFromParent();
291}
292
293bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
295 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
296 auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
297 if (!MI.getFlag(MachineInstr::FmContract))
298 return nullptr;
299
300 if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
301 if (GI->is(Intrinsic::amdgcn_rcp))
302 return MRI.getVRegDef(MI.getOperand(2).getReg());
303 }
304 return nullptr;
305 };
306
307 auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
308 if (!MI.getFlag(MachineInstr::FmContract))
309 return nullptr;
310 MachineInstr *SqrtSrcMI = nullptr;
311 auto Match =
312 mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
313 (void)Match;
314 return SqrtSrcMI;
315 };
316
317 MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
318 // rcp(sqrt(x))
319 if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
320 MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
321 B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
322 .addUse(SqrtSrcMI->getOperand(0).getReg())
323 .setMIFlags(MI.getFlags());
324 };
325 return true;
326 }
327
328 // sqrt(rcp(x))
329 if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
330 MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
331 B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
332 .addUse(RcpSrcMI->getOperand(0).getReg())
333 .setMIFlags(MI.getFlags());
334 };
335 return true;
336 }
337 return false;
338}
339
340bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
341 MachineInstr &MI) const {
342 Register Sqrt = MI.getOperand(2).getReg();
343 return MRI.hasOneNonDBGUse(Sqrt);
344}
345
346void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
347 MachineInstr &MI, const Register &X) const {
348 Register Dst = MI.getOperand(0).getReg();
349 Register Y = MI.getOperand(1).getReg();
350 LLT DstTy = MRI.getType(Dst);
351 uint32_t Flags = MI.getFlags();
352 Register RSQ = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {DstTy})
353 .addUse(X)
354 .setMIFlags(Flags)
355 .getReg(0);
356 B.buildFMul(Dst, RSQ, Y, Flags);
357 MI.eraseFromParent();
358}
359
360bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
361 MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
362 Register SrcReg = MI.getOperand(1).getReg();
363
364 // Look through G_ZEXT.
365 bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
366
367 Register Src0;
368 int64_t ShiftAmt;
369 IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
370 if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
371 const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
372
373 unsigned ShiftOffset = 8 * Offset;
374 if (IsShr)
375 ShiftOffset += ShiftAmt;
376 else
377 ShiftOffset -= ShiftAmt;
378
379 MatchInfo.CvtVal = Src0;
380 MatchInfo.ShiftOffset = ShiftOffset;
381 return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
382 }
383
384 // TODO: Simplify demanded bits.
385 return false;
386}
387
388void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
389 MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
390 B.setInstrAndDebugLoc(MI);
391 unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
392
393 const LLT S32 = LLT::scalar(32);
394 Register CvtSrc = MatchInfo.CvtVal;
395 LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
396 if (SrcTy != S32) {
397 assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
398 CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
399 }
400
401 assert(MI.getOpcode() != NewOpc);
402 B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
403 MI.eraseFromParent();
404}
405
406bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
407 MachineInstr &MI, Register &Reg) const {
408 const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
409 MF.getSubtarget().getTargetLowering());
410 Reg = MI.getOperand(1).getReg();
411 return TLI->isCanonicalized(Reg, MF);
412}
413
// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.
418
419// Identify buffer_load_{u8, u16}.
420bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
421 MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
422 Register LoadReg = MI.getOperand(1).getReg();
423 if (!MRI.hasOneNonDBGUse(LoadReg))
424 return false;
425
426 // Check if the first operand of the sign extension is a subword buffer load
427 // instruction.
428 MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
429 int64_t Width = MI.getOperand(2).getImm();
430 switch (LoadMI->getOpcode()) {
431 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
432 MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
433 return Width == 8;
434 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
435 MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
436 return Width == 16;
437 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
438 MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
439 return Width == 8;
440 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
441 MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
442 return Width == 16;
443 }
444 return false;
445}
446
447// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
448// buffer_load_{i8, i16}.
449void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
450 MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
451 auto [LoadMI, NewOpcode] = MatchData;
452 LoadMI->setDesc(TII.get(NewOpcode));
453 // Update the destination register of the load with the destination register
454 // of the sign extension.
455 Register SignExtendInsnDst = MI.getOperand(0).getReg();
456 LoadMI->getOperand(0).setReg(SignExtendInsnDst);
457 // Remove the sign extension.
458 MI.eraseFromParent();
459}
460
461bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
462 MachineInstr &MI, unsigned &NewOpcode) const {
463 Register Src0 = MI.getOperand(1).getReg();
464 Register Src1 = MI.getOperand(2).getReg();
465 if (MRI.getType(Src0) != LLT::scalar(64))
466 return false;
467
468 if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
469 KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
470 NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
471 return true;
472 }
473
474 if (KB->computeNumSignBits(Src1) >= 33 &&
475 KB->computeNumSignBits(Src0) >= 33) {
476 NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
477 return true;
478 }
479 return false;
480}
481
482void AMDGPUPostLegalizerCombinerImpl::applyCombine_s_mul_u64(
483 MachineInstr &MI, unsigned &NewOpcode) const {
484 Helper.replaceOpcodeWith(MI, NewOpcode);
485}
486
487// Pass boilerplate
488// ================
489
490class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
491public:
492 static char ID;
493
494 AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
495
496 StringRef getPassName() const override {
497 return "AMDGPUPostLegalizerCombiner";
498 }
499
500 bool runOnMachineFunction(MachineFunction &MF) override;
501
502 void getAnalysisUsage(AnalysisUsage &AU) const override;
503
504private:
505 bool IsOptNone;
506 AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
507};
508} // end anonymous namespace
509
510void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
512 AU.setPreservesCFG();
516 if (!IsOptNone) {
519 }
521}
522
523AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
524 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
526
527 if (!RuleConfig.parseCommandLineOption())
528 report_fatal_error("Invalid rule identifier");
529}
530
531bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
532 if (MF.getProperties().hasProperty(
533 MachineFunctionProperties::Property::FailedISel))
534 return false;
535 auto *TPC = &getAnalysis<TargetPassConfig>();
536 const Function &F = MF.getFunction();
537 bool EnableOpt =
538 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
539
541 const AMDGPULegalizerInfo *LI =
542 static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
543
544 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
546 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
547
548 CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
549 LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
550
551 AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
552 RuleConfig, ST, MDT, LI);
553 return Impl.combineMachineInstrs();
554}
555
556char AMDGPUPostLegalizerCombiner::ID = 0;
557INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
558 "Combine AMDGPU machine instrs after legalization", false,
559 false)
562INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
563 "Combine AMDGPU machine instrs after legalization", false,
564 false)
565
566namespace llvm {
568 return new AMDGPUPostLegalizerCombiner(IsOptNone);
569}
570} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
This contains common combine transformations that may be used in a combine pass.
static const LLT S32
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
Combine AMDGPU machine instrs after legalization
#define DEBUG_TYPE
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Hexagon Vector Combine
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static StringRef getName(Value *V)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:966
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:969
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:983
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:972
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:981
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:970
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:971
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:980
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:974
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:977
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:978
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:973
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:975
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:982
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:979
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:968
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:976
Combiner implementation.
Definition: Combiner.h:34
virtual bool tryCombineAll(MachineInstr &I) const =0
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
The CSE Analysis object.
Definition: CSEInfo.h:69
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth=5) const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
operand_type_match m_Reg()
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_FSQRT > m_GFSqrt(const SrcTy &Src)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:1140
auto instrs(const MachineBasicBlock &BB)