//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, MachineInstr &FCmp,
                           FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinFMaxLegacy(MachineInstr &MI,
                                       const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
  void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and signed extension instructions to generate
  // signed buffer load instructions.
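  //   e.g. (G_SEXT_INREG (G_AMDGPU_BUFFER_LOAD_UBYTE ...), 8)
  //          -> (G_AMDGPU_BUFFER_LOAD_SBYTE ...)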
  bool matchCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
  void applyCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher
  // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32
  // bits are zero extended.
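  // (33 sign bits mean both operands are representable as signed 32-bit
  // values, so the 64-bit multiply can be narrowed to a 32 x 32 -> 64 form.)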
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
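    // (Rough sketch: an s64 shift by a constant >= 32 becomes a
    // G_UNMERGE_VALUES of the source plus a 32-bit shift of the relevant
    // half; the exact sequence is produced by
    // CombinerHelper::tryCombineShiftToUnmerge.)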
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, MachineInstr &FCmp, FMinFMaxLegacyInfo &Info) const {
  if (!MRI.hasOneNonDBGUse(FCmp.getOperand(0).getReg()))
    return false;

  Info.Pred =
      static_cast<CmpInst::Predicate>(FCmp.getOperand(1).getPredicate());
  Info.LHS = FCmp.getOperand(2).getReg();
  Info.RHS = FCmp.getOperand(3).getReg();
  Register True = MI.getOperand(2).getReg();
  Register False = MI.getOperand(3).getReg();

  // TODO: Handle the case where the selected value is an fneg and the
  // compared constant is the negation of the selected value.
  if ((Info.LHS != True || Info.RHS != False) &&
      (Info.LHS != False || Info.RHS != True))
    return false;

  // Invert the predicate if necessary so that the apply function can assume
  // that the select operands are the same as the fcmp operands.
  // (select (fcmp P, L, R), R, L) -> (select (fcmp !P, L, R), L, R)
  if (Info.LHS != True)
    Info.Pred = CmpInst::getInversePredicate(Info.Pred);

  // Only match </<=/>=/> not ==/!= etc.
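  // (Predicates that are unchanged by swapping their operands, e.g.
  // oeq/une/ord/uno, fail this test and are rejected.)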
  return Info.Pred != CmpInst::getSwappedPredicate(Info.Pred);
}

void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
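  // Among the matched predicates (</<=/>/>=), those with the FCMP_OGT bit set
  // are the "greater" forms and select fmax_legacy; the others select
  // fmin_legacy.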
  unsigned Opc = (Info.Pred & CmpInst::FCMP_OGT) ? AMDGPU::G_AMDGPU_FMAX_LEGACY
                                                 : AMDGPU::G_AMDGPU_FMIN_LEGACY;
  Register X = Info.LHS;
  Register Y = Info.RHS;
  if (Info.Pred == CmpInst::getUnorderedPredicate(Info.Pred)) {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    std::swap(X, Y);
  }

  B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
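    // The byte-0 conversion only reads the low 8 bits, so require every bit
    // above them to be known zero.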
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

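  // Both the rcp and the sqrt must carry the 'contract' fast-math flag;
  // fusing them into a single rsq changes the intermediate rounding.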
  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}

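// Transform (fdiv y, (sqrt x)) -> (fmul y, (rsq x)). The fdiv/sqrt shape (and
// the f16 restriction implied by the name) is matched by the generated
// combiner rules; these hooks only check that the sqrt has a single use and
// then emit the rsq.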
bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
    MachineInstr &MI) const {
  Register Sqrt = MI.getOperand(2).getReg();
  return MRI.hasOneNonDBGUse(Sqrt);
}

void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
    MachineInstr &MI, const Register &X) const {
  Register Dst = MI.getOperand(0).getReg();
  Register Y = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  uint32_t Flags = MI.getFlags();
  Register RSQ = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {DstTy})
                     .addUse(X)
                     .setMIFlags(Flags)
                     .getReg(0);
  B.buildFMul(Dst, RSQ, Y, Flags);
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
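  // G_AMDGPU_CVT_F32_UBYTEn converts byte n of its source, so a constant
  // shift of the source by a multiple of 8 can be folded into the byte index.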
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

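// An fcanonicalize whose source is already known to be canonical is a no-op
// and can simply be replaced by that source.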
bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.

// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  Register LoadReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LoadReg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
  int64_t Width = MI.getOperand(2).getImm();
  switch (LoadMI->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
    return Width == 16;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
    return Width == 16;
  }
  return false;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  auto [LoadMI, NewOpcode] = MatchData;
  LoadMI->setDesc(TII.get(NewOpcode));
  // Update the destination register of the load with the destination register
  // of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  LoadMI->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }
  return false;
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
  // Disable fixed-point iteration to reduce compile time.
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // Legalizer performs DCE, so a full DCE pass is unnecessary.
  CInfo.EnableFullDCE = false;
  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm