Static analyzer report for build/source/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp:
warning at line 265, column 8 — value stored to 'IsShr' during its initialization is never read.
1 | //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass does combining of machine instructions at the generic MI level, |
10 | // after the legalizer. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AMDGPU.h" |
15 | #include "AMDGPUCombinerHelper.h" |
16 | #include "AMDGPULegalizerInfo.h" |
17 | #include "GCNSubtarget.h" |
18 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
19 | #include "llvm/CodeGen/GlobalISel/Combiner.h" |
20 | #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" |
21 | #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" |
22 | #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" |
23 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
24 | #include "llvm/CodeGen/MachineDominators.h" |
25 | #include "llvm/CodeGen/TargetPassConfig.h" |
26 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
27 | #include "llvm/Target/TargetMachine.h" |
28 | |
#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
30 | |
31 | using namespace llvm; |
32 | using namespace MIPatternMatch; |
33 | |
34 | class AMDGPUPostLegalizerCombinerHelper { |
35 | protected: |
36 | MachineIRBuilder &B; |
37 | MachineFunction &MF; |
38 | MachineRegisterInfo &MRI; |
39 | const GCNSubtarget &Subtarget; |
40 | const SIInstrInfo &TII; |
41 | AMDGPUCombinerHelper &Helper; |
42 | |
43 | public: |
44 | AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, |
45 | AMDGPUCombinerHelper &Helper) |
46 | : B(B), MF(B.getMF()), MRI(*B.getMRI()), |
47 | Subtarget(MF.getSubtarget<GCNSubtarget>()), |
48 | TII(*Subtarget.getInstrInfo()), Helper(Helper){}; |
49 | |
50 | struct FMinFMaxLegacyInfo { |
51 | Register LHS; |
52 | Register RHS; |
53 | Register True; |
54 | Register False; |
55 | CmpInst::Predicate Pred; |
56 | }; |
57 | |
58 | // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize |
59 | bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info); |
60 | void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, |
61 | const FMinFMaxLegacyInfo &Info); |
62 | |
63 | bool matchUCharToFloat(MachineInstr &MI); |
64 | void applyUCharToFloat(MachineInstr &MI); |
65 | |
66 | bool matchRcpSqrtToRsq(MachineInstr &MI, |
67 | std::function<void(MachineIRBuilder &)> &MatchInfo); |
68 | |
69 | // FIXME: Should be able to have 2 separate matchdatas rather than custom |
70 | // struct boilerplate. |
71 | struct CvtF32UByteMatchInfo { |
72 | Register CvtVal; |
73 | unsigned ShiftOffset; |
74 | }; |
75 | |
76 | bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo); |
77 | void applyCvtF32UByteN(MachineInstr &MI, |
78 | const CvtF32UByteMatchInfo &MatchInfo); |
79 | |
80 | bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg); |
81 | |
82 | // Combine unsigned buffer load and signed extension instructions to generate |
83 | // signed buffer laod instructions. |
84 | bool matchCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo); |
85 | void applyCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo); |
86 | }; |
87 | |
88 | bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy( |
89 | MachineInstr &MI, FMinFMaxLegacyInfo &Info) { |
90 | // FIXME: Type predicate on pattern |
91 | if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32)) |
92 | return false; |
93 | |
94 | Register Cond = MI.getOperand(1).getReg(); |
95 | if (!MRI.hasOneNonDBGUse(Cond) || |
96 | !mi_match(Cond, MRI, |
97 | m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS)))) |
98 | return false; |
99 | |
100 | Info.True = MI.getOperand(2).getReg(); |
101 | Info.False = MI.getOperand(3).getReg(); |
102 | |
103 | // TODO: Handle case where the the selected value is an fneg and the compared |
104 | // constant is the negation of the selected value. |
105 | if (!(Info.LHS == Info.True && Info.RHS == Info.False) && |
106 | !(Info.LHS == Info.False && Info.RHS == Info.True)) |
107 | return false; |
108 | |
109 | switch (Info.Pred) { |
110 | case CmpInst::FCMP_FALSE: |
111 | case CmpInst::FCMP_OEQ: |
112 | case CmpInst::FCMP_ONE: |
113 | case CmpInst::FCMP_ORD: |
114 | case CmpInst::FCMP_UNO: |
115 | case CmpInst::FCMP_UEQ: |
116 | case CmpInst::FCMP_UNE: |
117 | case CmpInst::FCMP_TRUE: |
118 | return false; |
119 | default: |
120 | return true; |
121 | } |
122 | } |
123 | |
124 | void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy( |
125 | MachineInstr &MI, const FMinFMaxLegacyInfo &Info) { |
126 | B.setInstrAndDebugLoc(MI); |
127 | auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) { |
128 | B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags()); |
129 | }; |
130 | |
131 | switch (Info.Pred) { |
132 | case CmpInst::FCMP_ULT: |
133 | case CmpInst::FCMP_ULE: |
134 | if (Info.LHS == Info.True) |
135 | buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); |
136 | else |
137 | buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); |
138 | break; |
139 | case CmpInst::FCMP_OLE: |
140 | case CmpInst::FCMP_OLT: { |
141 | // We need to permute the operands to get the correct NaN behavior. The |
142 | // selected operand is the second one based on the failing compare with NaN, |
143 | // so permute it based on the compare type the hardware uses. |
144 | if (Info.LHS == Info.True) |
145 | buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); |
146 | else |
147 | buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); |
148 | break; |
149 | } |
150 | case CmpInst::FCMP_UGE: |
151 | case CmpInst::FCMP_UGT: { |
152 | if (Info.LHS == Info.True) |
153 | buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); |
154 | else |
155 | buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); |
156 | break; |
157 | } |
158 | case CmpInst::FCMP_OGT: |
159 | case CmpInst::FCMP_OGE: { |
160 | if (Info.LHS == Info.True) |
161 | buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); |
162 | else |
163 | buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); |
164 | break; |
165 | } |
166 | default: |
167 | llvm_unreachable("predicate should not have matched")::llvm::llvm_unreachable_internal("predicate should not have matched" , "llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp", 167 ); |
168 | } |
169 | |
170 | MI.eraseFromParent(); |
171 | } |
172 | |
173 | bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) { |
174 | Register DstReg = MI.getOperand(0).getReg(); |
175 | |
176 | // TODO: We could try to match extracting the higher bytes, which would be |
177 | // easier if i8 vectors weren't promoted to i32 vectors, particularly after |
178 | // types are legalized. v4i8 -> v4f32 is probably the only case to worry |
179 | // about in practice. |
180 | LLT Ty = MRI.getType(DstReg); |
181 | if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) { |
182 | Register SrcReg = MI.getOperand(1).getReg(); |
183 | unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); |
184 | assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64)(static_cast <bool> (SrcSize == 16 || SrcSize == 32 || SrcSize == 64) ? void (0) : __assert_fail ("SrcSize == 16 || SrcSize == 32 || SrcSize == 64" , "llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp", 184 , __extension__ __PRETTY_FUNCTION__)); |
185 | const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8); |
186 | return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask); |
187 | } |
188 | |
189 | return false; |
190 | } |
191 | |
192 | void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) { |
193 | B.setInstrAndDebugLoc(MI); |
194 | |
195 | const LLT S32 = LLT::scalar(32); |
196 | |
197 | Register DstReg = MI.getOperand(0).getReg(); |
198 | Register SrcReg = MI.getOperand(1).getReg(); |
199 | LLT Ty = MRI.getType(DstReg); |
200 | LLT SrcTy = MRI.getType(SrcReg); |
201 | if (SrcTy != S32) |
202 | SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0); |
203 | |
204 | if (Ty == S32) { |
205 | B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, |
206 | {SrcReg}, MI.getFlags()); |
207 | } else { |
208 | auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, |
209 | {SrcReg}, MI.getFlags()); |
210 | B.buildFPTrunc(DstReg, Cvt0, MI.getFlags()); |
211 | } |
212 | |
213 | MI.eraseFromParent(); |
214 | } |
215 | |
216 | bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq( |
217 | MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { |
218 | |
219 | auto getRcpSrc = [=](const MachineInstr &MI) { |
220 | MachineInstr *ResMI = nullptr; |
221 | if (MI.getOpcode() == TargetOpcode::G_INTRINSIC && |
222 | MI.getIntrinsicID() == Intrinsic::amdgcn_rcp) |
223 | ResMI = MRI.getVRegDef(MI.getOperand(2).getReg()); |
224 | |
225 | return ResMI; |
226 | }; |
227 | |
228 | auto getSqrtSrc = [=](const MachineInstr &MI) { |
229 | MachineInstr *SqrtSrcMI = nullptr; |
230 | auto Match = |
231 | mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI))); |
232 | (void)Match; |
233 | return SqrtSrcMI; |
234 | }; |
235 | |
236 | MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr; |
237 | // rcp(sqrt(x)) |
238 | if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) { |
239 | MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) { |
240 | B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false) |
241 | .addUse(SqrtSrcMI->getOperand(0).getReg()) |
242 | .setMIFlags(MI.getFlags()); |
243 | }; |
244 | return true; |
245 | } |
246 | |
247 | // sqrt(rcp(x)) |
248 | if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) { |
249 | MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) { |
250 | B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false) |
251 | .addUse(RcpSrcMI->getOperand(0).getReg()) |
252 | .setMIFlags(MI.getFlags()); |
253 | }; |
254 | return true; |
255 | } |
256 | |
257 | return false; |
258 | } |
259 | |
260 | bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN( |
261 | MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) { |
262 | Register SrcReg = MI.getOperand(1).getReg(); |
263 | |
264 | // Look through G_ZEXT. |
265 | bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg))); |
Value stored to 'IsShr' during its initialization is never read | |
266 | |
267 | Register Src0; |
268 | int64_t ShiftAmt; |
269 | IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt))); |
270 | if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) { |
271 | const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0; |
272 | |
273 | unsigned ShiftOffset = 8 * Offset; |
274 | if (IsShr) |
275 | ShiftOffset += ShiftAmt; |
276 | else |
277 | ShiftOffset -= ShiftAmt; |
278 | |
279 | MatchInfo.CvtVal = Src0; |
280 | MatchInfo.ShiftOffset = ShiftOffset; |
281 | return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0; |
282 | } |
283 | |
284 | // TODO: Simplify demanded bits. |
285 | return false; |
286 | } |
287 | |
288 | void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN( |
289 | MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) { |
290 | B.setInstrAndDebugLoc(MI); |
291 | unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8; |
292 | |
293 | const LLT S32 = LLT::scalar(32); |
294 | Register CvtSrc = MatchInfo.CvtVal; |
295 | LLT SrcTy = MRI.getType(MatchInfo.CvtVal); |
296 | if (SrcTy != S32) { |
297 | assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8)(static_cast <bool> (SrcTy.isScalar() && SrcTy. getSizeInBits() >= 8) ? void (0) : __assert_fail ("SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8" , "llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp", 297 , __extension__ __PRETTY_FUNCTION__)); |
298 | CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0); |
299 | } |
300 | |
301 | assert(MI.getOpcode() != NewOpc)(static_cast <bool> (MI.getOpcode() != NewOpc) ? void ( 0) : __assert_fail ("MI.getOpcode() != NewOpc", "llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp" , 301, __extension__ __PRETTY_FUNCTION__)); |
302 | B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags()); |
303 | MI.eraseFromParent(); |
304 | } |
305 | |
306 | bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize( |
307 | MachineInstr &MI, Register &Reg) { |
308 | const SITargetLowering *TLI = static_cast<const SITargetLowering *>( |
309 | MF.getSubtarget().getTargetLowering()); |
310 | Reg = MI.getOperand(1).getReg(); |
311 | return TLI->isCanonicalized(Reg, MF); |
312 | } |
313 | |
// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.

// Identify buffer_load_{u8, u16}.
320 | bool AMDGPUPostLegalizerCombinerHelper::matchCombineSignExtendInReg( |
321 | MachineInstr &MI, MachineInstr *&SubwordBufferLoad) { |
322 | Register Op0Reg = MI.getOperand(1).getReg(); |
323 | SubwordBufferLoad = MRI.getVRegDef(Op0Reg); |
324 | |
325 | if (!MRI.hasOneNonDBGUse(Op0Reg)) |
326 | return false; |
327 | |
328 | // Check if the first operand of the sign extension is a subword buffer load |
329 | // instruction. |
330 | return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE || |
331 | SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT; |
332 | } |
333 | |
334 | // Combine buffer_load_{u8, u16} and the sign extension instruction to generate |
335 | // buffer_load_{i8, i16}. |
336 | void AMDGPUPostLegalizerCombinerHelper::applyCombineSignExtendInReg( |
337 | MachineInstr &MI, MachineInstr *&SubwordBufferLoad) { |
338 | // Modify the opcode and the destination of buffer_load_{u8, u16}: |
339 | // Replace the opcode. |
340 | unsigned Opc = |
341 | SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE |
342 | ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE |
343 | : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT; |
344 | SubwordBufferLoad->setDesc(TII.get(Opc)); |
345 | // Update the destination register of SubwordBufferLoad with the destination |
346 | // register of the sign extension. |
347 | Register SignExtendInsnDst = MI.getOperand(0).getReg(); |
348 | SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst); |
349 | // Remove the sign extension. |
350 | MI.eraseFromParent(); |
351 | } |
352 | |
353 | class AMDGPUPostLegalizerCombinerHelperState { |
354 | protected: |
355 | AMDGPUCombinerHelper &Helper; |
356 | AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper; |
357 | |
358 | // Note: pointer is necessary because Target Predicates use |
359 | // "Subtarget->" |
360 | const GCNSubtarget *Subtarget; |
361 | |
362 | public: |
363 | AMDGPUPostLegalizerCombinerHelperState( |
364 | AMDGPUCombinerHelper &Helper, |
365 | AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper, |
366 | const GCNSubtarget &Subtarget) |
367 | : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper), |
368 | Subtarget(&Subtarget) {} |
369 | }; |
370 | |
371 | #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS |
372 | #include "AMDGPUGenPostLegalizeGICombiner.inc" |
373 | #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS |
374 | |
375 | namespace { |
376 | #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H |
377 | #include "AMDGPUGenPostLegalizeGICombiner.inc" |
378 | #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H |
379 | |
380 | class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo { |
381 | GISelKnownBits *KB; |
382 | MachineDominatorTree *MDT; |
383 | const GCNSubtarget &Subtarget; |
384 | |
385 | public: |
386 | AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; |
387 | |
388 | AMDGPUPostLegalizerCombinerInfo(const GCNSubtarget &Subtarget, bool EnableOpt, |
389 | bool OptSize, bool MinSize, |
390 | const AMDGPULegalizerInfo *LI, |
391 | GISelKnownBits *KB, MachineDominatorTree *MDT) |
392 | : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, |
393 | /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), |
394 | KB(KB), MDT(MDT), Subtarget(Subtarget) { |
395 | if (!GeneratedRuleCfg.parseCommandLineOption()) |
396 | report_fatal_error("Invalid rule identifier"); |
397 | } |
398 | |
399 | bool combine(GISelChangeObserver &Observer, MachineInstr &MI, |
400 | MachineIRBuilder &B) const override; |
401 | }; |
402 | |
403 | bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, |
404 | MachineInstr &MI, |
405 | MachineIRBuilder &B) const { |
406 | AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT, |
407 | LInfo); |
408 | AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper); |
409 | AMDGPUGenPostLegalizerCombinerHelper Generated( |
410 | GeneratedRuleCfg, Helper, PostLegalizerHelper, Subtarget); |
411 | |
412 | if (Generated.tryCombineAll(Observer, MI, B)) |
413 | return true; |
414 | |
415 | switch (MI.getOpcode()) { |
416 | case TargetOpcode::G_SHL: |
417 | case TargetOpcode::G_LSHR: |
418 | case TargetOpcode::G_ASHR: |
419 | // On some subtargets, 64-bit shift is a quarter rate instruction. In the |
420 | // common case, splitting this into a move and a 32-bit shift is faster and |
421 | // the same code size. |
422 | return Helper.tryCombineShiftToUnmerge(MI, 32); |
423 | } |
424 | |
425 | return false; |
426 | } |
427 | |
428 | #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP |
429 | #include "AMDGPUGenPostLegalizeGICombiner.inc" |
430 | #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP |
431 | |
432 | // Pass boilerplate |
433 | // ================ |
434 | |
435 | class AMDGPUPostLegalizerCombiner : public MachineFunctionPass { |
436 | public: |
437 | static char ID; |
438 | |
439 | AMDGPUPostLegalizerCombiner(bool IsOptNone = false); |
440 | |
441 | StringRef getPassName() const override { |
442 | return "AMDGPUPostLegalizerCombiner"; |
443 | } |
444 | |
445 | bool runOnMachineFunction(MachineFunction &MF) override; |
446 | |
447 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
448 | private: |
449 | bool IsOptNone; |
450 | }; |
451 | } // end anonymous namespace |
452 | |
453 | void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { |
454 | AU.addRequired<TargetPassConfig>(); |
455 | AU.setPreservesCFG(); |
456 | getSelectionDAGFallbackAnalysisUsage(AU); |
457 | AU.addRequired<GISelKnownBitsAnalysis>(); |
458 | AU.addPreserved<GISelKnownBitsAnalysis>(); |
459 | if (!IsOptNone) { |
460 | AU.addRequired<MachineDominatorTree>(); |
461 | AU.addPreserved<MachineDominatorTree>(); |
462 | } |
463 | MachineFunctionPass::getAnalysisUsage(AU); |
464 | } |
465 | |
466 | AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone) |
467 | : MachineFunctionPass(ID), IsOptNone(IsOptNone) { |
468 | initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry()); |
469 | } |
470 | |
471 | bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { |
472 | if (MF.getProperties().hasProperty( |
473 | MachineFunctionProperties::Property::FailedISel)) |
474 | return false; |
475 | auto *TPC = &getAnalysis<TargetPassConfig>(); |
476 | const Function &F = MF.getFunction(); |
477 | bool EnableOpt = |
478 | MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); |
479 | |
480 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
481 | const AMDGPULegalizerInfo *LI |
482 | = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo()); |
483 | |
484 | GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); |
485 | MachineDominatorTree *MDT = |
486 | IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); |
487 | AMDGPUPostLegalizerCombinerInfo PCInfo(ST, EnableOpt, F.hasOptSize(), |
488 | F.hasMinSize(), LI, KB, MDT); |
489 | Combiner C(PCInfo, TPC); |
490 | return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); |
491 | } |
492 | |
493 | char AMDGPUPostLegalizerCombiner::ID = 0; |
494 | INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,static void *initializeAMDGPUPostLegalizerCombinerPassOnce(PassRegistry &Registry) { |
495 | "Combine AMDGPU machine instrs after legalization",static void *initializeAMDGPUPostLegalizerCombinerPassOnce(PassRegistry &Registry) { |
496 | false, false)static void *initializeAMDGPUPostLegalizerCombinerPassOnce(PassRegistry &Registry) { |
497 | INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)initializeTargetPassConfigPass(Registry); |
498 | INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)initializeGISelKnownBitsAnalysisPass(Registry); |
499 | INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,PassInfo *PI = new PassInfo( "Combine AMDGPU machine instrs after legalization" , "amdgpu-postlegalizer-combiner", &AMDGPUPostLegalizerCombiner ::ID, PassInfo::NormalCtor_t(callDefaultCtor<AMDGPUPostLegalizerCombiner >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUPostLegalizerCombinerPassFlag ; void llvm::initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUPostLegalizerCombinerPassFlag , initializeAMDGPUPostLegalizerCombinerPassOnce, std::ref(Registry )); } |
500 | "Combine AMDGPU machine instrs after legalization", false,PassInfo *PI = new PassInfo( "Combine AMDGPU machine instrs after legalization" , "amdgpu-postlegalizer-combiner", &AMDGPUPostLegalizerCombiner ::ID, PassInfo::NormalCtor_t(callDefaultCtor<AMDGPUPostLegalizerCombiner >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUPostLegalizerCombinerPassFlag ; void llvm::initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUPostLegalizerCombinerPassFlag , initializeAMDGPUPostLegalizerCombinerPassOnce, std::ref(Registry )); } |
501 | false)PassInfo *PI = new PassInfo( "Combine AMDGPU machine instrs after legalization" , "amdgpu-postlegalizer-combiner", &AMDGPUPostLegalizerCombiner ::ID, PassInfo::NormalCtor_t(callDefaultCtor<AMDGPUPostLegalizerCombiner >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUPostLegalizerCombinerPassFlag ; void llvm::initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUPostLegalizerCombinerPassFlag , initializeAMDGPUPostLegalizerCombinerPassOnce, std::ref(Registry )); } |
502 | |
503 | namespace llvm { |
504 | FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) { |
505 | return new AMDGPUPostLegalizerCombiner(IsOptNone); |
506 | } |
507 | } // end namespace llvm |