LLVM 23.0.0git
AMDGPURewriteAGPRCopyMFMA.cpp
Go to the documentation of this file.
1//===-- AMDGPURewriteAGPRCopyMFMA.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file \brief Try to replace MFMA instructions using VGPRs with MFMA
10/// instructions using AGPRs. We expect MFMAs to be selected using VGPRs, and
11/// only use AGPRs if it helps avoid spilling. In this case, the MFMA will have
12/// copies between AGPRs and VGPRs and the AGPR variant of an MFMA pseudo. This
13/// pass will attempt to delete the cross register bank copy and replace the
14/// MFMA opcode.
15///
16/// TODO:
17/// - Handle rewrites of phis. This must be more careful than normal about the
18/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
19/// loop, so it depends on the exact assignment of the copy.
20///
21/// - Update LiveIntervals incrementally instead of recomputing from scratch
22///
23//===----------------------------------------------------------------------===//
24
25#include "AMDGPU.h"
26#include "GCNSubtarget.h"
28#include "SIRegisterInfo.h"
29#include "llvm/ADT/Statistic.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"
43
44DEBUG_COUNTER(RewriteAGPRCopyMFMACounter, DEBUG_TYPE,
45 "Controls which MFMA chains are rewritten to AGPR form");
46
47namespace {
48
49STATISTIC(NumMFMAsRewrittenToAGPR,
50 "Number of MFMA instructions rewritten to use AGPR form");
51
52/// Map from spill slot frame index to list of instructions which reference it.
53using SpillReferenceMap = DenseMap<int, SmallVector<MachineInstr *, 4>>;
54
55class AMDGPURewriteAGPRCopyMFMAImpl {
57 const GCNSubtarget &ST;
58 const SIInstrInfo &TII;
59 const SIRegisterInfo &TRI;
61 VirtRegMap &VRM;
62 LiveRegMatrix &LRM;
63 LiveIntervals &LIS;
64 LiveStacks &LSS;
65 const RegisterClassInfo &RegClassInfo;
66
67 bool attemptReassignmentsToAGPR(SmallSetVector<Register, 4> &InterferingRegs,
68 MCPhysReg PrefPhysReg) const;
69
70public:
71 AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
73 LiveStacks &LSS,
74 const RegisterClassInfo &RegClassInfo)
75 : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
76 TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
77 LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
78
79 bool isRewriteCandidate(const MachineInstr &MI) const {
80 return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
81 }
82
83 /// Find AV_* registers assigned to AGPRs (or virtual registers which were
84 /// already required to be AGPR).
85 ///
86 /// \return the assigned physical register that \p VReg is assigned to if it
87 /// is an AGPR, otherwise MCRegister().
88 MCRegister getAssignedAGPR(Register VReg) const {
89 MCRegister PhysReg = VRM.getPhys(VReg);
90 if (!PhysReg)
91 return MCRegister();
92
93 // If this is an AV register, we have to check if the actual assignment is
94 // to an AGPR
95 const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
96 return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
97 }
98
99 bool tryReassigningMFMAChain(MachineInstr &MFMA, Register MFMAHintReg,
100 MCPhysReg PhysRegHint) const;
101
102 /// Compute the register class constraints based on the uses of \p Reg,
103 /// excluding MFMA uses from which can be rewritten to change the register
104 /// class constraint. MFMA scale operands need to be constraint checked.
105 /// This should be nearly identical to MachineRegisterInfo::recomputeRegClass.
106
107 /// \p RewriteCandidates will collect the set of MFMA instructions that need
108 /// to have the opcode mutated to perform the replacement.
109 ///
110 /// \p RewriteRegs will accumulate the set of register used by those MFMAs
111 /// that need to have the register classes adjusted.
112 bool recomputeRegClassExceptRewritable(
113 Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
114 SmallSetVector<Register, 4> &RewriteRegs) const;
115
116 bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
117 bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
118
119 /// Replace spill instruction \p SpillMI which loads/stores from/to \p SpillFI
120 /// with a COPY to the replacement register value \p VReg.
121 void replaceSpillWithCopyToVReg(MachineInstr &SpillMI, int SpillFI,
122 Register VReg) const;
123
124 /// Create a map from frame index to use instructions for spills. If a use of
125 /// the frame index does not consist only of spill instructions, it will not
126 /// be included in the map.
127 void collectSpillIndexUses(ArrayRef<LiveInterval *> StackIntervals,
128 SpillReferenceMap &Map) const;
129
130 /// Attempt to unspill VGPRs by finding a free register and replacing the
131 /// spill instructions with copies.
132 void eliminateSpillsOfReassignedVGPRs() const;
133
134 bool run(MachineFunction &MF) const;
135};
136
137bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
138 Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
139 SmallSetVector<Register, 4> &RewriteRegs) const {
140 SmallVector<Register, 8> Worklist = {StartReg};
141
142 // Recursively visit all transitive MFMA users
143 while (!Worklist.empty()) {
144 Register Reg = Worklist.pop_back_val();
145 const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);
146
147 // Inflate to the equivalent AV_* class.
148 const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass(OldRC, MF);
149 if (OldRC == NewRC)
150 return false;
151
152 // Accumulate constraints from all uses.
153 for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
154 // Apply the effect of the given operand to NewRC.
155 MachineInstr *MI = MO.getParent();
156
157 // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the
158 // effects of rewrite candidates. It just so happens that we can use
159 // either AGPR or VGPR in src0/src1. We still need to check constraint
160 // effects for scale variant, which does not allow AGPR.
161 if (isRewriteCandidate(*MI)) {
162 int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
163 const MCInstrDesc &AGPRDesc = TII.get(AGPROp);
164 const TargetRegisterClass *NewRC =
165 TII.getRegClass(AGPRDesc, MO.getOperandNo());
166 if (!TRI.hasAGPRs(NewRC))
167 return false;
168
169 const MachineOperand *VDst =
170 TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
171 const MachineOperand *Src2 =
172 TII.getNamedOperand(*MI, AMDGPU::OpName::src2);
173 for (const MachineOperand *Op : {VDst, Src2}) {
174 if (!Op->isReg())
175 continue;
176
177 Register OtherReg = Op->getReg();
178 if (OtherReg.isPhysical())
179 return false;
180
181 if (OtherReg != Reg && RewriteRegs.insert(OtherReg))
182 Worklist.push_back(OtherReg);
183 }
184
185 if (!is_contained(RewriteCandidates, MI)) {
186 LLVM_DEBUG({
187 Register VDstPhysReg = VRM.getPhys(VDst->getReg());
188 dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
189 << " Dst=[" << printReg(VDst->getReg()) << " => "
190 << printReg(VDstPhysReg, &TRI);
191
192 if (Src2->isReg()) {
193 Register Src2PhysReg = VRM.getPhys(Src2->getReg());
194 dbgs() << "], Src2=[" << printReg(Src2->getReg(), &TRI) << " => "
195 << printReg(Src2PhysReg, &TRI);
196 }
197
198 dbgs() << "]: " << MI;
199 });
200
201 RewriteCandidates.push_back(MI);
202 }
203
204 continue;
205 }
206
207 unsigned OpNo = &MO - &MI->getOperand(0);
208 NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, &TII, &TRI);
209 if (!NewRC || NewRC == OldRC) {
210 LLVM_DEBUG(dbgs() << "User of " << printReg(Reg, &TRI)
211 << " cannot be reassigned to "
212 << (NewRC ? TRI.getRegClassName(NewRC) : "NULL")
213 << ": " << *MI);
214 return false;
215 }
216 }
217 }
218
219 return true;
220}
221
222bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
223 MachineInstr &MFMA, Register MFMAHintReg, MCPhysReg PhysRegHint) const {
224 // src2 and dst have the same physical class constraint; try to preserve
225 // the original src2 subclass if one were to exist.
226 SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
227 SmallSetVector<Register, 4> RewriteRegs;
228
229 // Make sure we reassign the MFMA we found the copy from first. We want
230 // to ensure dst ends up in the physreg we were originally copying to.
231 RewriteRegs.insert(MFMAHintReg);
232
233 // We've found av = COPY (MFMA) (or MFMA (v = COPY av)) and need to verify
234 // that we can trivially rewrite src2 to use the new AGPR. If we can't
235 // trivially replace it, we're going to induce as many copies as we would have
236 // emitted in the first place, as well as need to assign another register, and
237 // need to figure out where to put them. The live range splitting is smarter
238 // than anything we're doing here, so trust it did something reasonable.
239 //
240 // Note recomputeRegClassExceptRewritable will consider the constraints of
241 // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
242 if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
243 RewriteRegs)) {
244 LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
245 << printReg(MFMAHintReg, &TRI) << '\n');
246 return false;
247 }
248
249 // If src2 and dst are different registers, we need to also reassign the
250 // input to an available AGPR if it is compatible with all other uses.
251 //
252 // If we can't reassign it, we'd need to introduce a different copy
253 // which is likely worse than the copy we'd be saving.
254 //
255 // It's likely that the MFMA is used in sequence with other MFMAs; if we
256 // cannot migrate the full use/def chain of MFMAs, we would need to
257 // introduce intermediate copies somewhere. So we only make the
258 // transform if all the interfering MFMAs can also be migrated. Collect
259 // the set of rewritable MFMAs and check if we can assign an AGPR at
260 // that point.
261 //
262 // If any of the MFMAs aren't reassignable, we give up and rollback to
263 // the original register assignments.
264
265 using RecoloringStack =
267 RecoloringStack TentativeReassignments;
268
269 for (Register RewriteReg : RewriteRegs) {
270 LiveInterval &LI = LIS.getInterval(RewriteReg);
271 TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
272 LRM.unassign(LI);
273 }
274
275 if (!DebugCounter::shouldExecute(RewriteAGPRCopyMFMACounter) ||
276 !attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
277 // Roll back the register assignments to the original state.
278 for (auto [LI, OldAssign] : TentativeReassignments) {
279 if (VRM.hasPhys(LI->reg()))
280 LRM.unassign(*LI);
281 LRM.assign(*LI, OldAssign);
282 }
283
284 return false;
285 }
286
287 // Fixup the register classes of the virtual registers now that we've
288 // committed to the reassignments.
289 for (Register InterferingReg : RewriteRegs) {
290 const TargetRegisterClass *EquivalentAGPRRegClass =
291 TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
292 MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
293 }
294
295 for (MachineInstr *RewriteCandidate : RewriteCandidates) {
296 int NewMFMAOp =
297 AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
298 RewriteCandidate->setDesc(TII.get(NewMFMAOp));
299 ++NumMFMAsRewrittenToAGPR;
300 }
301
302 return true;
303}
304
305/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
306/// preference to use \p PhysReg first. Returns false if the reassignments
307/// cannot be trivially performed.
308bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
309 SmallSetVector<Register, 4> &InterferingRegs, MCPhysReg PrefPhysReg) const {
310 // FIXME: The ordering may matter here, but we're just taking uselistorder
311 // with the special case of ensuring to process the starting instruction
312 // first. We probably should extract the priority advisor out of greedy and
313 // use that ordering.
314 for (Register InterferingReg : InterferingRegs) {
315 LiveInterval &ReassignLI = LIS.getInterval(InterferingReg);
316 const TargetRegisterClass *EquivalentAGPRRegClass =
317 TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
318
319 MCPhysReg Assignable = AMDGPU::NoRegister;
320 if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&
321 LRM.checkInterference(ReassignLI, PrefPhysReg) ==
323 // First try to assign to the AGPR we were already copying to. This
324 // should be the first assignment we attempt. We have to guard
325 // against the use being a subregister (which doesn't have an exact
326 // class match).
327
328 // TODO: If this does happen to be a subregister use, we should
329 // still try to assign to a subregister of the original copy result.
330 Assignable = PrefPhysReg;
331 } else {
332 ArrayRef<MCPhysReg> AllocOrder =
333 RegClassInfo.getOrder(EquivalentAGPRRegClass);
334 for (MCPhysReg Reg : AllocOrder) {
335 if (LRM.checkInterference(ReassignLI, Reg) == LiveRegMatrix::IK_Free) {
336 Assignable = Reg;
337 break;
338 }
339 }
340 }
341
342 if (!Assignable) {
343 LLVM_DEBUG(dbgs() << "Unable to reassign VGPR "
344 << printReg(InterferingReg, &TRI)
345 << " to a free AGPR\n");
346 return false;
347 }
348
349 LLVM_DEBUG(dbgs() << "Reassigning VGPR " << printReg(InterferingReg, &TRI)
350 << " to " << printReg(Assignable, &TRI) << '\n');
351 LRM.assign(ReassignLI, Assignable);
352 }
353
354 return true;
355}
356
357/// Identify copies that look like:
358/// %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
359/// %agpr = COPY %vgpr
360///
361/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
362/// versions of the MFMA. This should cover the common case.
363bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
364 Register VReg, MCRegister AssignedAGPR) const {
365 bool MadeChange = false;
366 for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
367 if (!UseMI.isCopy())
368 continue;
369
370 Register CopySrcReg = UseMI.getOperand(1).getReg();
371 if (!CopySrcReg.isVirtual())
372 continue;
373
374 // TODO: Handle loop phis copied to AGPR. e.g.
375 //
376 // loop:
377 // %phi:vgpr = COPY %mfma:vgpr
378 // %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
379 // s_cbranch_vccnz loop
380 //
381 // endloop:
382 // %agpr = mfma
383 //
384 // We need to be sure that %phi is assigned to the same physical register as
385 // %mfma, or else we will just be moving copies into the loop.
386
387 for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
388 if (isRewriteCandidate(CopySrcDefMI) &&
389 tryReassigningMFMAChain(
390 CopySrcDefMI, CopySrcDefMI.getOperand(0).getReg(), AssignedAGPR))
391 MadeChange = true;
392 }
393 }
394
395 return MadeChange;
396}
397
398/// Identify copies that look like:
399/// %src:vgpr = COPY %src:agpr
400/// %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
401///
402/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
403/// versions of the MFMA. This should cover rarer cases, and will generally be
404/// redundant with tryFoldCopiesToAGPR.
405bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
406 Register VReg, MCRegister AssignedAGPR) const {
407 bool MadeChange = false;
408 for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
409 if (!UseMI.isCopy())
410 continue;
411
412 Register CopyDstReg = UseMI.getOperand(0).getReg();
413 if (!CopyDstReg.isVirtual())
414 continue;
415 for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
416 if (!CopyUseMO.readsReg())
417 continue;
418
419 MachineInstr &CopyUseMI = *CopyUseMO.getParent();
420 if (isRewriteCandidate(CopyUseMI)) {
421 if (tryReassigningMFMAChain(CopyUseMI, CopyDstReg,
422 VRM.getPhys(CopyDstReg)))
423 MadeChange = true;
424 }
425 }
426 }
427
428 return MadeChange;
429}
430
431void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(
432 MachineInstr &SpillMI, int SpillFI, Register VReg) const {
433 const DebugLoc &DL = SpillMI.getDebugLoc();
434 MachineBasicBlock &MBB = *SpillMI.getParent();
435 MachineInstr *NewCopy;
436 if (SpillMI.mayStore()) {
437 NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY), VReg)
438 .add(SpillMI.getOperand(0));
439 } else {
440 NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY))
441 .add(SpillMI.getOperand(0))
442 .addReg(VReg);
443 }
444
445 LIS.ReplaceMachineInstrInMaps(SpillMI, *NewCopy);
446 SpillMI.eraseFromParent();
447}
448
449void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(
450 ArrayRef<LiveInterval *> StackIntervals, SpillReferenceMap &Map) const {
451
452 SmallSet<int, 4> NeededFrameIndexes;
453 for (const LiveInterval *LI : StackIntervals)
454 NeededFrameIndexes.insert(LI->reg().stackSlotIndex());
455
456 for (MachineBasicBlock &MBB : MF) {
457 for (MachineInstr &MI : MBB) {
458 for (MachineOperand &MO : MI.operands()) {
459 if (!MO.isFI() || !NeededFrameIndexes.count(MO.getIndex()))
460 continue;
461
462 if (TII.isVGPRSpill(MI)) {
463 SmallVector<MachineInstr *, 4> &References = Map[MO.getIndex()];
464 References.push_back(&MI);
465 break;
466 }
467
468 // Verify this was really a spill instruction, if it's not just ignore
469 // all uses.
470
471 // TODO: This should probably be verifier enforced.
472 NeededFrameIndexes.erase(MO.getIndex());
473 Map.erase(MO.getIndex());
474 }
475 }
476 }
477}
478
479void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
480 unsigned NumSlots = LSS.getNumIntervals();
481 if (NumSlots == 0)
482 return;
483
484 MachineFrameInfo &MFI = MF.getFrameInfo();
485
486 SmallVector<LiveInterval *, 32> StackIntervals;
487 StackIntervals.reserve(NumSlots);
488
489 for (auto &[Slot, LI] : LSS) {
490 if (!MFI.isSpillSlotObjectIndex(Slot) || MFI.isDeadObjectIndex(Slot))
491 continue;
492
493 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
494 if (TRI.hasVGPRs(RC))
495 StackIntervals.push_back(&LI);
496 }
497
498 sort(StackIntervals, [](const LiveInterval *A, const LiveInterval *B) {
499 // The ordering has to be strictly weak.
500 /// Sort heaviest intervals first to prioritize their unspilling
501 if (A->weight() != B->weight())
502 return A->weight() > B->weight();
503
504 if (A->getSize() != B->getSize())
505 return A->getSize() > B->getSize();
506
507 // Tie breaker by number to avoid need for stable sort
508 return A->reg().stackSlotIndex() < B->reg().stackSlotIndex();
509 });
510
511 // FIXME: The APIs for dealing with the LiveInterval of a frame index are
512 // cumbersome. LiveStacks owns its LiveIntervals which refer to stack
513 // slots. We cannot use the usual LiveRegMatrix::assign and unassign on these,
514 // and must create a substitute virtual register to do so. This makes
515 // incremental updating here difficult; we need to actually perform the IR
516 // mutation to get the new vreg references in place to compute the register
517 // LiveInterval to perform an assignment to track the new interference
518 // correctly, and we can't simply migrate the LiveInterval we already have.
519 //
520 // To avoid walking through the entire function for each index, pre-collect
521 // all the instructions slot referencess.
522
523 DenseMap<int, SmallVector<MachineInstr *, 4>> SpillSlotReferences;
524 collectSpillIndexUses(StackIntervals, SpillSlotReferences);
525
526 for (LiveInterval *LI : StackIntervals) {
527 int Slot = LI->reg().stackSlotIndex();
528 auto SpillReferences = SpillSlotReferences.find(Slot);
529 if (SpillReferences == SpillSlotReferences.end())
530 continue;
531
532 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
533
534 LLVM_DEBUG(dbgs() << "Trying to eliminate " << printReg(Slot, &TRI)
535 << " by reassigning\n");
536
537 ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);
538
539 for (MCPhysReg PhysReg : AllocOrder) {
540 if (LRM.checkInterference(*LI, PhysReg) != LiveRegMatrix::IK_Free)
541 continue;
542
543 LLVM_DEBUG(dbgs() << "Reassigning " << *LI << " to "
544 << printReg(PhysReg, &TRI) << '\n');
545
546 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
547 Register NewVReg = MRI.createVirtualRegister(RC);
548
549 for (MachineInstr *SpillMI : SpillReferences->second)
550 replaceSpillWithCopyToVReg(*SpillMI, Slot, NewVReg);
551
552 // TODO: We should be able to transfer the information from the stack
553 // slot's LiveInterval without recomputing from scratch with the
554 // replacement vreg uses.
555 LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(NewVReg);
556 VRM.grow();
557 LRM.assign(NewLI, PhysReg);
558 MFI.RemoveStackObject(Slot);
559 break;
560 }
561 }
562}
563
564bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
565 // This only applies on subtargets that have a configurable AGPR vs. VGPR
566 // allocation.
567 if (!ST.hasGFX90AInsts())
568 return false;
569
570 // Early exit if no AGPRs were assigned.
571 if (!LRM.isPhysRegUsed(AMDGPU::AGPR0)) {
572 LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
573 return false;
574 }
575
576 bool MadeChange = false;
577
578 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
579 Register VReg = Register::index2VirtReg(I);
580 MCRegister AssignedAGPR = getAssignedAGPR(VReg);
581 if (!AssignedAGPR)
582 continue;
583
584 if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
585 MadeChange = true;
586 if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
587 MadeChange = true;
588 }
589
590 // If we've successfully rewritten some MFMAs, we've alleviated some VGPR
591 // pressure. See if we can eliminate some spills now that those registers are
592 // more available.
593 if (MadeChange)
594 eliminateSpillsOfReassignedVGPRs();
595
596 return MadeChange;
597}
598
599class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
600public:
601 static char ID;
602 RegisterClassInfo RegClassInfo;
603
604 AMDGPURewriteAGPRCopyMFMALegacy() : MachineFunctionPass(ID) {}
605
606 bool runOnMachineFunction(MachineFunction &MF) override;
607
608 StringRef getPassName() const override {
609 return "AMDGPU Rewrite AGPR-Copy-MFMA";
610 }
611
612 void getAnalysisUsage(AnalysisUsage &AU) const override {
613 AU.addRequired<LiveIntervalsWrapperPass>();
614 AU.addRequired<VirtRegMapWrapperLegacy>();
615 AU.addRequired<LiveRegMatrixWrapperLegacy>();
616 AU.addRequired<LiveStacksWrapperLegacy>();
617
618 AU.addPreserved<LiveIntervalsWrapperPass>();
619 AU.addPreserved<VirtRegMapWrapperLegacy>();
620 AU.addPreserved<LiveRegMatrixWrapperLegacy>();
621 AU.addPreserved<LiveStacksWrapperLegacy>();
622
623 AU.setPreservesAll();
625 }
626};
627
628} // End anonymous namespace.
629
630INITIALIZE_PASS_BEGIN(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
631 "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
636INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
637 "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
638
639char AMDGPURewriteAGPRCopyMFMALegacy::ID = 0;
640
642 AMDGPURewriteAGPRCopyMFMALegacy::ID;
643
644bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
645 MachineFunction &MF) {
646 if (skipFunction(MF.getFunction()))
647 return false;
648
649 RegClassInfo.runOnMachineFunction(MF);
650
651 auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
652 auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
653 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
654 auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
655 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
656 return Impl.run(MF);
657}
658
662 VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
665 LiveStacks &LSS = MFAM.getResult<LiveStacksAnalysis>(MF);
666 RegisterClassInfo RegClassInfo;
667 RegClassInfo.runOnMachineFunction(MF);
668
669 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
670 if (!Impl.run(MF))
671 return PreservedAnalyses::all();
673 PA.preserveSet<CFGAnalyses>()
674 .preserve<LiveStacksAnalysis>()
675 .preserve<VirtRegMapAnalysis>()
676 .preserve<SlotIndexesAnalysis>()
677 .preserve<LiveIntervalsAnalysis>()
678 .preserve<LiveRegMatrixAnalysis>();
679 return PA;
680}
MachineInstrBuilder & UseMI
AMDGPU Rewrite AGPR Copy MFMA
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static bool shouldExecute(CounterInfo &Counter)
Register reg() const
LiveInterval & getInterval(Register Reg)
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
void unassign(const LiveInterval &VirtReg, bool ClearAllReferencingSegments=false)
Unassign VirtReg from its PhysReg.
@ IK_Free
No interference, go ahead and assign.
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)
Check for interference before assigning VirtReg to PhysReg.
unsigned getNumIntervals() const
Definition LiveStacks.h:59
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineBasicBlock * getParent() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
iterator_range< def_instr_iterator > def_instructions(Register Reg) const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
iterator_range< reg_nodbg_iterator > reg_nodbg_operands(Register Reg) const
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
int stackSlotIndex() const
Compute the frame index from a register value representing a stack slot.
Definition Register.h:93
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
bool erase(const T &V)
Definition SmallSet.h:200
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
Definition VirtRegMap.h:91
LLVM_ABI void grow()
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
Definition VirtRegMap.h:87
LLVM_READONLY int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
char & AMDGPURewriteAGPRCopyMFMALegacyID
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.