//===-- AMDGPURewriteAGPRCopyMFMA.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file \brief Try to replace MFMA instructions using VGPRs with MFMA
/// instructions using AGPRs. We expect MFMAs to be selected using VGPRs, and
/// only use AGPRs if it helps avoid spilling. In this case, the MFMA will have
/// copies between AGPRs and VGPRs and the AGPR variant of an MFMA pseudo. This
/// pass will attempt to delete the cross register bank copy and replace the
/// MFMA opcode.
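///
/// As an illustrative sketch (the opcode and register classes are generic
/// placeholders, in the style of the examples later in this file), the input
/// pattern looks like:
///
///   %dst:vreg_512 = V_MFMA_xxx_vgprcd_e64 %a:av_64, %b:av_64, %src2:vreg_512
///   %acc:areg_512 = COPY %dst
///
/// If %dst and %src2 can be reassigned to AGPRs, the MFMA is rewritten to the
/// AGPR form of the pseudo (shown generically as V_MFMA_xxx_e64 here), leaving
/// an AGPR-to-AGPR copy of the same assigned register that is trivially
/// removable:
///
///   %dst:areg_512 = V_MFMA_xxx_e64 %a:av_64, %b:av_64, %src2:areg_512
///   %acc:areg_512 = COPY %dst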
///
/// TODO:
/// - Handle rewrites of phis. This must be more careful than normal about the
/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
/// loop, so it depends on the exact assignment of the copy.
///
/// - Update LiveIntervals incrementally instead of recomputing from scratch
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"

namespace {

STATISTIC(NumMFMAsRewrittenToAGPR,
          "Number of MFMA instructions rewritten to use AGPR form");

/// Map from spill slot frame index to list of instructions which reference it.
using SpillReferenceMap = DenseMap<int, SmallVector<MachineInstr *, 4>>;

class AMDGPURewriteAGPRCopyMFMAImpl {
  MachineFunction &MF;
  const GCNSubtarget &ST;
  const SIInstrInfo &TII;
  const SIRegisterInfo &TRI;
  MachineRegisterInfo &MRI;
  VirtRegMap &VRM;
  LiveRegMatrix &LRM;
  LiveIntervals &LIS;
  LiveStacks &LSS;
  const RegisterClassInfo &RegClassInfo;

  bool attemptReassignmentsToAGPR(SmallSetVector<Register, 4> &InterferingRegs,
                                  MCPhysReg PrefPhysReg) const;

public:
  AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
                                LiveRegMatrix &LRM, LiveIntervals &LIS,
                                LiveStacks &LSS,
                                const RegisterClassInfo &RegClassInfo)
      : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
        TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
        LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}

  bool isRewriteCandidate(const MachineInstr &MI) const {
    return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
  }

  /// Find AV_* registers assigned to AGPRs (or virtual registers which were
  /// already required to be AGPR).
  ///
  /// \return the physical register \p VReg is assigned to if it is an AGPR,
  /// otherwise MCRegister().
  MCRegister getAssignedAGPR(Register VReg) const {
    MCRegister PhysReg = VRM.getPhys(VReg);
    if (!PhysReg)
      return MCRegister();

    // If this is an AV register, we have to check if the actual assignment is
    // to an AGPR.
    const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
    return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
  }

  bool tryReassigningMFMAChain(MachineInstr &MFMA, Register MFMAHintReg,
                               MCPhysReg PhysRegHint) const;

  /// Compute the register class constraints based on the uses of \p Reg,
  /// excluding MFMA uses which can be rewritten to change the register class
  /// constraint. This should be nearly identical to
  /// MachineRegisterInfo::recomputeRegClass.
  ///
  /// \p RewriteCandidates will collect the set of MFMA instructions that need
  /// to have the opcode mutated to perform the replacement.
  ///
  /// \p RewriteRegs will accumulate the set of registers used by those MFMAs
  /// that need to have the register classes adjusted.
  bool recomputeRegClassExceptRewritable(
      Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
      SmallSetVector<Register, 4> &RewriteRegs) const;

  bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
  bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;

  /// Replace spill instruction \p SpillMI which loads/stores from/to
  /// \p SpillFI with a COPY to or from the replacement register value \p VReg.
  void replaceSpillWithCopyToVReg(MachineInstr &SpillMI, int SpillFI,
                                  Register VReg) const;

  /// Create a map from frame index to the spill instructions that use it. If
  /// the uses of a frame index do not consist only of spill instructions, it
  /// will not be included in the map.
  void collectSpillIndexUses(ArrayRef<LiveInterval *> StackIntervals,
                             SpillReferenceMap &Map) const;

  /// Attempt to unspill VGPRs by finding a free register and replacing the
  /// spill instructions with copies.
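  ///
  /// A sketch of the intended rewrite (pseudo-MIR; the spill pseudos and
  /// register class shown are illustrative):
  ///
  ///   SI_SPILL_V32_SAVE %x:vgpr_32, %stack.0, ...
  ///   ...
  ///   %y:vgpr_32 = SI_SPILL_V32_RESTORE %stack.0, ...
  ///
  /// becomes, when a physical register is free across the slot's live range:
  ///
  ///   %tmp:vgpr_32 = COPY %x
  ///   ...
  ///   %y:vgpr_32 = COPY %tmp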
  void eliminateSpillsOfReassignedVGPRs() const;

  bool run(MachineFunction &MF) const;
};

bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
    Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
    SmallSetVector<Register, 4> &RewriteRegs) const {
  SmallVector<Register, 8> Worklist = {StartReg};

  // Recursively visit all transitive MFMA users.
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();
    const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);

    // Inflate to the equivalent AV_* class.
    const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass(OldRC, MF);
    if (OldRC == NewRC)
      return false;

    // Accumulate constraints from all uses.
    for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
      // Apply the effect of the given operand to NewRC.
      MachineInstr *MI = MO.getParent();

      // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the
      // effects of rewrite candidates. It just so happens that we can use
      // either AGPR or VGPR in src0/src1, so don't bother checking the
      // constraint effects of the individual operands.
      if (isRewriteCandidate(*MI)) {
        const MachineOperand *VDst =
            TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
        const MachineOperand *Src2 =
            TII.getNamedOperand(*MI, AMDGPU::OpName::src2);
        for (const MachineOperand *Op : {VDst, Src2}) {
          if (!Op->isReg())
            continue;

          Register OtherReg = Op->getReg();
          if (OtherReg.isPhysical())
            return false;

          if (OtherReg != Reg && RewriteRegs.insert(OtherReg))
            Worklist.push_back(OtherReg);
        }

        if (!is_contained(RewriteCandidates, MI)) {
          LLVM_DEBUG({
            Register VDstPhysReg = VRM.getPhys(VDst->getReg());
            dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
                   << " Dst=[" << printReg(VDst->getReg(), &TRI) << " => "
                   << printReg(VDstPhysReg, &TRI);

            if (Src2->isReg()) {
              Register Src2PhysReg = VRM.getPhys(Src2->getReg());
              dbgs() << "], Src2=[" << printReg(Src2->getReg(), &TRI) << " => "
                     << printReg(Src2PhysReg, &TRI);
            }

            dbgs() << "]: " << *MI;
          });

          RewriteCandidates.push_back(MI);
        }

        continue;
      }

      unsigned OpNo = &MO - &MI->getOperand(0);
      NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, &TII, &TRI);
      if (!NewRC || NewRC == OldRC) {
        LLVM_DEBUG(dbgs() << "User of " << printReg(Reg, &TRI)
                          << " cannot be reassigned to "
                          << TRI.getRegClassName(NewRC) << ": " << *MI);
        return false;
      }
    }
  }

  return true;
}

bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
    MachineInstr &MFMA, Register MFMAHintReg, MCPhysReg PhysRegHint) const {
  // src2 and dst have the same physical class constraint; try to preserve
  // the original src2 subclass if one were to exist.
  SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
  SmallSetVector<Register, 4> RewriteRegs;

  // Make sure we reassign the MFMA we found the copy from first. We want
  // to ensure dst ends up in the physreg we were originally copying to.
  RewriteRegs.insert(MFMAHintReg);

  // We've found av = COPY (MFMA) (or MFMA (v = COPY av)) and need to verify
  // that we can trivially rewrite src2 to use the new AGPR. If we can't
  // trivially replace it, we're going to induce as many copies as we would have
  // emitted in the first place, as well as need to assign another register, and
  // need to figure out where to put them. The live range splitting is smarter
  // than anything we're doing here, so trust it did something reasonable.
  //
  // Note recomputeRegClassExceptRewritable will consider the constraints of
  // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
  if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
                                         RewriteRegs)) {
    LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
                      << printReg(MFMAHintReg, &TRI) << '\n');
    return false;
  }

  // If src2 and dst are different registers, we need to also reassign the
  // input to an available AGPR if it is compatible with all other uses.
  //
  // If we can't reassign it, we'd need to introduce a different copy
  // which is likely worse than the copy we'd be saving.
  //
  // It's likely that the MFMA is used in sequence with other MFMAs; if we
  // cannot migrate the full use/def chain of MFMAs, we would need to
  // introduce intermediate copies somewhere. So we only make the
  // transform if all the interfering MFMAs can also be migrated. Collect
  // the set of rewritable MFMAs and check if we can assign an AGPR at
  // that point.
  //
  // If any of the MFMAs aren't reassignable, we give up and rollback to
  // the original register assignments.

  using RecoloringStack =
      SmallVector<std::pair<LiveInterval *, MCRegister>, 4>;
  RecoloringStack TentativeReassignments;

  for (Register RewriteReg : RewriteRegs) {
    LiveInterval &LI = LIS.getInterval(RewriteReg);
    TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
    LRM.unassign(LI);
  }

  if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
    // Roll back the register assignments to the original state.
    for (auto [LI, OldAssign] : TentativeReassignments) {
      if (VRM.hasPhys(LI->reg()))
        LRM.unassign(*LI);
      LRM.assign(*LI, OldAssign);
    }

    return false;
  }

  // Fixup the register classes of the virtual registers now that we've
  // committed to the reassignments.
  for (Register InterferingReg : RewriteRegs) {
    const TargetRegisterClass *EquivalentAGPRRegClass =
        TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
    MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
  }

  for (MachineInstr *RewriteCandidate : RewriteCandidates) {
    int NewMFMAOp =
        AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
    RewriteCandidate->setDesc(TII.get(NewMFMAOp));
    ++NumMFMAsRewrittenToAGPR;
  }

  return true;
}

/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
/// preference to use \p PrefPhysReg first. Returns false if the reassignments
/// cannot be trivially performed.
bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
    SmallSetVector<Register, 4> &InterferingRegs, MCPhysReg PrefPhysReg) const {
  // FIXME: The ordering may matter here, but we're just taking uselistorder
  // with the special case of ensuring the starting instruction is processed
  // first. We probably should extract the priority advisor out of greedy and
  // use that ordering.
  for (Register InterferingReg : InterferingRegs) {
    LiveInterval &ReassignLI = LIS.getInterval(InterferingReg);
    const TargetRegisterClass *EquivalentAGPRRegClass =
        TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));

    MCPhysReg Assignable = AMDGPU::NoRegister;
    if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&
        LRM.checkInterference(ReassignLI, PrefPhysReg) ==
            LiveRegMatrix::IK_Free) {
      // First try to assign to the AGPR we were already copying to. This
      // should be the first assignment we attempt. We have to guard
      // against the use being a subregister (which doesn't have an exact
      // class match).

      // TODO: If this does happen to be a subregister use, we should
      // still try to assign to a subregister of the original copy result.
      Assignable = PrefPhysReg;
    } else {
      ArrayRef<MCPhysReg> AllocOrder =
          RegClassInfo.getOrder(EquivalentAGPRRegClass);
      for (MCPhysReg Reg : AllocOrder) {
        if (LRM.checkInterference(ReassignLI, Reg) == LiveRegMatrix::IK_Free) {
          Assignable = Reg;
          break;
        }
      }
    }

    if (!Assignable) {
      LLVM_DEBUG(dbgs() << "Unable to reassign VGPR "
                        << printReg(InterferingReg, &TRI)
                        << " to a free AGPR\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "Reassigning VGPR " << printReg(InterferingReg, &TRI)
                      << " to " << printReg(Assignable, &TRI) << '\n');
    LRM.assign(ReassignLI, Assignable);
  }

  return true;
}

/// Identify copies that look like:
///   %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
///   %agpr = COPY %vdst
///
/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
/// versions of the MFMA. This should cover the common case.
bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
    Register VReg, MCRegister AssignedAGPR) const {
  bool MadeChange = false;
  for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
    if (!UseMI.isCopy())
      continue;

    Register CopySrcReg = UseMI.getOperand(1).getReg();
    if (!CopySrcReg.isVirtual())
      continue;

    // TODO: Handle loop phis copied to AGPR. e.g.
    //
    // loop:
    //   %phi:vgpr = COPY %mfma:vgpr
    //   %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
    //   s_cbranch_vccnz loop
    //
    // endloop:
    //   %agpr = mfma
    //
    // We need to be sure that %phi is assigned to the same physical register
    // as %mfma, or else we will just be moving copies into the loop.

    for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
      if (isRewriteCandidate(CopySrcDefMI) &&
          tryReassigningMFMAChain(
              CopySrcDefMI, CopySrcDefMI.getOperand(0).getReg(), AssignedAGPR))
        MadeChange = true;
    }
  }

  return MadeChange;
}

/// Identify copies that look like:
///   %src2:vgpr = COPY %src2:agpr
///   %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src2:vgpr
///
/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
/// versions of the MFMA. This should cover rarer cases, and will generally be
/// redundant with tryFoldCopiesToAGPR.
bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
    Register VReg, MCRegister AssignedAGPR) const {
  bool MadeChange = false;
  for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
    if (!UseMI.isCopy())
      continue;

    Register CopyDstReg = UseMI.getOperand(0).getReg();
    if (!CopyDstReg.isVirtual())
      continue;
    for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
      if (!CopyUseMO.readsReg())
        continue;

      MachineInstr &CopyUseMI = *CopyUseMO.getParent();
      if (isRewriteCandidate(CopyUseMI)) {
        if (tryReassigningMFMAChain(CopyUseMI, CopyDstReg,
                                    VRM.getPhys(CopyDstReg)))
          MadeChange = true;
      }
    }
  }

  return MadeChange;
}

void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(
    MachineInstr &SpillMI, int SpillFI, Register VReg) const {
  const DebugLoc &DL = SpillMI.getDebugLoc();
  MachineBasicBlock &MBB = *SpillMI.getParent();
  MachineInstr *NewCopy;
  if (SpillMI.mayStore()) {
    NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY), VReg)
                  .add(SpillMI.getOperand(0));
  } else {
    NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY))
                  .add(SpillMI.getOperand(0))
                  .addReg(VReg);
  }

  LIS.ReplaceMachineInstrInMaps(SpillMI, *NewCopy);
  SpillMI.eraseFromParent();
}

void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(
    ArrayRef<LiveInterval *> StackIntervals, SpillReferenceMap &Map) const {

  SmallSet<int, 4> NeededFrameIndexes;
  for (const LiveInterval *LI : StackIntervals)
    NeededFrameIndexes.insert(LI->reg().stackSlotIndex());

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      for (MachineOperand &MO : MI.operands()) {
        if (!MO.isFI() || !NeededFrameIndexes.count(MO.getIndex()))
          continue;

        if (TII.isVGPRSpill(MI)) {
          SmallVector<MachineInstr *, 4> &References = Map[MO.getIndex()];
          References.push_back(&MI);
          break;
        }

        // Verify this was really a spill instruction; if it's not, just ignore
        // all uses of this frame index.

        // TODO: This should probably be verifier enforced.
        NeededFrameIndexes.erase(MO.getIndex());
        Map.erase(MO.getIndex());
      }
    }
  }
}

void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
  unsigned NumSlots = LSS.getNumIntervals();
  if (NumSlots == 0)
    return;

  MachineFrameInfo &MFI = MF.getFrameInfo();

  SmallVector<LiveInterval *, 32> StackIntervals;
  StackIntervals.reserve(NumSlots);

  for (auto &[Slot, LI] : LSS) {
    if (!MFI.isSpillSlotObjectIndex(Slot) || MFI.isDeadObjectIndex(Slot))
      continue;

    const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
    if (TRI.hasVGPRs(RC))
      StackIntervals.push_back(&LI);
  }

  sort(StackIntervals, [](const LiveInterval *A, const LiveInterval *B) {
    // Sort the heaviest intervals first to prioritize their unspilling.
    if (A->weight() != B->weight())
      return A->weight() > B->weight();

    if (A->getSize() != B->getSize())
      return A->getSize() > B->getSize();

    // Tie breaker by slot number to avoid need for stable sort.
    return A->reg().stackSlotIndex() < B->reg().stackSlotIndex();
  });

  // FIXME: The APIs for dealing with the LiveInterval of a frame index are
  // cumbersome. LiveStacks owns its LiveIntervals which refer to stack
  // slots. We cannot use the usual LiveRegMatrix::assign and unassign on these,
  // and must create a substitute virtual register to do so. This makes
  // incremental updating here difficult; we need to actually perform the IR
  // mutation to get the new vreg references in place, compute the replacement
  // register's LiveInterval, and then perform an assignment to track the new
  // interference correctly. We can't simply migrate the LiveInterval we
  // already have.
  //
  // To avoid walking through the entire function for each index, pre-collect
  // all the instruction slot references.

  SpillReferenceMap SpillSlotReferences;
  collectSpillIndexUses(StackIntervals, SpillSlotReferences);

  for (LiveInterval *LI : StackIntervals) {
    int Slot = LI->reg().stackSlotIndex();
    auto SpillReferences = SpillSlotReferences.find(Slot);
    if (SpillReferences == SpillSlotReferences.end())
      continue;

    const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);

    LLVM_DEBUG(dbgs() << "Trying to eliminate " << printReg(Slot, &TRI)
                      << " by reassigning\n");

    ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);

    for (MCPhysReg PhysReg : AllocOrder) {
      if (LRM.checkInterference(*LI, PhysReg) != LiveRegMatrix::IK_Free)
        continue;

      LLVM_DEBUG(dbgs() << "Reassigning " << *LI << " to "
                        << printReg(PhysReg, &TRI) << '\n');

      Register NewVReg = MRI.createVirtualRegister(RC);

      for (MachineInstr *SpillMI : SpillReferences->second)
        replaceSpillWithCopyToVReg(*SpillMI, Slot, NewVReg);

      // TODO: We should be able to transfer the information from the stack
      // slot's LiveInterval without recomputing from scratch with the
      // replacement vreg uses.
      LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(NewVReg);
      VRM.grow();
      LRM.assign(NewLI, PhysReg);
      MFI.RemoveStackObject(Slot);
      break;
    }
  }
}

bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
  // This only applies on subtargets that have a configurable AGPR vs. VGPR
  // allocation.
  if (!ST.hasGFX90AInsts())
    return false;

  // Early exit if no AGPRs were assigned.
  if (!LRM.isPhysRegUsed(AMDGPU::AGPR0)) {
    LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
    return false;
  }

  bool MadeChange = false;

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    Register VReg = Register::index2VirtReg(I);
    MCRegister AssignedAGPR = getAssignedAGPR(VReg);
    if (!AssignedAGPR)
      continue;

    if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
      MadeChange = true;
    if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
      MadeChange = true;
  }

  // If we've successfully rewritten some MFMAs, we've alleviated some VGPR
  // pressure. See if we can eliminate some spills now that those registers are
  // more available.
  if (MadeChange)
    eliminateSpillsOfReassignedVGPRs();

  return MadeChange;
}

class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
public:
  static char ID;
  RegisterClassInfo RegClassInfo;

  AMDGPURewriteAGPRCopyMFMALegacy() : MachineFunctionPass(ID) {
    initializeAMDGPURewriteAGPRCopyMFMALegacyPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Rewrite AGPR-Copy-MFMA";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervalsWrapperPass>();
    AU.addRequired<VirtRegMapWrapperLegacy>();
    AU.addRequired<LiveRegMatrixWrapperLegacy>();
    AU.addRequired<LiveStacksWrapperLegacy>();

    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<VirtRegMapWrapperLegacy>();
    AU.addPreserved<LiveRegMatrixWrapperLegacy>();
    AU.addPreserved<LiveStacksWrapperLegacy>();

    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
                      "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
                    "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)

char AMDGPURewriteAGPRCopyMFMALegacy::ID = 0;

char &llvm::AMDGPURewriteAGPRCopyMFMALegacyID =
    AMDGPURewriteAGPRCopyMFMALegacy::ID;

bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
    MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  RegClassInfo.runOnMachineFunction(MF);

  auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
  auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
  auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
  return Impl.run(MF);
}

PreservedAnalyses
AMDGPURewriteAGPRCopyMFMAPass::run(MachineFunction &MF,
                                   MachineFunctionAnalysisManager &MFAM) {
  VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
  LiveRegMatrix &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
  LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
  LiveStacks &LSS = MFAM.getResult<LiveStacksAnalysis>(MF);
  RegisterClassInfo RegClassInfo;
  RegClassInfo.runOnMachineFunction(MF);

  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
  if (!Impl.run(MF))
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<LiveStacksAnalysis>();
  return PA;
}