LLVM 20.0.0git
SILowerWWMCopies.cpp
Go to the documentation of this file.
//===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Lowers the WWM_COPY instructions for various register classes.
/// The AMDGPU target generates WWM_COPY instructions to differentiate WWM
/// copies from plain COPYs. This pass generates the necessary exec mask
/// manipulation instructions to replicate 'Whole Wave Mode' and lowers
/// WWM_COPY back to COPY.
//
//===----------------------------------------------------------------------===//
17
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "si-lower-wwm-copies"
30
31namespace {
32
33class SILowerWWMCopies : public MachineFunctionPass {
34public:
35 static char ID;
36
37 SILowerWWMCopies() : MachineFunctionPass(ID) {
39 }
40
41 bool runOnMachineFunction(MachineFunction &MF) override;
42
43 StringRef getPassName() const override { return "SI Lower WWM Copies"; }
44
45 void getAnalysisUsage(AnalysisUsage &AU) const override {
46 AU.setPreservesAll();
48 }
49
50private:
51 bool isSCCLiveAtMI(const MachineInstr &MI);
52 void addToWWMSpills(MachineFunction &MF, Register Reg);
53
54 LiveIntervals *LIS;
55 SlotIndexes *Indexes;
56 VirtRegMap *VRM;
57 const SIRegisterInfo *TRI;
60};
61
62} // End anonymous namespace.
63
64INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies",
65 false, false)
68INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false,
69 false)
70
71char SILowerWWMCopies::ID = 0;
72
73char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID;
74
75bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
76 // We can't determine the liveness info if LIS isn't available. Early return
77 // in that case and always assume SCC is live.
78 if (!LIS)
79 return true;
80
81 LiveRange &LR =
82 LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
83 SlotIndex Idx = LIS->getInstructionIndex(MI);
84 return LR.liveAt(Idx);
85}
86
87// If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills
88// for preserving its entire lanes at function prolog/epilog.
89void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
90 if (Reg.isPhysical())
91 return;
92
93 MCRegister PhysReg = VRM->getPhys(Reg);
94 assert(PhysReg && "should have allocated a physical register");
95
96 MFI->allocateWWMSpill(MF, PhysReg);
97}
98
99bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) {
101 const SIInstrInfo *TII = ST.getInstrInfo();
102
103 MFI = MF.getInfo<SIMachineFunctionInfo>();
104 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
105 LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
106 auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
107 Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
108 auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>();
109 VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr;
110 TRI = ST.getRegisterInfo();
111 MRI = &MF.getRegInfo();
112
113 if (!MFI->hasVRegFlags())
114 return false;
115
116 bool Changed = false;
117 for (MachineBasicBlock &MBB : MF) {
118 for (MachineInstr &MI : MBB) {
119 if (MI.getOpcode() != AMDGPU::WWM_COPY)
120 continue;
121
122 // TODO: Club adjacent WWM ops between same exec save/restore
123 assert(TII->isVGPRCopy(MI));
124
125 // For WWM vector copies, manipulate the exec mask around the copy
126 // instruction.
127 const DebugLoc &DL = MI.getDebugLoc();
128 MachineBasicBlock::iterator InsertPt = MI.getIterator();
129 Register RegForExecCopy = MFI->getSGPRForEXECCopy();
130 TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy,
131 isSCCLiveAtMI(MI), Indexes);
132 TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes);
133 addToWWMSpills(MF, MI.getOperand(0).getReg());
134 LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI);
135
136 // Lower WWM_COPY back to COPY
137 MI.setDesc(TII->get(AMDGPU::COPY));
138 Changed |= true;
139 }
140 }
141
142 return Changed;
143}
unsigned const MachineRegisterInfo * MRI
aarch64 promote const
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower WWM Copies
#define DEBUG_TYPE
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
A debug info location.
Definition: DebugLoc.h:33
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:74
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SlotIndexes pass.
Definition: SlotIndexes.h:297
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
char & SILowerWWMCopiesID
void initializeSILowerWWMCopiesPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163