LLVM 20.0.0git
GCNPreRAOptimizations.cpp
Go to the documentation of this file.
1//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass combines split register tuple initialization into a single pseudo:
11///
12/// undef %0.sub1:sreg_64 = S_MOV_B32 1
13/// %0.sub0:sreg_64 = S_MOV_B32 2
14/// =>
15/// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x100000002
16///
17/// This is to allow rematerialization of a value instead of spilling. It is
18/// supposed to be done after register coalescer to allow it to do its job and
19/// before actual register allocation to allow rematerialization.
20///
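21/// Right now the tuple combining only handles 64 bit SGPRs with immediate
22/// initializers, although the same should be possible with other register
23/// classes and instructions if necessary.
///
/// On subtargets without GFX90A instructions the pass additionally rewrites a
/// COPY into an AGPR whose source is another virtual AGPR defined by
/// V_ACCVGPR_WRITE_B32_e64 from a virtual register, so that the COPY reads
/// that register directly.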
24///
25//===----------------------------------------------------------------------===//
26
27#include "AMDGPU.h"
28#include "GCNSubtarget.h"
33
34using namespace llvm;
35
36#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
37
38namespace {
39
40class GCNPreRAOptimizations : public MachineFunctionPass {
41private:
42 const SIInstrInfo *TII;
43 const SIRegisterInfo *TRI;
45 LiveIntervals *LIS;
46
47 bool processReg(Register Reg);
48
49public:
50 static char ID;
51
52 GCNPreRAOptimizations() : MachineFunctionPass(ID) {
54 }
55
56 bool runOnMachineFunction(MachineFunction &MF) override;
57
58 StringRef getPassName() const override {
59 return "AMDGPU Pre-RA optimizations";
60 }
61
62 void getAnalysisUsage(AnalysisUsage &AU) const override {
64 AU.setPreservesAll();
66 }
67};
68
69} // End anonymous namespace.
70
71INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
72 "AMDGPU Pre-RA optimizations", false, false)
74INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
76
77char GCNPreRAOptimizations::ID = 0;
78
79char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;
80
82 return new GCNPreRAOptimizations();
83}
84
85bool GCNPreRAOptimizations::processReg(Register Reg) {
86 MachineInstr *Def0 = nullptr;
87 MachineInstr *Def1 = nullptr;
88 uint64_t Init = 0;
89 bool Changed = false;
90 SmallSet<Register, 32> ModifiedRegs;
91 bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));
92
93 for (MachineInstr &I : MRI->def_instructions(Reg)) {
94 switch (I.getOpcode()) {
95 default:
96 return false;
97 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
98 break;
99 case AMDGPU::COPY: {
100 // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
101 // intermdiate temporary VGPR register. Try to find the defining
102 // accvgpr_write to avoid temporary registers.
103
104 if (!IsAGPRDst)
105 return false;
106
107 Register SrcReg = I.getOperand(1).getReg();
108
109 if (!SrcReg.isVirtual())
110 break;
111
112 // Check if source of copy is from another AGPR.
113 bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
114 if (!IsAGPRSrc)
115 break;
116
117 // def_instructions() does not look at subregs so it may give us a
118 // different instruction that defines the same vreg but different subreg
119 // so we have to manually check subreg.
120 Register SrcSubReg = I.getOperand(1).getSubReg();
121 for (auto &Def : MRI->def_instructions(SrcReg)) {
122 if (SrcSubReg != Def.getOperand(0).getSubReg())
123 continue;
124
125 if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
126 MachineOperand DefSrcMO = Def.getOperand(1);
127
128 // Immediates are not an issue and can be propagated in
129 // postrapseudos pass. Only handle cases where defining
130 // accvgpr_write source is a vreg.
131 if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
132 // Propagate source reg of accvgpr write to this copy instruction
133 I.getOperand(1).setReg(DefSrcMO.getReg());
134 I.getOperand(1).setSubReg(DefSrcMO.getSubReg());
135
136 // Reg uses were changed, collect unique set of registers to update
137 // live intervals at the end.
138 ModifiedRegs.insert(DefSrcMO.getReg());
139 ModifiedRegs.insert(SrcReg);
140
141 Changed = true;
142 }
143
144 // Found the defining accvgpr_write, stop looking any further.
145 break;
146 }
147 }
148 break;
149 }
150 case AMDGPU::S_MOV_B32:
151 if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
152 I.getNumOperands() != 2)
153 return false;
154
155 switch (I.getOperand(0).getSubReg()) {
156 default:
157 return false;
158 case AMDGPU::sub0:
159 if (Def0)
160 return false;
161 Def0 = &I;
162 Init |= Lo_32(I.getOperand(1).getImm());
163 break;
164 case AMDGPU::sub1:
165 if (Def1)
166 return false;
167 Def1 = &I;
168 Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
169 break;
170 }
171 break;
172 }
173 }
174
175 // For AGPR reg, check if live intervals need to be updated.
176 if (IsAGPRDst) {
177 if (Changed) {
178 for (Register RegToUpdate : ModifiedRegs) {
179 LIS->removeInterval(RegToUpdate);
180 LIS->createAndComputeVirtRegInterval(RegToUpdate);
181 }
182 }
183
184 return Changed;
185 }
186
187 // For SGPR reg, check if we can combine instructions.
188 if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
189 return Changed;
190
191 LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
192 << " =>\n");
193
194 if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
195 LIS->getInstructionIndex(*Def0)))
196 std::swap(Def0, Def1);
197
198 LIS->RemoveMachineInstrFromMaps(*Def0);
199 LIS->RemoveMachineInstrFromMaps(*Def1);
200 auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
201 TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
202 .addImm(Init);
203
204 Def0->eraseFromParent();
205 Def1->eraseFromParent();
206 LIS->InsertMachineInstrInMaps(*NewI);
207 LIS->removeInterval(Reg);
208 LIS->createAndComputeVirtRegInterval(Reg);
209
210 LLVM_DEBUG(dbgs() << " " << *NewI);
211
212 return true;
213}
214
215bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
216 if (skipFunction(MF.getFunction()))
217 return false;
218
220 TII = ST.getInstrInfo();
221 MRI = &MF.getRegInfo();
222 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
223 TRI = ST.getRegisterInfo();
224
225 bool Changed = false;
226
227 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
229 if (!LIS->hasInterval(Reg))
230 continue;
231 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
232 if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
233 (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
234 continue;
235
236 Changed |= processReg(Reg);
237 }
238
239 return Changed;
240}
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
#define LLVM_DEBUG(...)
Definition: Debug.h:106
Pre RA optimizations
#define DEBUG_TYPE
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
SI optimize exec mask operations pre RA
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
Definition: SlotIndexes.h:182
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
const MCRegisterClass * MC
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
char & GCNPreRAOptimizationsID
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createGCNPreRAOptimizationsPass()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:159
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860