LLVM 22.0.0git
AMDGPULowerVGPREncoding.cpp
Go to the documentation of this file.
1//===- AMDGPULowerVGPREncoding.cpp - lower VGPRs above v255 ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Lower VGPRs above first 256 on gfx1250.
11///
12/// The pass scans used VGPRs and inserts S_SET_VGPR_MSB instructions to switch
13/// VGPR addressing mode. The mode change is effective until the next change.
14/// This instruction provides high bits of a VGPR address for four of the
15/// operands: vdst, src0, src1, and src2, or other 4 operands depending on the
16/// instruction encoding. If bits are set they are added as MSB to the
17/// corresponding operand VGPR number.
18///
19/// There is no need to replace actual register operands because encoding of the
20/// high and low VGPRs is the same. I.e. v0 has the encoding 0x100, so does
21/// v256. v1 has the encoding 0x101 and v257 has the same encoding. So high
22/// VGPRs will survive until actual encoding and will result in the same actual
23/// bit encoding.
24///
25/// As a result the pass only inserts S_SET_VGPR_MSB to provide an actual offset
26/// to a VGPR address of the subsequent instructions. The InstPrinter will take
27/// care of printing a low VGPR instead of a high one. In principle it
28/// would be viable to print actual high VGPR numbers, but that would disagree
29/// with a disasm printing and create a situation where asm text is not
30/// deterministic.
31///
32/// This pass creates a convention where non-fall through basic blocks shall
33/// start with all 4 MSBs zero. Otherwise a disassembly would not be readable.
34/// An optimization here is possible but deemed not desirable because of the
35/// readability concerns.
36///
37/// Consequently the ABI is set to expect all 4 MSBs to be zero on entry.
38/// The pass must run very late in the pipeline to make sure no changes to VGPR
39/// operands will be made after it.
40//
41//===----------------------------------------------------------------------===//
42
44#include "AMDGPU.h"
45#include "GCNSubtarget.h"
47#include "SIInstrInfo.h"
49
50using namespace llvm;
51
52#define DEBUG_TYPE "amdgpu-lower-vgpr-encoding"
53
54namespace {
55
/// Implementation of the VGPR encoding lowering. It walks a MachineFunction
/// and keeps track of the mode bits that the most recently emitted
/// S_SET_VGPR_MSB instruction established, emitting or updating such
/// instructions as operands require.
56class AMDGPULowerVGPREncoding {
 // Number of operand slots examined per instruction (length of the operand
 // name tables passed to computeMode).
57 static constexpr unsigned OpNum = 4;
 // Width in bits of one packed mode field.
58 static constexpr unsigned BitsPerField = 2;
 // Number of packed mode fields.
59 static constexpr unsigned NumFields = 4;
 // Mask with the low BitsPerField bits set; written into a Mask field to mark
 // that field as significant.
60 static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
 // Total width in bits of all mode fields together.
61 static constexpr unsigned ModeWidth = NumFields * BitsPerField;
 // Mask with the low ModeWidth bits set.
62 static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
 // Storage for the mode: NumFields values of BitsPerField bits each, backed
 // by a std::bitset.
63 using ModeType = PackedVector<unsigned, BitsPerField,
64 std::bitset<BitsPerField * NumFields>>;
65
 // Packed mode value with a conversion to an integer immediate and a helper
 // producing an all-ones mask.
66 class ModeTy : public ModeType {
67 public:
68 // bitset constructor will set all bits to zero
69 ModeTy() : ModeType(0) {}
70
 // Raw packed bits as an integer, used when forming the instruction
 // immediate.
71 operator int64_t() const { return raw_bits().to_ulong(); }
72
 // Returns a value with every bit set (a zero-initialized value, flipped).
73 static ModeTy fullMask() {
74 ModeTy M;
75 M.raw_bits().flip();
76 return M;
77 }
78 };
79
80public:
 // Entry point: process \p MF. Returns the accumulated result of
 // runOnMachineInstr over all visited instructions.
81 bool run(MachineFunction &MF);
82
83private:
 // Target instruction and register info; assigned at the start of run().
84 const SIInstrInfo *TII;
85 const SIRegisterInfo *TRI;
86
87 // Current basic block.
 // NOTE(review): the declaration that belongs here is absent from this
 // listing; setMode() and run() use a member named MBB.
89
90 /// Most recent s_set_* instruction.
91 MachineInstr *MostRecentModeSet;
92
93 /// Current mode bits.
94 ModeTy CurrentMode;
95
96 /// Current mask of mode bits that instructions since MostRecentModeSet care
97 /// about.
98 ModeTy CurrentMask;
99
100 /// Number of current hard clause instructions.
101 unsigned ClauseLen;
102
103 /// Number of hard clause instructions remaining.
104 unsigned ClauseRemaining;
105
106 /// Clause group breaks.
107 unsigned ClauseBreaks;
108
109 /// Last hard clause instruction.
 // NOTE(review): the declaration that belongs here is absent from this
 // listing; handleClause() and run() use a member named Clause.
111
112 /// Insert mode change before \p I. \returns true if mode was changed.
113 bool setMode(ModeTy NewMode, ModeTy Mask,
 // NOTE(review): the remainder of this declaration is absent from this
 // listing; resetMode() below passes an instr_iterator as third argument.
115
116 /// Reset mode to default.
117 void resetMode(MachineBasicBlock::instr_iterator I) {
118 setMode(ModeTy(), ModeTy::fullMask(), I);
119 }
120
121 /// If \p MO references VGPRs, return the MSBs. Otherwise, return nullopt.
122 std::optional<unsigned> getMSBs(const MachineOperand &MO) const;
123
124 /// Handle single \p MI. \return true if changed.
125 bool runOnMachineInstr(MachineInstr &MI);
126
127 /// Compute the mode and mode mask for a single \p MI given \p Ops operands
128 /// bit mapping. Optionally takes second array \p Ops2 for VOPD.
129 /// If provided and an operand from \p Ops is not a VGPR, then \p Ops2
130 /// is checked.
131 void computeMode(ModeTy &NewMode, ModeTy &Mask, MachineInstr &MI,
132 const AMDGPU::OpName Ops[OpNum],
133 const AMDGPU::OpName *Ops2 = nullptr);
134
135 /// Check if an instruction \p I is within a clause and returns a suitable
136 /// iterator to insert mode change. It may also modify the S_CLAUSE
137 /// instruction to extend it or drop the clause if it cannot be adjusted.
 // NOTE(review): the declaration documented above (handleClause, per its
 // definition further down) is absent from this listing.
140
141 /// Check if an instruction \p I is immediately after another program state
142 /// instruction which it cannot coissue with. If so, insert before that
143 /// instruction to encourage more coissuing.
 // NOTE(review): the declaration documented above (handleCoissue, per its
 // definition further down) is absent from this listing.
146};
147
/// Make sure the mode bits selected by \p Mask equal \p NewMode before the
/// given insertion point.
///
/// Three outcomes are possible:
///  - the masked bits already match CurrentMode: only CurrentMask is widened
///    and false is returned;
///  - the differing bits are not ones that instructions since
///    MostRecentModeSet care about: the immediate of MostRecentModeSet is
///    rewritten in place and true is returned;
///  - otherwise a new S_SET_VGPR_MSB is built and true is returned.
// NOTE(review): the line carrying the third parameter is absent from this
// listing; the body uses it as an iterator named I.
148bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
 // NewMode must not have bits set outside of Mask.
150 assert((NewMode.raw_bits() & ~Mask.raw_bits()).none());
151
 // Bits in which the requested mode differs from the current one.
152 auto Delta = NewMode.raw_bits() ^ CurrentMode.raw_bits();
153
 // No relevant bit differs: just remember that these bits are now in use.
154 if ((Delta & Mask.raw_bits()).none()) {
155 CurrentMask |= Mask;
156 return false;
157 }
158
 // The differing bits do not overlap CurrentMask, so the existing mode-set
 // instruction can be updated instead of emitting another one.
159 if (MostRecentModeSet && (Delta & CurrentMask.raw_bits()).none()) {
160 CurrentMode |= NewMode;
161 CurrentMask |= Mask;
162
163 MachineOperand &Op = MostRecentModeSet->getOperand(0);
164
165 // Carry old mode bits from the existing instruction.
166 int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
167
168 Op.setImm(CurrentMode | OldModeBits);
169 return true;
170 }
171
172 // Record previous mode into high 8 bits of the immediate.
173 int64_t OldModeBits = CurrentMode << ModeWidth;
174
 // Let the clause and coissue handling adjust the insertion point, then emit
 // the new instruction there.
175 I = handleClause(I);
176 I = handleCoissue(I);
177 MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
178 .addImm(NewMode | OldModeBits);
179
 // Only the bits of this request are known to be in use after a fresh set.
180 CurrentMode = NewMode;
181 CurrentMask = Mask;
182 return true;
183}
184
185std::optional<unsigned>
186AMDGPULowerVGPREncoding::getMSBs(const MachineOperand &MO) const {
187 if (!MO.isReg())
188 return std::nullopt;
189
190 MCRegister Reg = MO.getReg();
191 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
192 if (!RC || !TRI->isVGPRClass(RC))
193 return std::nullopt;
194
195 unsigned Idx = TRI->getHWRegIndex(Reg);
196 return Idx >> 8;
197}
198
/// Fill \p NewMode and \p Mask for \p MI.
///
/// For each of the OpNum slots the named operand from \p Ops is looked up; if
/// it yields no MSBs and \p Ops2 is given, the operand named by \p Ops2 for
/// the same slot is tried instead. Slots for which MSBs were found get their
/// value stored in \p NewMode and FieldMask stored in \p Mask; all other slots
/// stay zero in both outputs.
// NOTE(review): the signature line carrying the MI parameter and two lines of
// the tied-operand condition are absent from this listing.
199void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode, ModeTy &Mask,
201 const AMDGPU::OpName Ops[OpNum],
202 const AMDGPU::OpName *Ops2) {
 // Start from "no bits required".
203 NewMode = {};
204 Mask = {};
205
206 for (unsigned I = 0; I < OpNum; ++I) {
 // Op may be null when MI has no operand with this name.
207 MachineOperand *Op = TII->getNamedOperand(MI, Ops[I]);
208
209 std::optional<unsigned> MSBits;
210 if (Op)
211 MSBits = getMSBs(*Op);
212
 // Assertion-enabled builds only: if both tables provide MSBs for this slot
 // the two values must agree.
213#if !defined(NDEBUG)
214 if (MSBits.has_value() && Ops2) {
215 auto Op2 = TII->getNamedOperand(MI, Ops2[I]);
216 if (Op2) {
217 std::optional<unsigned> MSBits2;
218 MSBits2 = getMSBs(*Op2);
219 if (MSBits2.has_value() && MSBits != MSBits2)
220 llvm_unreachable("Invalid VOPD pair was created");
221 }
222 }
223#endif
224
 // Fall back to the second table when the first gave nothing for this slot.
225 if (!MSBits.has_value() && Ops2) {
226 Op = TII->getNamedOperand(MI, Ops2[I]);
227 if (Op)
228 MSBits = getMSBs(*Op);
229 }
230
 // Neither table gave MSBs for this slot; it places no requirement.
231 if (!MSBits.has_value())
232 continue;
233
234 // Skip tied uses of src2 of VOP2, these will be handled along with defs and
235 // only vdst bit affects these operands. We cannot skip tied uses of VOP3,
236 // these uses are real even if must match the vdst.
237 if (Ops[I] == AMDGPU::OpName::src2 && !Op->isDef() && Op->isTied() &&
240 TII->hasVALU32BitEncoding(MI.getOpcode()))))
241 continue;
242
 // Record the required value and mark the whole field as significant.
243 NewMode[I] = MSBits.value();
244 Mask[I] = FieldMask;
245 }
246}
247
/// Process one instruction: when operand tables are available for \p MI,
/// compute the mode it needs and request it via setMode() at \p MI's position.
/// \returns the result of setMode(), or false when there are no tables.
// NOTE(review): the line that defines Ops is absent from this listing; it is
// used below as a pair whose first and second members are the operand tables.
248bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
250 if (Ops.first) {
251 ModeTy NewMode, Mask;
252 computeMode(NewMode, Mask, MI, Ops.first, Ops.second);
253 return setMode(NewMode, Mask, MI.getIterator());
254 }
 // Without tables, only meta or pseudo instructions are expected to have
 // VGPR uses.
255 assert(!TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());
256
257 return false;
258}
259
/// Adjust the insertion point \p I for a mode change with respect to the
/// clause currently being tracked, and update the clause bookkeeping.
///
/// Outside of a clause (ClauseRemaining == 0) \p I is returned unchanged.
/// At the first instruction of a clause the iterator of the node preceding
/// the S_CLAUSE instruction is returned. In the middle of a clause the clause
/// is either erased (when it has breaks) or its recorded length is rewritten,
/// and \p I is returned.
// NOTE(review): the return-type line of this definition is absent from this
// listing.
261AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
262 if (!ClauseRemaining)
263 return I;
264
265 // A clause cannot start with a special instruction, place it right before
266 // the clause.
267 if (ClauseRemaining == ClauseLen) {
268 I = Clause->getPrevNode()->getIterator();
269 assert(I->isBundle());
270 return I;
271 }
272
273 // If a clause defines breaks each group cannot start with a mode change.
274 // just drop the clause.
275 if (ClauseBreaks) {
276 Clause->eraseFromBundle();
 // Stop tracking the erased clause.
277 ClauseRemaining = 0;
278 return I;
279 }
280
281 // Otherwise adjust a number of instructions in the clause if it fits.
282 // If it does not clause will just become shorter. Since the length
283 // recorded in the clause is one less, increment the length after the
284 // update. Note that SIMM16[5:0] must be 1-62, not 0 or 63.
285 if (ClauseLen < 63)
286 Clause->getOperand(0).setImm(ClauseLen | (ClauseBreaks << 8));
287
288 ++ClauseLen;
289
290 return I;
291}
292
294AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
295 if (I.isEnd())
296 return I;
297
298 if (I == I->getParent()->begin())
299 return I;
300
301 MachineBasicBlock::instr_iterator Prev = std::prev(I);
302 auto isProgramStateSALU = [this](MachineInstr *MI) {
303 return TII->isBarrier(MI->getOpcode()) ||
304 TII->isWaitcnt(MI || (SIInstrInfo::isProgramStateSALU(*MI) &&
305 MI->getOpcode() != AMDGPU::S_SET_VGPR_MSB));
306 };
307
308 if (!isProgramStateSALU(&*Prev))
309 return I;
310
311 while (!Prev.isEnd() && (Prev != Prev->getParent()->begin()) &&
312 isProgramStateSALU(&*Prev)) {
313 --Prev;
314 }
315 return Prev;
316}
317
318bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
319 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
320 if (!ST.has1024AddressableVGPRs())
321 return false;
322
323 TII = ST.getInstrInfo();
324 TRI = ST.getRegisterInfo();
325
326 bool Changed = false;
327 ClauseLen = ClauseRemaining = 0;
328 CurrentMode.reset();
329 CurrentMask.reset();
330 for (auto &MBB : MF) {
331 MostRecentModeSet = nullptr;
332 this->MBB = &MBB;
333
334 for (auto &MI : llvm::make_early_inc_range(MBB.instrs())) {
335 if (MI.isMetaInstruction())
336 continue;
337
338 if (MI.isTerminator() || MI.isCall()) {
339 if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
340 MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED)
341 CurrentMode.reset();
342 else
343 resetMode(MI.getIterator());
344 continue;
345 }
346
347 if (MI.isInlineAsm()) {
348 if (TII->hasVGPRUses(MI))
349 resetMode(MI.getIterator());
350 continue;
351 }
352
353 if (MI.getOpcode() == AMDGPU::S_CLAUSE) {
354 assert(!ClauseRemaining && "Nested clauses are not supported");
355 ClauseLen = MI.getOperand(0).getImm();
356 ClauseBreaks = (ClauseLen >> 8) & 15;
357 ClauseLen = ClauseRemaining = (ClauseLen & 63) + 1;
358 Clause = &MI;
359 continue;
360 }
361
362 Changed |= runOnMachineInstr(MI);
363
364 if (ClauseRemaining)
365 --ClauseRemaining;
366 }
367
368 // Reset the mode if we are falling through.
369 resetMode(MBB.instr_end());
370 }
371
372 return Changed;
373}
374
/// MachineFunctionPass wrapper that runs AMDGPULowerVGPREncoding on each
/// function.
375class AMDGPULowerVGPREncodingLegacy : public MachineFunctionPass {
376public:
 // Pass identification; its address is handed to the MachineFunctionPass
 // base class.
377 static char ID;
378
379 AMDGPULowerVGPREncodingLegacy() : MachineFunctionPass(ID) {}
380
 // Runs a fresh AMDGPULowerVGPREncoding instance over \p MF and forwards its
 // result.
381 bool runOnMachineFunction(MachineFunction &MF) override {
382 return AMDGPULowerVGPREncoding().run(MF);
383 }
384
 // Declares that the pass preserves the CFG.
 // NOTE(review): one line of this body is absent from this listing.
385 void getAnalysisUsage(AnalysisUsage &AU) const override {
386 AU.setPreservesCFG();
388 }
389};
390
391} // namespace
392
// Storage for the legacy pass ID.
393char AMDGPULowerVGPREncodingLegacy::ID = 0;
394
// Reference to the pass ID exposed in the llvm namespace.
395char &llvm::AMDGPULowerVGPREncodingLegacyID = AMDGPULowerVGPREncodingLegacy::ID;
396
// Register the legacy pass under DEBUG_TYPE with its display name.
397INITIALIZE_PASS(AMDGPULowerVGPREncodingLegacy, DEBUG_TYPE,
398 "AMDGPU Lower VGPR Encoding", false, false)
399
403 if (!AMDGPULowerVGPREncoding().run(MF))
404 return PreservedAnalyses::all();
405
408 return PA;
409}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
This file implements the PackedVector class.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
Interface definition for SIInstrInfo.
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Instructions::iterator instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Store a vector of values using a specific number of bits for each value.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
static bool isVOP2(const MachineInstr &MI)
static bool isProgramStateSALU(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
DWARFExpression::Operation Op
char & AMDGPULowerVGPREncodingLegacyID