LLVM 19.0.0git
AMDGPUInsertSingleUseVDST.cpp
Go to the documentation of this file.
//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
/// instructions that produce single-use VGPR values. If the value is forwarded
/// to the consumer instruction prior to VGPR writeback, the hardware can
/// then skip (kill) the VGPR write.
//
//===----------------------------------------------------------------------===//
16
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include <array>
37
38using namespace llvm;
39
40#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
41
42namespace {
43class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
44private:
45 const SIInstrInfo *SII;
46 class SingleUseInstruction {
47 private:
48 static const unsigned MaxSkipRange = 0b111;
49 static const unsigned MaxNumberOfSkipRegions = 2;
50
51 unsigned LastEncodedPositionEnd;
52 MachineInstr *ProducerInstr;
53
54 std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
56
57 // Adds a skip region into the instruction.
58 void skip(const unsigned ProducerPosition) {
59 while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
60 SkipRegions.push_back(MaxSkipRange);
61 LastEncodedPositionEnd += MaxSkipRange;
62 }
63 SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd);
64 LastEncodedPositionEnd = ProducerPosition;
65 }
66
67 bool currentRegionHasSpace() {
68 const auto Region = SkipRegions.size();
69 // The first region has an extra bit of encoding space.
70 return SingleUseRegions[Region] <
71 ((Region == MaxNumberOfSkipRegions) ? 0b1111 : 0b111);
72 }
73
74 unsigned encodeImm() {
75 // Handle the first Single Use Region separately as it has an extra bit
76 // of encoding space.
77 unsigned Imm = SingleUseRegions[SkipRegions.size()];
78 unsigned ShiftAmount = 4;
79 for (unsigned i = SkipRegions.size(); i > 0; i--) {
80 Imm |= SkipRegions[i - 1] << ShiftAmount;
81 ShiftAmount += 3;
82 Imm |= SingleUseRegions[i - 1] << ShiftAmount;
83 ShiftAmount += 3;
84 }
85 return Imm;
86 }
87
88 public:
89 SingleUseInstruction(const unsigned ProducerPosition,
90 MachineInstr *Producer)
91 : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
92 SingleUseRegions({1, 0, 0}) {}
93
94 // Returns false if adding a new single use producer failed. This happens
95 // because it could not be encoded, either because there is no room to
96 // encode another single use producer region or that this single use
97 // producer is too far away to encode the amount of instructions to skip.
98 bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) {
99 // Producer is too far away to encode into this instruction or another
100 // skip region is needed and SkipRegions.size() = 2 so there's no room for
101 // another skip region, therefore a new instruction is needed.
102 if (LastEncodedPositionEnd +
103 (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) <
104 ProducerPosition)
105 return false;
106
107 // If a skip region is needed.
108 if (LastEncodedPositionEnd != ProducerPosition ||
109 !currentRegionHasSpace()) {
110 // If the current region is out of space therefore a skip region would
111 // be needed, but there is no room for another skip region.
112 if (SkipRegions.size() == MaxNumberOfSkipRegions)
113 return false;
114 skip(ProducerPosition);
115 }
116
117 SingleUseRegions[SkipRegions.size()]++;
118 LastEncodedPositionEnd = ProducerPosition + 1;
119 ProducerInstr = MI;
120 return true;
121 }
122
123 auto emit(const SIInstrInfo *SII) {
124 return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(),
125 SII->get(AMDGPU::S_SINGLEUSE_VDST))
126 .addImm(encodeImm());
127 }
128 };
129
130public:
131 static char ID;
132
133 AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
134
135 void insertSingleUseInstructions(
136 ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers) const {
138
139 for (auto &[Position, MI] : SingleUseProducers) {
140 // Encode this position into the last single use instruction if possible.
141 if (Instructions.empty() ||
142 !Instructions.back().tryAddProducer(Position, MI)) {
143 // If not, add a new instruction.
144 Instructions.push_back(SingleUseInstruction(Position, MI));
145 }
146 }
147
148 for (auto &Instruction : Instructions)
149 Instruction.emit(SII);
150 }
151
152 bool runOnMachineFunction(MachineFunction &MF) override {
153 const auto &ST = MF.getSubtarget<GCNSubtarget>();
154 if (!ST.hasVGPRSingleUseHintInsts())
155 return false;
156
157 SII = ST.getInstrInfo();
158 const auto *TRI = &SII->getRegisterInfo();
159 bool InstructionEmitted = false;
160
161 for (MachineBasicBlock &MBB : MF) {
162 DenseMap<MCRegUnit, unsigned> RegisterUseCount;
163
164 // Handle boundaries at the end of basic block separately to avoid
165 // false positives. If they are live at the end of a basic block then
166 // assume it has more uses later on.
167 for (const auto &Liveout : MBB.liveouts()) {
168 for (MCRegUnitMaskIterator Units(Liveout.PhysReg, TRI); Units.isValid();
169 ++Units) {
170 const auto [Unit, Mask] = *Units;
171 if ((Mask & Liveout.LaneMask).any())
172 RegisterUseCount[Unit] = 2;
173 }
174 }
175
177 SingleUseProducerPositions;
178
179 unsigned VALUInstrCount = 0;
180 for (MachineInstr &MI : reverse(MBB.instrs())) {
181 // All registers in all operands need to be single use for an
182 // instruction to be marked as a single use producer.
183 bool AllProducerOperandsAreSingleUse = true;
184
185 // Gather a list of Registers used before updating use counts to avoid
186 // double counting registers that appear multiple times in a single
187 // MachineInstr.
188 SmallVector<MCRegUnit> RegistersUsed;
189
190 for (const auto &Operand : MI.all_defs()) {
191 const auto Reg = Operand.getReg();
192
193 const auto RegUnits = TRI->regunits(Reg);
194 if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {
195 return RegisterUseCount[Unit] > 1;
196 }))
197 AllProducerOperandsAreSingleUse = false;
198
199 // Reset uses count when a register is no longer live.
200 for (const MCRegUnit Unit : RegUnits)
201 RegisterUseCount.erase(Unit);
202 }
203
204 for (const auto &Operand : MI.all_uses()) {
205 const auto Reg = Operand.getReg();
206
207 // Count the number of times each register is read.
208 for (const MCRegUnit Unit : TRI->regunits(Reg)) {
209 if (!is_contained(RegistersUsed, Unit))
210 RegistersUsed.push_back(Unit);
211 }
212 }
213 for (const MCRegUnit Unit : RegistersUsed)
214 RegisterUseCount[Unit]++;
215
216 // Do not attempt to optimise across exec mask changes.
217 if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
218 for (auto &UsedReg : RegisterUseCount)
219 UsedReg.second = 2;
220 }
221
223 continue;
224 if (AllProducerOperandsAreSingleUse) {
225 SingleUseProducerPositions.push_back({VALUInstrCount, &MI});
226 InstructionEmitted = true;
227 }
228 VALUInstrCount++;
229 }
230 insertSingleUseInstructions(SingleUseProducerPositions);
231 }
232 return InstructionEmitted;
233 }
234};
235} // namespace
236
237char AMDGPUInsertSingleUseVDST::ID = 0;
238
239char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
240
241INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
242 "AMDGPU Insert SingleUseVDST", false, false)
MachineBasicBlock & MBB
#define DEBUG_TYPE
dxil metadata emit
This file defines the DenseMap class.
AMD GCN specific subclass of TargetSubtarget.
IRTranslator LLVM IR MI
static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges)
Skip an InlineInfo object in the specified data at the specified offset.
Definition: InlineInfo.cpp:77
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
Interface definition for SIInstrInfo.
Interface definition for SIRegisterInfo.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
bool isValid() const
Returns true if this iterator is not yet at the end.
iterator_range< liveout_iterator > liveouts() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:341
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:222
static bool isVALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:416
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & AMDGPUInsertSingleUseVDSTID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879