40#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
46 class SingleUseInstruction {
48 static const unsigned MaxSkipRange = 0b111;
49 static const unsigned MaxNumberOfSkipRegions = 2;
51 unsigned LastEncodedPositionEnd;
54 std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
58 void skip(
const unsigned ProducerPosition) {
59 while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
61 LastEncodedPositionEnd += MaxSkipRange;
63 SkipRegions.
push_back(ProducerPosition - LastEncodedPositionEnd);
64 LastEncodedPositionEnd = ProducerPosition;
67 bool currentRegionHasSpace() {
70 return SingleUseRegions[
Region] <
71 ((
Region == MaxNumberOfSkipRegions) ? 0b1111U : 0b111U);
74 unsigned encodeImm() {
77 unsigned Imm = SingleUseRegions[SkipRegions.
size()];
78 unsigned ShiftAmount = 4;
79 for (
unsigned i = SkipRegions.
size(); i > 0; i--) {
80 Imm |= SkipRegions[i - 1] << ShiftAmount;
82 Imm |= SingleUseRegions[i - 1] << ShiftAmount;
89 SingleUseInstruction(
const unsigned ProducerPosition,
91 : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
92 SingleUseRegions({1, 0, 0}) {}
98 bool tryAddProducer(
const unsigned ProducerPosition,
MachineInstr *
MI) {
102 if (LastEncodedPositionEnd +
103 (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.
size())) <
108 if (LastEncodedPositionEnd != ProducerPosition ||
109 !currentRegionHasSpace()) {
112 if (SkipRegions.
size() == MaxNumberOfSkipRegions)
114 skip(ProducerPosition);
117 SingleUseRegions[SkipRegions.
size()]++;
118 LastEncodedPositionEnd = ProducerPosition + 1;
125 SII->get(AMDGPU::S_SINGLEUSE_VDST))
135 void insertSingleUseInstructions(
136 ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers)
const {
139 for (
auto &[Position,
MI] : SingleUseProducers) {
141 if (Instructions.empty() ||
142 !Instructions.back().tryAddProducer(Position,
MI)) {
144 Instructions.push_back(SingleUseInstruction(Position,
MI));
154 if (!ST.hasVGPRSingleUseHintInsts())
157 SII = ST.getInstrInfo();
159 bool InstructionEmitted =
false;
170 const auto [Unit, Mask] = *Units;
171 if ((Mask & Liveout.LaneMask).any())
172 RegisterUseCount[Unit] = 2;
177 SingleUseProducerPositions;
179 unsigned VALUInstrCount = 0;
183 bool AllProducerOperandsAreSingleUse =
true;
190 for (
const auto &Operand :
MI.all_defs()) {
191 const auto Reg = Operand.getReg();
193 const auto RegUnits =
TRI->regunits(Reg);
195 return RegisterUseCount[Unit] > 1;
197 AllProducerOperandsAreSingleUse =
false;
201 RegisterUseCount.
erase(Unit);
204 for (
const auto &Operand :
MI.all_uses()) {
205 const auto Reg = Operand.getReg();
213 for (
const MCRegUnit Unit : RegistersUsed)
214 RegisterUseCount[Unit]++;
217 if (
MI.modifiesRegister(AMDGPU::EXEC,
TRI)) {
218 for (
auto &UsedReg : RegisterUseCount)
224 if (AllProducerOperandsAreSingleUse) {
225 SingleUseProducerPositions.
push_back({VALUInstrCount, &
MI});
226 InstructionEmitted =
true;
230 insertSingleUseInstructions(SingleUseProducerPositions);
232 return InstructionEmitted;
237char AMDGPUInsertSingleUseVDST::ID = 0;
242 "AMDGPU Insert SingleUseVDST",
false,
false)
This file defines the DenseMap class.
AMD GCN specific subclass of TargetSubtarget.
static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges)
Skip an InlineInfo object in the specified data at the specified offset.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Interface definition for SIInstrInfo.
Interface definition for SIRegisterInfo.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
bool erase(const KeyT &Val)
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
bool isValid() const
Returns true if this iterator is not yet at the end.
iterator_range< liveout_iterator > liveouts() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const SIRegisterInfo & getRegisterInfo() const
static bool isVALU(const MachineInstr &MI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & AMDGPUInsertSingleUseVDSTID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.